1119 files changed, 31673 insertions, 20556 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 34f5a092c99e..315cbdf61979 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -8,7 +8,6 @@
 menuconfig DRM
 	tristate "Direct Rendering Manager (XFree86 4.1.0 and higher DRI support)"
 	depends on (AGP || AGP=n) && !EMULATED_CMPXCHG && HAS_DMA
-	select DRM_NOMODESET
 	select DRM_PANEL_ORIENTATION_QUIRKS
 	select HDMI
 	select FB_CMDLINE
@@ -19,6 +18,7 @@ menuconfig DRM
 # gallium uses SYS_kcmp for os_same_file_description() to de-duplicate
 # device and dmabuf fd. Let's make sure that is available for our userspace.
 	select KCMP
+	select VIDEO_NOMODESET
 	help
 	  Kernel-level support for the Direct Rendering Infrastructure (DRI)
 	  introduced in XFree86 4.0. If you say Y here, you need to select
@@ -233,64 +233,8 @@ source "drivers/gpu/drm/i2c/Kconfig"
 
 source "drivers/gpu/drm/arm/Kconfig"
 
-config DRM_RADEON
-	tristate "ATI Radeon"
-	depends on DRM && PCI && MMU
-	depends on AGP || !AGP
-	select FW_LOADER
-	select DRM_DISPLAY_DP_HELPER
-	select DRM_DISPLAY_HELPER
-        select DRM_KMS_HELPER
-        select DRM_TTM
-	select DRM_TTM_HELPER
-	select POWER_SUPPLY
-	select HWMON
-	select BACKLIGHT_CLASS_DEVICE
-	select INTERVAL_TREE
-	# radeon depends on ACPI_VIDEO when ACPI is enabled, for select to work
-	# ACPI_VIDEO's dependencies must also be selected.
-	select INPUT if ACPI
-	select ACPI_VIDEO if ACPI
-	# On x86 ACPI_VIDEO also needs ACPI_WMI
-	select X86_PLATFORM_DEVICES if ACPI && X86
-	select ACPI_WMI if ACPI && X86
-	help
-	  Choose this option if you have an ATI Radeon graphics card.  There
-	  are both PCI and AGP versions.  You don't need to choose this to
-	  run the Radeon in plain VGA mode.
-
-	  If M is selected, the module will be called radeon.
-
 source "drivers/gpu/drm/radeon/Kconfig"
 
-config DRM_AMDGPU
-	tristate "AMD GPU"
-	depends on DRM && PCI && MMU
-	select FW_LOADER
-	select DRM_DISPLAY_DP_HELPER
-	select DRM_DISPLAY_HDMI_HELPER
-	select DRM_DISPLAY_HELPER
-	select DRM_KMS_HELPER
-	select DRM_SCHED
-	select DRM_TTM
-	select DRM_TTM_HELPER
-	select POWER_SUPPLY
-	select HWMON
-	select BACKLIGHT_CLASS_DEVICE
-	select INTERVAL_TREE
-	select DRM_BUDDY
-	# amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work
-	# ACPI_VIDEO's dependencies must also be selected.
-	select INPUT if ACPI
-	select ACPI_VIDEO if ACPI
-	# On x86 ACPI_VIDEO also needs ACPI_WMI
-	select X86_PLATFORM_DEVICES if ACPI && X86
-	select ACPI_WMI if ACPI && X86
-	help
-	  Choose this option if you have a recent AMD Radeon graphics card.
-
-	  If M is selected, the module will be called amdgpu.
-
 source "drivers/gpu/drm/amd/amdgpu/Kconfig"
 
 source "drivers/gpu/drm/nouveau/Kconfig"
@@ -514,11 +458,6 @@ config DRM_EXPORT_FOR_TESTS
 config DRM_PANEL_ORIENTATION_QUIRKS
 	tristate
 
-# Separate option because nomodeset parameter is global and expected built-in
-config DRM_NOMODESET
-	bool
-	default n
-
 config DRM_LIB_RANDOM
 	bool
 	default n
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 6e55c47288e4..cc637343d87b 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -70,9 +70,9 @@ drm-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
 drm-$(CONFIG_DRM_PRIVACY_SCREEN) += \
 	drm_privacy_screen.o \
 	drm_privacy_screen_x86.o
+drm-$(CONFIG_DRM_ACCEL) += ../../accel/drm_accel.o
 obj-$(CONFIG_DRM)	+= drm.o
 
-obj-$(CONFIG_DRM_NOMODESET) += drm_nomodeset.o
 obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o
 
 #
@@ -117,7 +117,9 @@ drm_kms_helper-y := \
 	drm_self_refresh_helper.o \
 	drm_simple_kms_helper.o
 drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o
-drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o
+drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += \
+	drm_fbdev_generic.o \
+	drm_fb_helper.o
 obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o
 
 #
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig
index 7777d55275de..5fcd510f1abb 100644
--- a/drivers/gpu/drm/amd/amdgpu/Kconfig
+++ b/drivers/gpu/drm/amd/amdgpu/Kconfig
@@ -1,4 +1,33 @@
 # SPDX-License-Identifier: MIT
+
+config DRM_AMDGPU
+	tristate "AMD GPU"
+	depends on DRM && PCI && MMU
+	select FW_LOADER
+	select DRM_DISPLAY_DP_HELPER
+	select DRM_DISPLAY_HDMI_HELPER
+	select DRM_DISPLAY_HELPER
+	select DRM_KMS_HELPER
+	select DRM_SCHED
+	select DRM_TTM
+	select DRM_TTM_HELPER
+	select POWER_SUPPLY
+	select HWMON
+	select BACKLIGHT_CLASS_DEVICE
+	select INTERVAL_TREE
+	select DRM_BUDDY
+	# amdgpu depends on ACPI_VIDEO when ACPI is enabled, for select to work
+	# ACPI_VIDEO's dependencies must also be selected.
+	select INPUT if ACPI
+	select ACPI_VIDEO if ACPI
+	# On x86 ACPI_VIDEO also needs ACPI_WMI
+	select X86_PLATFORM_DEVICES if ACPI && X86
+	select ACPI_WMI if ACPI && X86
+	help
+	  Choose this option if you have a recent AMD Radeon graphics card.
+
+	  If M is selected, the module will be called amdgpu.
+
 config DRM_AMDGPU_SI
 	bool "Enable amdgpu support for SI parts"
 	depends on DRM_AMDGPU
diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile
index 6ad39cf71bdd..798d0e9a60b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/Makefile
+++ b/drivers/gpu/drm/amd/amdgpu/Makefile
@@ -58,7 +58,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
 	amdgpu_vm_sdma.o amdgpu_discovery.o amdgpu_ras_eeprom.o amdgpu_nbio.o \
 	amdgpu_umc.o smu_v11_0_i2c.o amdgpu_fru_eeprom.o amdgpu_rap.o \
 	amdgpu_fw_attestation.o amdgpu_securedisplay.o \
-	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o
+	amdgpu_eeprom.o amdgpu_mca.o amdgpu_psp_ta.o amdgpu_lsdma.o \
+	amdgpu_ring_mux.o
 
 amdgpu-$(CONFIG_PROC_FS) += amdgpu_fdinfo.o
 
@@ -250,7 +251,7 @@ endif
 amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
 amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
 amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
-amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
+amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_hmm.o
 
 include $(FULL_AMD_PATH)/pm/Makefile
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 8639a4f9c6e8..6b74df446694 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -82,7 +82,6 @@
 #include "amdgpu_vce.h"
 #include "amdgpu_vcn.h"
 #include "amdgpu_jpeg.h"
-#include "amdgpu_mn.h"
 #include "amdgpu_gmc.h"
 #include "amdgpu_gfx.h"
 #include "amdgpu_sdma.h"
@@ -219,10 +218,12 @@ extern int amdgpu_use_xgmi_p2p;
 extern int sched_policy;
 extern bool debug_evictions;
 extern bool no_system_mem_limit;
+extern int halt_if_hws_hang;
 #else
 static const int __maybe_unused sched_policy = KFD_SCHED_POLICY_HWS;
 static const bool __maybe_unused debug_evictions; /* = false */
 static const bool __maybe_unused no_system_mem_limit;
+static const int __maybe_unused halt_if_hws_hang;
 #endif
 #ifdef CONFIG_HSA_AMD_P2P
 extern bool pcie_p2p;
@@ -675,7 +676,7 @@ enum amd_hw_ip_block_type {
 	MAX_HWIP
 };
 
-#define HWIP_MAX_INSTANCE	11
+#define HWIP_MAX_INSTANCE	28
 
 #define HW_ID_MAX		300
 #define IP_VERSION(mj, mn, rv) (((mj) << 16) | ((mn) << 8) | (rv))
@@ -1063,6 +1064,7 @@ struct amdgpu_device {
 	struct work_struct		reset_work;
 
 	bool                            job_hang;
+	bool                            dc_enabled;
 };
 
 static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
@@ -1120,6 +1122,8 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
 bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type);
 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev);
 
+void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev);
+
 int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
 				 struct amdgpu_reset_context *reset_context);
 
@@ -1293,6 +1297,7 @@ void amdgpu_device_pcie_port_wreg(struct amdgpu_device *adev,
 				u32 reg, u32 v);
 struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
 					    struct dma_fence *gang);
+bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev);
 
 /* atpx handler */
 #if defined(CONFIG_VGA_SWITCHEROO)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index b14800ac179e..57b5e11446c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -847,7 +847,7 @@ int amdgpu_acpi_init(struct amdgpu_device *adev)
 	struct amdgpu_atif *atif = &amdgpu_acpi_priv.atif;
 
 	if (atif->notifications.brightness_change) {
-		if (amdgpu_device_has_dc_support(adev)) {
+		if (adev->dc_enabled) {
 #if defined(CONFIG_DRM_AMD_DC)
 			struct amdgpu_display_manager *dm = &adev->dm;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 0561812aa0a4..f99d4873bf22 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -195,7 +195,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 		}
 
 		adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
-						adev_to_drm(adev), &gpu_resources);
+							&gpu_resources);
 
 		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
 
@@ -673,7 +673,7 @@ int amdgpu_amdkfd_submit_ib(struct amdgpu_device *adev,
 		goto err;
 	}
 
-	ret = amdgpu_job_alloc(adev, 1, &job, NULL);
+	ret = amdgpu_job_alloc(adev, NULL, NULL, NULL, 1, &job);
 	if (ret)
 		goto err;
 
@@ -706,6 +706,13 @@ err:
 
 void amdgpu_amdkfd_set_compute_idle(struct amdgpu_device *adev, bool idle)
 {
+	/* Temporary workaround to fix issues observed in some
+	 * compute applications when GFXOFF is enabled on GFX11.
+	 */
+	if (IP_VERSION_MAJ(adev->ip_versions[GC_HWIP][0]) == 11) {
+		pr_debug("GFXOFF is %s\n", idle ? "enabled" : "disabled");
+		amdgpu_gfx_off_ctrl(adev, idle);
+	}
 	amdgpu_dpm_switch_power_profile(adev,
 					PP_SMC_POWER_PROFILE_COMPUTE,
 					!idle);
@@ -753,9 +760,7 @@ bool amdgpu_amdkfd_have_atomics_support(struct amdgpu_device *adev)
 
 void amdgpu_amdkfd_ras_poison_consumption_handler(struct amdgpu_device *adev, bool reset)
 {
-	struct ras_err_data err_data = {0, 0, 0, NULL};
-
-	amdgpu_umc_poison_handler(adev, &err_data, reset);
+	amdgpu_umc_poison_handler(adev, reset);
 }
 
 bool amdgpu_amdkfd_ras_query_utcl2_poison_status(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index 647220a8762d..0040deaf8a83 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -29,6 +29,7 @@
 #include <linux/mm.h>
 #include <linux/kthread.h>
 #include <linux/workqueue.h>
+#include <linux/mmu_notifier.h>
 #include <kgd_kfd_interface.h>
 #include <drm/ttm/ttm_execbuf_util.h>
 #include "amdgpu_sync.h"
@@ -65,6 +66,7 @@ struct kgd_mem {
 	struct mutex lock;
 	struct amdgpu_bo *bo;
 	struct dma_buf *dmabuf;
+	struct hmm_range *range;
 	struct list_head attachments;
 	/* protected by amdkfd_process_info.lock */
 	struct ttm_validate_buffer validate_list;
@@ -75,7 +77,7 @@ struct kgd_mem {
 
 	uint32_t alloc_flags;
 
-	atomic_t invalid;
+	uint32_t invalid;
 	struct amdkfd_process_info *process_info;
 
 	struct amdgpu_sync sync;
@@ -131,7 +133,8 @@ struct amdkfd_process_info {
 	struct amdgpu_amdkfd_fence *eviction_fence;
 
 	/* MMU-notifier related fields */
-	atomic_t evicted_bos;
+	struct mutex notifier_lock;
+	uint32_t evicted_bos;
 	struct delayed_work restore_userptr_work;
 	struct pid *pid;
 	bool block_mmu_notifications;
@@ -180,7 +183,8 @@ int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
 bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
 struct amdgpu_amdkfd_fence *to_amdgpu_amdkfd_fence(struct dma_fence *f);
 int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo);
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+				unsigned long cur_seq, struct kgd_mem *mem);
 #else
 static inline
 bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
@@ -201,7 +205,8 @@ int amdgpu_amdkfd_remove_fence_on_pt_pd_bos(struct amdgpu_bo *bo)
 }
 
 static inline
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm)
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+				unsigned long cur_seq, struct kgd_mem *mem)
 {
 	return 0;
 }
@@ -265,8 +270,10 @@ int amdgpu_amdkfd_get_pcie_bandwidth_mbytes(struct amdgpu_device *adev, bool is_
 	(&((struct amdgpu_fpriv *)					\
 		((struct drm_file *)(drm_priv))->driver_priv)->vm)
 
+int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
+				     struct file *filp, u32 pasid);
 int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
-					struct file *filp, u32 pasid,
+					struct file *filp,
 					void **process_info,
 					struct dma_fence **ef);
 void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev,
@@ -353,7 +360,6 @@ int kgd2kfd_init(void);
 void kgd2kfd_exit(void);
 struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf);
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
-			 struct drm_device *ddev,
 			 const struct kgd2kfd_shared_resources *gpu_resources);
 void kgd2kfd_device_exit(struct kfd_dev *kfd);
 void kgd2kfd_suspend(struct kfd_dev *kfd, bool run_pm);
@@ -381,7 +387,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
 }
 
 static inline
-bool kgd2kfd_device_init(struct kfd_dev *kfd, struct drm_device *ddev,
+bool kgd2kfd_device_init(struct kfd_dev *kfd,
 				const struct kgd2kfd_shared_resources *gpu_resources)
 {
 	return false;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
index c8935d718207..4485bb29bec9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
@@ -41,5 +41,6 @@ const struct kfd2kgd_calls aldebaran_kfd2kgd = {
 	.get_atc_vmid_pasid_mapping_info =
 				kgd_gfx_v9_get_atc_vmid_pasid_mapping_info,
 	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
+	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
 	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 81e3b528bbc9..e92b93557c13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -787,7 +787,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
 	for (se_idx = 0; se_idx < se_cnt; se_idx++) {
 		for (sh_idx = 0; sh_idx < sh_cnt; sh_idx++) {
 
-			gfx_v9_0_select_se_sh(adev, se_idx, sh_idx, 0xffffffff);
+			amdgpu_gfx_select_se_sh(adev, se_idx, sh_idx, 0xffffffff);
 			queue_map = RREG32_SOC15(GC, 0, mmSPI_CSQ_WF_ACTIVE_STATUS);
 
 			/*
@@ -820,7 +820,7 @@ void kgd_gfx_v9_get_cu_occupancy(struct amdgpu_device *adev, int pasid,
 		}
 	}
 
-	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	soc15_grbm_select(adev, 0, 0, 0, 0);
 	unlock_spi_csq_mutexes(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 978d3970b5cc..b15091d8310d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -29,6 +29,7 @@
 #include "amdgpu_object.h"
 #include "amdgpu_gem.h"
 #include "amdgpu_vm.h"
+#include "amdgpu_hmm.h"
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_dma_buf.h"
 #include <uapi/linux/kfd_ioctl.h>
@@ -171,9 +172,7 @@ int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
 	     kfd_mem_limit.max_ttm_mem_limit) ||
 	    (adev && adev->kfd.vram_used + vram_needed >
-	     adev->gmc.real_vram_size -
-	     atomic64_read(&adev->vram_pin_size) -
-	     reserved_for_pt)) {
+	     adev->gmc.real_vram_size - reserved_for_pt)) {
 		ret = -ENOMEM;
 		goto release;
 	}
@@ -405,63 +404,15 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync)
 
 static uint64_t get_pte_flags(struct amdgpu_device *adev, struct kgd_mem *mem)
 {
-	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(mem->bo->tbo.bdev);
-	bool coherent = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT;
-	bool uncached = mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED;
-	uint32_t mapping_flags;
-	uint64_t pte_flags;
-	bool snoop = false;
+	uint32_t mapping_flags = AMDGPU_VM_PAGE_READABLE |
+				 AMDGPU_VM_MTYPE_DEFAULT;
 
-	mapping_flags = AMDGPU_VM_PAGE_READABLE;
 	if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_WRITABLE)
 		mapping_flags |= AMDGPU_VM_PAGE_WRITEABLE;
 	if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_EXECUTABLE)
 		mapping_flags |= AMDGPU_VM_PAGE_EXECUTABLE;
 
-	switch (adev->asic_type) {
-	case CHIP_ARCTURUS:
-	case CHIP_ALDEBARAN:
-		if (mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) {
-			if (bo_adev == adev) {
-				if (uncached)
-					mapping_flags |= AMDGPU_VM_MTYPE_UC;
-				else if (coherent)
-					mapping_flags |= AMDGPU_VM_MTYPE_CC;
-				else
-					mapping_flags |= AMDGPU_VM_MTYPE_RW;
-				if (adev->asic_type == CHIP_ALDEBARAN &&
-				    adev->gmc.xgmi.connected_to_cpu)
-					snoop = true;
-			} else {
-				if (uncached || coherent)
-					mapping_flags |= AMDGPU_VM_MTYPE_UC;
-				else
-					mapping_flags |= AMDGPU_VM_MTYPE_NC;
-				if (amdgpu_xgmi_same_hive(adev, bo_adev))
-					snoop = true;
-			}
-		} else {
-			if (uncached || coherent)
-				mapping_flags |= AMDGPU_VM_MTYPE_UC;
-			else
-				mapping_flags |= AMDGPU_VM_MTYPE_NC;
-			snoop = true;
-		}
-		break;
-	default:
-		if (uncached || coherent)
-			mapping_flags |= AMDGPU_VM_MTYPE_UC;
-		else
-			mapping_flags |= AMDGPU_VM_MTYPE_NC;
-
-		if (!(mem->alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM))
-			snoop = true;
-	}
-
-	pte_flags = amdgpu_gem_va_map_flags(adev, mapping_flags);
-	pte_flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
-
-	return pte_flags;
+	return amdgpu_gem_va_map_flags(adev, mapping_flags);
 }
 
 /**
@@ -510,13 +461,13 @@ kfd_mem_dmamap_userptr(struct kgd_mem *mem,
 	struct ttm_tt *ttm = bo->tbo.ttm;
 	int ret;
 
+	if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
+		return -EINVAL;
+
 	ttm->sg = kmalloc(sizeof(*ttm->sg), GFP_KERNEL);
 	if (unlikely(!ttm->sg))
 		return -ENOMEM;
 
-	if (WARN_ON(ttm->num_pages != src_ttm->num_pages))
-		return -EINVAL;
-
 	/* Same sequence as in amdgpu_ttm_tt_pin_userptr */
 	ret = sg_alloc_table_from_pages(ttm->sg, src_ttm->pages,
 					ttm->num_pages, 0,
@@ -988,6 +939,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
 	struct amdkfd_process_info *process_info = mem->process_info;
 	struct amdgpu_bo *bo = mem->bo;
 	struct ttm_operation_ctx ctx = { true, false };
+	struct hmm_range *range;
 	int ret = 0;
 
 	mutex_lock(&process_info->lock);
@@ -998,7 +950,7 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
 		goto out;
 	}
 
-	ret = amdgpu_mn_register(bo, user_addr);
+	ret = amdgpu_hmm_register(bo, user_addr);
 	if (ret) {
 		pr_err("%s: Failed to register MMU notifier: %d\n",
 		       __func__, ret);
@@ -1012,12 +964,14 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
 		 * later stage when it is scheduled by another ioctl called by
 		 * CRIU master process for the target pid for restore.
 		 */
-		atomic_inc(&mem->invalid);
+		mutex_lock(&process_info->notifier_lock);
+		mem->invalid++;
+		mutex_unlock(&process_info->notifier_lock);
 		mutex_unlock(&process_info->lock);
 		return 0;
 	}
 
-	ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
+	ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages, &range);
 	if (ret) {
 		pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
 		goto unregister_out;
@@ -1035,10 +989,10 @@ static int init_user_pages(struct kgd_mem *mem, uint64_t user_addr,
 	amdgpu_bo_unreserve(bo);
 
 release_out:
-	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+	amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
 unregister_out:
 	if (ret)
-		amdgpu_mn_unregister(bo);
+		amdgpu_hmm_unregister(bo);
 out:
 	mutex_unlock(&process_info->lock);
 	return ret;
@@ -1349,6 +1303,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 			return -ENOMEM;
 
 		mutex_init(&info->lock);
+		mutex_init(&info->notifier_lock);
 		INIT_LIST_HEAD(&info->vm_list_head);
 		INIT_LIST_HEAD(&info->kfd_bo_list);
 		INIT_LIST_HEAD(&info->userptr_valid_list);
@@ -1365,7 +1320,6 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
 		}
 
 		info->pid = get_task_pid(current->group_leader, PIDTYPE_PID);
-		atomic_set(&info->evicted_bos, 0);
 		INIT_DELAYED_WORK(&info->restore_userptr_work,
 				  amdgpu_amdkfd_restore_userptr_worker);
 
@@ -1420,6 +1374,7 @@ reserve_pd_fail:
 		put_pid(info->pid);
 create_evict_fence_fail:
 		mutex_destroy(&info->lock);
+		mutex_destroy(&info->notifier_lock);
 		kfree(info);
 	}
 	return ret;
@@ -1474,10 +1429,9 @@ static void amdgpu_amdkfd_gpuvm_unpin_bo(struct amdgpu_bo *bo)
 	amdgpu_bo_unreserve(bo);
 }
 
-int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
-					   struct file *filp, u32 pasid,
-					   void **process_info,
-					   struct dma_fence **ef)
+int amdgpu_amdkfd_gpuvm_set_vm_pasid(struct amdgpu_device *adev,
+				     struct file *filp, u32 pasid)
+
 {
 	struct amdgpu_fpriv *drv_priv;
 	struct amdgpu_vm *avm;
@@ -1488,10 +1442,6 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
 		return ret;
 	avm = &drv_priv->vm;
 
-	/* Already a compute VM? */
-	if (avm->process_info)
-		return -EINVAL;
-
 	/* Free the original amdgpu allocated pasid,
 	 * will be replaced with kfd allocated pasid.
 	 */
@@ -1500,14 +1450,36 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
 		amdgpu_vm_set_pasid(adev, avm, 0);
 	}
 
-	/* Convert VM into a compute VM */
-	ret = amdgpu_vm_make_compute(adev, avm);
+	ret = amdgpu_vm_set_pasid(adev, avm, pasid);
 	if (ret)
 		return ret;
 
-	ret = amdgpu_vm_set_pasid(adev, avm, pasid);
+	return 0;
+}
+
+int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev,
+					   struct file *filp,
+					   void **process_info,
+					   struct dma_fence **ef)
+{
+	struct amdgpu_fpriv *drv_priv;
+	struct amdgpu_vm *avm;
+	int ret;
+
+	ret = amdgpu_file_to_fpriv(filp, &drv_priv);
 	if (ret)
 		return ret;
+	avm = &drv_priv->vm;
+
+	/* Already a compute VM? */
+	if (avm->process_info)
+		return -EINVAL;
+
+	/* Convert VM into a compute VM */
+	ret = amdgpu_vm_make_compute(adev, avm);
+	if (ret)
+		return ret;
+
 	/* Initialize KFD part of the VM and process info */
 	ret = init_kfd_vm(avm, process_info, ef);
 	if (ret)
@@ -1544,6 +1516,7 @@ void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev,
 		cancel_delayed_work_sync(&process_info->restore_userptr_work);
 		put_pid(process_info->pid);
 		mutex_destroy(&process_info->lock);
+		mutex_destroy(&process_info->notifier_lock);
 		kfree(process_info);
 	}
 }
@@ -1596,7 +1569,9 @@ int amdgpu_amdkfd_criu_resume(void *p)
 
 	mutex_lock(&pinfo->lock);
 	pr_debug("scheduling work\n");
-	atomic_inc(&pinfo->evicted_bos);
+	mutex_lock(&pinfo->notifier_lock);
+	pinfo->evicted_bos++;
+	mutex_unlock(&pinfo->notifier_lock);
 	if (!READ_ONCE(pinfo->block_mmu_notifications)) {
 		ret = -EINVAL;
 		goto out_unlock;
@@ -1673,6 +1648,11 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		}
 	}
 
+	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_COHERENT)
+		alloc_flags |= AMDGPU_GEM_CREATE_COHERENT;
+	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_UNCACHED)
+		alloc_flags |= AMDGPU_GEM_CREATE_UNCACHED;
+
 	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
 	if (!*mem) {
 		ret = -ENOMEM;
@@ -1816,8 +1796,13 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	list_del(&bo_list_entry->head);
 	mutex_unlock(&process_info->lock);
 
-	/* No more MMU notifiers */
-	amdgpu_mn_unregister(mem->bo);
+	/* Cleanup user pages and MMU notifiers */
+	if (amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm)) {
+		amdgpu_hmm_unregister(mem->bo);
+		mutex_lock(&process_info->notifier_lock);
+		amdgpu_ttm_tt_discard_user_pages(mem->bo->tbo.ttm, mem->range);
+		mutex_unlock(&process_info->notifier_lock);
+	}
 
 	ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
 	if (unlikely(ret))
@@ -1907,14 +1892,14 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
 	 */
 	mutex_lock(&mem->process_info->lock);
 
-	/* Lock mmap-sem. If we find an invalid userptr BO, we can be
+	/* Lock notifier lock. If we find an invalid userptr BO, we can be
 	 * sure that the MMU notifier is no longer running
 	 * concurrently and the queues are actually stopped
 	 */
 	if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) {
-		mmap_write_lock(current->mm);
-		is_invalid_userptr = atomic_read(&mem->invalid);
-		mmap_write_unlock(current->mm);
+		mutex_lock(&mem->process_info->notifier_lock);
+		is_invalid_userptr = !!mem->invalid;
+		mutex_unlock(&mem->process_info->notifier_lock);
 	}
 
 	mutex_lock(&mem->lock);
@@ -2257,7 +2242,7 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
 
 	ret = drm_vma_node_allow(&obj->vma_node, drm_priv);
 	if (ret) {
-		kfree(mem);
+		kfree(*mem);
 		return ret;
 	}
 
@@ -2294,34 +2279,38 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct amdgpu_device *adev,
  *
  * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
  * cannot do any memory allocations, and cannot take any locks that
- * are held elsewhere while allocating memory. Therefore this is as
- * simple as possible, using atomic counters.
+ * are held elsewhere while allocating memory.
  *
  * It doesn't do anything to the BO itself. The real work happens in
  * restore, where we get updated page addresses. This function only
  * ensures that GPU access to the BO is stopped.
  */
-int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
-				struct mm_struct *mm)
+int amdgpu_amdkfd_evict_userptr(struct mmu_interval_notifier *mni,
+				unsigned long cur_seq, struct kgd_mem *mem)
 {
 	struct amdkfd_process_info *process_info = mem->process_info;
-	int evicted_bos;
 	int r = 0;
 
-	/* Do not process MMU notifications until stage-4 IOCTL is received */
+	/* Do not process MMU notifications during CRIU restore until
+	 * KFD_CRIU_OP_RESUME IOCTL is received
+	 */
 	if (READ_ONCE(process_info->block_mmu_notifications))
 		return 0;
 
-	atomic_inc(&mem->invalid);
-	evicted_bos = atomic_inc_return(&process_info->evicted_bos);
-	if (evicted_bos == 1) {
+	mutex_lock(&process_info->notifier_lock);
+	mmu_interval_set_seq(mni, cur_seq);
+
+	mem->invalid++;
+	if (++process_info->evicted_bos == 1) {
 		/* First eviction, stop the queues */
-		r = kgd2kfd_quiesce_mm(mm, KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
+		r = kgd2kfd_quiesce_mm(mni->mm,
+				       KFD_QUEUE_EVICTION_TRIGGER_USERPTR);
 		if (r)
 			pr_err("Failed to quiesce KFD\n");
 		schedule_delayed_work(&process_info->restore_userptr_work,
 			msecs_to_jiffies(AMDGPU_USERPTR_RESTORE_DELAY_MS));
 	}
+	mutex_unlock(&process_info->notifier_lock);
 
 	return r;
 }
@@ -2338,51 +2327,58 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 	struct kgd_mem *mem, *tmp_mem;
 	struct amdgpu_bo *bo;
 	struct ttm_operation_ctx ctx = { false, false };
-	int invalid, ret;
+	uint32_t invalid;
+	int ret = 0;
 
-	/* Move all invalidated BOs to the userptr_inval_list and
-	 * release their user pages by migration to the CPU domain
-	 */
+	mutex_lock(&process_info->notifier_lock);
+
+	/* Move all invalidated BOs to the userptr_inval_list */
 	list_for_each_entry_safe(mem, tmp_mem,
 				 &process_info->userptr_valid_list,
-				 validate_list.head) {
-		if (!atomic_read(&mem->invalid))
-			continue; /* BO is still valid */
-
-		bo = mem->bo;
-
-		if (amdgpu_bo_reserve(bo, true))
-			return -EAGAIN;
-		amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
-		ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
-		amdgpu_bo_unreserve(bo);
-		if (ret) {
-			pr_err("%s: Failed to invalidate userptr BO\n",
-			       __func__);
-			return -EAGAIN;
-		}
-
-		list_move_tail(&mem->validate_list.head,
-			       &process_info->userptr_inval_list);
-	}
-
-	if (list_empty(&process_info->userptr_inval_list))
-		return 0; /* All evicted userptr BOs were freed */
+				 validate_list.head)
+		if (mem->invalid)
+			list_move_tail(&mem->validate_list.head,
+				       &process_info->userptr_inval_list);
 
 	/* Go through userptr_inval_list and update any invalid user_pages */
 	list_for_each_entry(mem, &process_info->userptr_inval_list,
 			    validate_list.head) {
-		invalid = atomic_read(&mem->invalid);
+		invalid = mem->invalid;
 		if (!invalid)
 			/* BO hasn't been invalidated since the last
-			 * revalidation attempt. Keep its BO list.
+			 * revalidation attempt. Keep its page list.
 			 */
 			continue;
 
 		bo = mem->bo;
 
+		amdgpu_ttm_tt_discard_user_pages(bo->tbo.ttm, mem->range);
+		mem->range = NULL;
+
+		/* BO reservations and getting user pages (hmm_range_fault)
+		 * must happen outside the notifier lock
+		 */
+		mutex_unlock(&process_info->notifier_lock);
+
+		/* Move the BO to system (CPU) domain if necessary to unmap
+		 * and free the SG table
+		 */
+		if (bo->tbo.resource->mem_type != TTM_PL_SYSTEM) {
+			if (amdgpu_bo_reserve(bo, true))
+				return -EAGAIN;
+			amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
+			ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
+			amdgpu_bo_unreserve(bo);
+			if (ret) {
+				pr_err("%s: Failed to invalidate userptr BO\n",
+				       __func__);
+				return -EAGAIN;
+			}
+		}
+
 		/* Get updated user pages */
-		ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
+		ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
+						   &mem->range);
 		if (ret) {
 			pr_debug("Failed %d to get user pages\n", ret);
 
@@ -2395,30 +2391,32 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
 			 */
 			if (ret != -EFAULT)
 				return ret;
-		} else {
 
-			/*
-			 * FIXME: Cannot ignore the return code, must hold
-			 * notifier_lock
-			 */
-			amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+			ret = 0;
 		}
 
+		mutex_lock(&process_info->notifier_lock);
+
 		/* Mark the BO as valid unless it was invalidated
 		 * again concurrently.
 		 */
-		if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
-			return -EAGAIN;
+		if (mem->invalid != invalid) {
+			ret = -EAGAIN;
+			goto unlock_out;
+		}
+		mem->invalid = 0;
 	}
 
-	return 0;
+unlock_out:
+	mutex_unlock(&process_info->notifier_lock);
+
+	return ret;
 }
 
 /* Validate invalid userptr BOs
  *
- * Validates BOs on the userptr_inval_list, and moves them back to the
- * userptr_valid_list. Also updates GPUVM page tables with new page
- * addresses and waits for the page table updates to complete.
+ * Validates BOs on the userptr_inval_list. Also updates GPUVM page tables
+ * with new page addresses and waits for the page table updates to complete.
  */
 static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 {
@@ -2489,9 +2487,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 			}
 		}
 
-		list_move_tail(&mem->validate_list.head,
-			       &process_info->userptr_valid_list);
-
 		/* Update mapping. If the BO was not validated
 		 * (because we couldn't get user pages), this will
 		 * clear the page table entries, which will result in
@@ -2507,7 +2502,9 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 			if (ret) {
 				pr_err("%s: update PTE failed\n", __func__);
 				/* make sure this gets validated again */
-				atomic_inc(&mem->invalid);
+				mutex_lock(&process_info->notifier_lock);
+				mem->invalid++;
+				mutex_unlock(&process_info->notifier_lock);
 				goto unreserve_out;
 			}
 		}
@@ -2527,6 +2524,36 @@ out_no_mem:
 	return ret;
 }
 
+/* Confirm that all user pages are valid while holding the notifier lock
+ *
+ * Moves valid BOs from the userptr_inval_list back to userptr_val_list.
+ */
+static int confirm_valid_user_pages_locked(struct amdkfd_process_info *process_info)
+{
+	struct kgd_mem *mem, *tmp_mem;
+	int ret = 0;
+
+	list_for_each_entry_safe(mem, tmp_mem,
+				 &process_info->userptr_inval_list,
+				 validate_list.head) {
+		bool valid = amdgpu_ttm_tt_get_user_pages_done(
+				mem->bo->tbo.ttm, mem->range);
+
+		mem->range = NULL;
+		if (!valid) {
+			WARN(!mem->invalid, "Invalid BO not marked invalid");
+			ret = -EAGAIN;
+			continue;
+		}
+		WARN(mem->invalid, "Valid BO is marked invalid");
+
+		list_move_tail(&mem->validate_list.head,
+			       &process_info->userptr_valid_list);
+	}
+
+	return ret;
+}
+
 /* Worker callback to restore evicted userptr BOs
  *
  * Tries to update and validate all userptr BOs. If successful and no
@@ -2541,9 +2568,11 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 			     restore_userptr_work);
 	struct task_struct *usertask;
 	struct mm_struct *mm;
-	int evicted_bos;
+	uint32_t evicted_bos;
 
-	evicted_bos = atomic_read(&process_info->evicted_bos);
+	mutex_lock(&process_info->notifier_lock);
+	evicted_bos = process_info->evicted_bos;
+	mutex_unlock(&process_info->notifier_lock);
 	if (!evicted_bos)
 		return;
 
@@ -2566,9 +2595,6 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 	 * and we can just restart the queues.
 	 */
 	if (!list_empty(&process_info->userptr_inval_list)) {
-		if (atomic_read(&process_info->evicted_bos) != evicted_bos)
-			goto unlock_out; /* Concurrent eviction, try again */
-
 		if (validate_invalid_user_pages(process_info))
 			goto unlock_out;
 	}
@@ -2577,10 +2603,17 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 	 * be a first eviction that calls quiesce_mm. The eviction
 	 * reference counting inside KFD will handle this case.
 	 */
-	if (atomic_cmpxchg(&process_info->evicted_bos, evicted_bos, 0) !=
-	    evicted_bos)
-		goto unlock_out;
-	evicted_bos = 0;
+	mutex_lock(&process_info->notifier_lock);
+	if (process_info->evicted_bos != evicted_bos)
+		goto unlock_notifier_out;
+
+	if (confirm_valid_user_pages_locked(process_info)) {
+		WARN(1, "User pages unexpectedly invalid");
+		goto unlock_notifier_out;
+	}
+
+	process_info->evicted_bos = evicted_bos = 0;
+
 	if (kgd2kfd_resume_mm(mm)) {
 		pr_err("%s: Failed to resume KFD\n", __func__);
 		/* No recovery from this failure. Probably the CP is
@@ -2588,6 +2621,8 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
 		 */
 	}
 
+unlock_notifier_out:
+	mutex_unlock(&process_info->notifier_lock);
 unlock_out:
 	mutex_unlock(&process_info->lock);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
index b81b77a9efa6..ac6fe0ae4609 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
@@ -101,39 +101,101 @@ void amdgpu_atomfirmware_scratch_regs_init(struct amdgpu_device *adev)
 	}
 }
 
+static int amdgpu_atomfirmware_allocate_fb_v2_1(struct amdgpu_device *adev,
+	struct vram_usagebyfirmware_v2_1 *fw_usage, int *usage_bytes)
+{
+	u32 start_addr, fw_size, drv_size;
+
+	start_addr = le32_to_cpu(fw_usage->start_address_in_kb);
+	fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);
+	drv_size = le16_to_cpu(fw_usage->used_by_driver_in_kb);
+
+	DRM_DEBUG("atom firmware v2_1 requested %08x %dkb fw %dkb drv\n",
+			  start_addr,
+			  fw_size,
+			  drv_size);
+
+	if ((start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
+		(u32)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
+		ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
+		/* Firmware request VRAM reservation for SR-IOV */
+		adev->mman.fw_vram_usage_start_offset = (start_addr &
+			(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+		adev->mman.fw_vram_usage_size = fw_size << 10;
+		/* Use the default scratch size */
+		*usage_bytes = 0;
+	} else {
+		*usage_bytes = drv_size << 10;
+	}
+	return 0;
+}
+
+static int amdgpu_atomfirmware_allocate_fb_v2_2(struct amdgpu_device *adev,
+		struct vram_usagebyfirmware_v2_2 *fw_usage, int *usage_bytes)
+{
+	u32 fw_start_addr, fw_size, drv_start_addr, drv_size;
+
+	fw_start_addr = le32_to_cpu(fw_usage->fw_region_start_address_in_kb);
+	fw_size = le16_to_cpu(fw_usage->used_by_firmware_in_kb);
+
+	drv_start_addr = le32_to_cpu(fw_usage->driver_region0_start_address_in_kb);
+	drv_size = le32_to_cpu(fw_usage->used_by_driver_region0_in_kb);
+
+	DRM_DEBUG("atom requested fw start at %08x %dkb and drv start at %08x %dkb\n",
+			  fw_start_addr,
+			  fw_size,
+			  drv_start_addr,
+			  drv_size);
+
+	if (amdgpu_sriov_vf(adev) &&
+	    ((fw_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
+		ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
+		/* Firmware request VRAM reservation for SR-IOV */
+		adev->mman.fw_vram_usage_start_offset = (fw_start_addr &
+			(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+		adev->mman.fw_vram_usage_size = fw_size << 10;
+	}
+
+	if (amdgpu_sriov_vf(adev) &&
+	    ((drv_start_addr & (ATOM_VRAM_BLOCK_NEEDS_NO_RESERVATION <<
+		ATOM_VRAM_OPERATION_FLAGS_SHIFT)) == 0)) {
+		/* driver request VRAM reservation for SR-IOV */
+		adev->mman.drv_vram_usage_start_offset = (drv_start_addr &
+			(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
+		adev->mman.drv_vram_usage_size = drv_size << 10;
+	}
+
+	*usage_bytes = 0;
+	return 0;
+}
+
 int amdgpu_atomfirmware_allocate_fb_scratch(struct amdgpu_device *adev)
 {
 	struct atom_context *ctx = adev->mode_info.atom_context;
 	int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
 						vram_usagebyfirmware);
-	struct vram_usagebyfirmware_v2_1 *firmware_usage;
-	uint32_t start_addr, size;
-	uint16_t data_offset;
+	struct vram_usagebyfirmware_v2_1 *fw_usage_v2_1;
+	struct vram_usagebyfirmware_v2_2 *fw_usage_v2_2;
+	u16 data_offset;
+	u8 frev, crev;
 	int usage_bytes = 0;
 
-	if (amdgpu_atom_parse_data_header(ctx, index, NULL, NULL, NULL, &data_offset)) {
-		firmware_usage = (struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
-		DRM_DEBUG("atom firmware requested %08x %dkb fw %dkb drv\n",
-			  le32_to_cpu(firmware_usage->start_address_in_kb),
-			  le16_to_cpu(firmware_usage->used_by_firmware_in_kb),
-			  le16_to_cpu(firmware_usage->used_by_driver_in_kb));
-
-		start_addr = le32_to_cpu(firmware_usage->start_address_in_kb);
-		size = le16_to_cpu(firmware_usage->used_by_firmware_in_kb);
-
-		if ((uint32_t)(start_addr & ATOM_VRAM_OPERATION_FLAGS_MASK) ==
-			(uint32_t)(ATOM_VRAM_BLOCK_SRIOV_MSG_SHARE_RESERVATION <<
-			ATOM_VRAM_OPERATION_FLAGS_SHIFT)) {
-			/* Firmware request VRAM reservation for SR-IOV */
-			adev->mman.fw_vram_usage_start_offset = (start_addr &
-				(~ATOM_VRAM_OPERATION_FLAGS_MASK)) << 10;
-			adev->mman.fw_vram_usage_size = size << 10;
-			/* Use the default scratch size */
-			usage_bytes = 0;
-		} else {
-			usage_bytes = le16_to_cpu(firmware_usage->used_by_driver_in_kb) << 10;
+	if (amdgpu_atom_parse_data_header(ctx, index, NULL, &frev, &crev, &data_offset)) {
+		if (frev == 2 && crev == 1) {
+			fw_usage_v2_1 =
+				(struct vram_usagebyfirmware_v2_1 *)(ctx->bios + data_offset);
+			amdgpu_atomfirmware_allocate_fb_v2_1(adev,
+					fw_usage_v2_1,
+					&usage_bytes);
+		} else if (frev >= 2 && crev >= 2) {
+			fw_usage_v2_2 =
+				(struct vram_usagebyfirmware_v2_2 *)(ctx->bios + data_offset);
+			amdgpu_atomfirmware_allocate_fb_v2_2(adev,
+					fw_usage_v2_2,
+					&usage_bytes);
 		}
 	}
+
 	ctx->scratch_size_bytes = 0;
 	if (usage_bytes == 0)
 		usage_bytes = 20 * 1024;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index e363f56c72af..30c28a69e847 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -317,6 +317,7 @@ static bool amdgpu_atrm_get_bios(struct amdgpu_device *adev)
 
 	if (!found)
 		return false;
+	pci_dev_put(pdev);
 
 	adev->bios = kmalloc(size, GFP_KERNEL);
 	if (!adev->bios) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 2168163aad2d..252a876b0725 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -209,6 +209,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
 			list_add_tail(&e->tv.head, &bucket[priority]);
 
 		e->user_pages = NULL;
+		e->range = NULL;
 	}
 
 	/* Connect the sorted buckets in the output list. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
index 9caea1688fc3..e4d78491bcc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
@@ -26,6 +26,8 @@
 #include <drm/ttm/ttm_execbuf_util.h>
 #include <drm/amdgpu_drm.h>
 
+struct hmm_range;
+
 struct amdgpu_device;
 struct amdgpu_bo;
 struct amdgpu_bo_va;
@@ -36,6 +38,7 @@ struct amdgpu_bo_list_entry {
 	struct amdgpu_bo_va		*bo_va;
 	uint32_t			priority;
 	struct page			**user_pages;
+	struct hmm_range		*range;
 	bool				user_invalidated;
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 491d4846fc02..2ebbc6382a06 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -26,7 +26,6 @@
 
 #include <drm/display/drm_dp_helper.h>
 #include <drm/drm_edid.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@@ -328,7 +327,6 @@ static void amdgpu_connector_free_edid(struct drm_connector *connector)
 
 	kfree(amdgpu_connector->edid);
 	amdgpu_connector->edid = NULL;
-	drm_connector_update_edid_property(connector, NULL);
 }
 
 static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 1bbd39b3b0fc..8516c814bc9b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -109,6 +109,7 @@ static int amdgpu_cs_p1_ib(struct amdgpu_cs_parser *p,
 		return r;
 
 	++(num_ibs[r]);
+	p->gang_leader_idx = r;
 	return 0;
 }
 
@@ -287,20 +288,18 @@ static int amdgpu_cs_pass1(struct amdgpu_cs_parser *p,
 		}
 	}
 
-	if (!p->gang_size)
-		return -EINVAL;
+	if (!p->gang_size) {
+		ret = -EINVAL;
+		goto free_partial_kdata;
+	}
 
 	for (i = 0; i < p->gang_size; ++i) {
-		ret = amdgpu_job_alloc(p->adev, num_ibs[i], &p->jobs[i], vm);
-		if (ret)
-			goto free_all_kdata;
-
-		ret = drm_sched_job_init(&p->jobs[i]->base, p->entities[i],
-					 &fpriv->vm);
+		ret = amdgpu_job_alloc(p->adev, vm, p->entities[i], vm,
+				       num_ibs[i], &p->jobs[i]);
 		if (ret)
 			goto free_all_kdata;
 	}
-	p->gang_leader = p->jobs[p->gang_size - 1];
+	p->gang_leader = p->jobs[p->gang_leader_idx];
 
 	if (p->ctx->vram_lost_counter != p->gang_leader->vram_lost_counter) {
 		ret = -ECANCELED;
@@ -430,7 +429,7 @@ static int amdgpu_cs_p2_dependencies(struct amdgpu_cs_parser *p,
 			dma_fence_put(old);
 		}
 
-		r = amdgpu_sync_fence(&p->gang_leader->sync, fence);
+		r = amdgpu_sync_fence(&p->sync, fence);
 		dma_fence_put(fence);
 		if (r)
 			return r;
@@ -452,9 +451,20 @@ static int amdgpu_syncobj_lookup_and_add(struct amdgpu_cs_parser *p,
 		return r;
 	}
 
-	r = amdgpu_sync_fence(&p->gang_leader->sync, fence);
-	dma_fence_put(fence);
+	r = amdgpu_sync_fence(&p->sync, fence);
+	if (r)
+		goto error;
+
+	/*
+	 * When we have an explicit dependency it might be necessary to insert a
+	 * pipeline sync to make sure that all caches etc are flushed and the
+	 * next job actually sees the results from the previous one.
+	 */
+	if (fence->context == p->gang_leader->base.entity->fence_context)
+		r = amdgpu_sync_fence(&p->gang_leader->explicit_sync, fence);
 
+error:
+	dma_fence_put(fence);
 	return r;
 }
 
@@ -910,7 +920,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			goto out_free_user_pages;
 		}
 
-		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
+		r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages, &e->range);
 		if (r) {
 			kvfree(e->user_pages);
 			e->user_pages = NULL;
@@ -988,10 +998,12 @@ out_free_user_pages:
 
 		if (!e->user_pages)
 			continue;
-		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
 		kvfree(e->user_pages);
 		e->user_pages = NULL;
+		e->range = NULL;
 	}
+	mutex_unlock(&p->bo_list->bo_list_mutex);
 	return r;
 }
 
@@ -1101,7 +1113,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_sync_fence(&job->sync, fpriv->prt_va->last_pt_update);
+	r = amdgpu_sync_fence(&p->sync, fpriv->prt_va->last_pt_update);
 	if (r)
 		return r;
 
@@ -1112,7 +1124,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 		if (r)
 			return r;
 
-		r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update);
+		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
@@ -1131,7 +1143,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 		if (r)
 			return r;
 
-		r = amdgpu_sync_fence(&job->sync, bo_va->last_pt_update);
+		r = amdgpu_sync_fence(&p->sync, bo_va->last_pt_update);
 		if (r)
 			return r;
 	}
@@ -1144,7 +1156,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = amdgpu_sync_fence(&job->sync, vm->last_update);
+	r = amdgpu_sync_fence(&p->sync, vm->last_update);
 	if (r)
 		return r;
 
@@ -1176,7 +1188,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 {
 	struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
-	struct amdgpu_job *leader = p->gang_leader;
 	struct amdgpu_bo_list_entry *e;
 	unsigned int i;
 	int r;
@@ -1188,22 +1199,21 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p)
 
 		sync_mode = amdgpu_bo_explicit_sync(bo) ?
 			AMDGPU_SYNC_EXPLICIT : AMDGPU_SYNC_NE_OWNER;
-		r = amdgpu_sync_resv(p->adev, &leader->sync, resv, sync_mode,
+		r = amdgpu_sync_resv(p->adev, &p->sync, resv, sync_mode,
 				     &fpriv->vm);
 		if (r)
 			return r;
 	}
 
-	for (i = 0; i < p->gang_size - 1; ++i) {
-		r = amdgpu_sync_clone(&leader->sync, &p->jobs[i]->sync);
+	for (i = 0; i < p->gang_size; ++i) {
+		r = amdgpu_sync_push_to_job(&p->sync, p->jobs[i]);
 		if (r)
 			return r;
 	}
 
-	r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_size - 1]);
+	r = amdgpu_ctx_wait_prev_fence(p->ctx, p->entities[p->gang_leader_idx]);
 	if (r && r != -ERESTARTSYS)
 		DRM_ERROR("amdgpu_ctx_wait_prev_fence failed.\n");
-
 	return r;
 }
 
@@ -1237,11 +1247,14 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	for (i = 0; i < p->gang_size; ++i)
 		drm_sched_job_arm(&p->jobs[i]->base);
 
-	for (i = 0; i < (p->gang_size - 1); ++i) {
+	for (i = 0; i < p->gang_size; ++i) {
 		struct dma_fence *fence;
 
+		if (p->jobs[i] == leader)
+			continue;
+
 		fence = &p->jobs[i]->base.s_fence->scheduled;
-		r = amdgpu_sync_fence(&leader->sync, fence);
+		r = drm_sched_job_add_dependency(&leader->base, fence);
 		if (r)
 			goto error_cleanup;
 	}
@@ -1264,7 +1277,8 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
 		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
 
-		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+		r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, e->range);
+		e->range = NULL;
 	}
 	if (r) {
 		r = -EAGAIN;
@@ -1275,7 +1289,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 	list_for_each_entry(e, &p->validated, tv.head) {
 
 		/* Everybody except for the gang leader uses READ */
-		for (i = 0; i < (p->gang_size - 1); ++i) {
+		for (i = 0; i < p->gang_size; ++i) {
+			if (p->jobs[i] == leader)
+				continue;
+
 			dma_resv_add_fence(e->tv.bo->base.resv,
 					   &p->jobs[i]->base.s_fence->finished,
 					   DMA_RESV_USAGE_READ);
@@ -1285,7 +1302,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 		e->tv.num_shared = 0;
 	}
 
-	seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_size - 1],
+	seq = amdgpu_ctx_add_fence(p->ctx, p->entities[p->gang_leader_idx],
 				   p->fence);
 	amdgpu_cs_post_dependencies(p);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
index cbaa19b2b8a3..113f39510a72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.h
@@ -54,6 +54,7 @@ struct amdgpu_cs_parser {
 
 	/* scheduler job objects */
 	unsigned int		gang_size;
+	unsigned int		gang_leader_idx;
 	struct drm_sched_entity	*entities[AMDGPU_CS_GANG_SIZE];
 	struct amdgpu_job	*jobs[AMDGPU_CS_GANG_SIZE];
 	struct amdgpu_job	*gang_leader;
@@ -75,6 +76,8 @@ struct amdgpu_cs_parser {
 
 	unsigned			num_post_deps;
 	struct amdgpu_cs_post_dep	*post_deps;
+
+	struct amdgpu_sync		sync;
 };
 
 int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index f6d9d5da53cd..d2139ac12159 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -326,7 +326,10 @@ static int amdgpu_ctx_init(struct amdgpu_ctx_mgr *mgr, int32_t priority,
 	if (r)
 		return r;
 
-	ctx->stable_pstate = current_stable_pstate;
+	if (mgr->adev->pm.stable_pstate_ctx)
+		ctx->stable_pstate = mgr->adev->pm.stable_pstate_ctx->stable_pstate;
+	else
+		ctx->stable_pstate = current_stable_pstate;
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
index de61a85c4b02..0f16d3c09309 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
@@ -1969,7 +1969,7 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev)
 	amdgpu_ta_if_debugfs_init(adev);
 
 #if defined(CONFIG_DRM_AMD_DC)
-	if (amdgpu_device_has_dc_support(adev))
+	if (adev->dc_enabled)
 		dtn_debugfs_init(adev);
 #endif
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index e0445e8cc342..afe6af9c0138 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -37,6 +37,7 @@
 #include <linux/pci-p2pdma.h>
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/amdgpu_drm.h>
 #include <linux/vgaarb.h>
@@ -1568,7 +1569,7 @@ static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
  * @pdev: pci dev pointer
  * @state: vga_switcheroo state
  *
- * Callback for the switcheroo driver.  Suspends or resumes the
+ * Callback for the switcheroo driver.  Suspends or resumes
  * the asics before or after it is powered up using ACPI methods.
  */
 static void amdgpu_switcheroo_set_state(struct pci_dev *pdev,
@@ -1915,6 +1916,16 @@ static void amdgpu_device_enable_virtual_display(struct amdgpu_device *adev)
 	}
 }
 
+void amdgpu_device_set_sriov_virtual_display(struct amdgpu_device *adev)
+{
+	if (amdgpu_sriov_vf(adev) && !adev->enable_virtual_display) {
+		adev->mode_info.num_crtc = 1;
+		adev->enable_virtual_display = true;
+		DRM_INFO("virtual_display:%d, num_crtc:%d\n",
+			 adev->enable_virtual_display, adev->mode_info.num_crtc);
+	}
+}
+
 /**
  * amdgpu_device_parse_gpu_info_fw - parse gpu info firmware
  *
@@ -2397,7 +2408,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 			adev->ip_blocks[i].status.hw = true;
 
 			/* right after GMC hw init, we create CSA */
-			if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
+			if (amdgpu_mcbp) {
 				r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
 								AMDGPU_GEM_DOMAIN_VRAM,
 								AMDGPU_CSA_SIZE);
@@ -2462,6 +2473,11 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 			if (!amdgpu_sriov_vf(adev)) {
 				struct amdgpu_hive_info *hive = amdgpu_get_xgmi_hive(adev);
 
+				if (WARN_ON(!hive)) {
+					r = -ENOENT;
+					goto init_failed;
+				}
+
 				if (!hive->reset_domain ||
 				    !amdgpu_reset_get_reset_domain(hive->reset_domain)) {
 					r = -ENOENT;
@@ -3000,14 +3016,15 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
 			continue;
 		}
 
-		/* skip suspend of gfx and psp for S0ix
+		/* skip suspend of gfx/mes and psp for S0ix
 		 * gfx is in gfxoff state, so on resume it will exit gfxoff just
 		 * like at runtime. PSP is also part of the always on hardware
 		 * so no need to suspend it.
 		 */
 		if (adev->in_s0ix &&
 		    (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
-		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
+		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX ||
+		     adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_MES))
 			continue;
 
 		/* XXX handle errors */
@@ -3210,6 +3227,15 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev)
 			return r;
 		}
 		adev->ip_blocks[i].status.hw = true;
+
+		if (adev->in_s0ix && adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) {
+			/* disable gfxoff for IP resume. The gfxoff will be re-enabled in
+			 * amdgpu_device_resume() after IP resume.
+			 */
+			amdgpu_gfx_off_ctrl(adev, false);
+			DRM_DEBUG("will disable gfxoff for re-initializing other blocks\n");
+		}
+
 	}
 
 	return 0;
@@ -3338,8 +3364,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
  */
 bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
 {
-	if (amdgpu_sriov_vf(adev) ||
-	    adev->enable_virtual_display ||
+	if (adev->enable_virtual_display ||
 	    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
 		return false;
 
@@ -4051,15 +4076,18 @@ void amdgpu_device_fini_sw(struct amdgpu_device *adev)
  * at suspend time.
  *
  */
-static void amdgpu_device_evict_resources(struct amdgpu_device *adev)
+static int amdgpu_device_evict_resources(struct amdgpu_device *adev)
 {
+	int ret;
+
 	/* No need to evict vram on APUs for suspend to ram or s2idle */
 	if ((adev->in_s3 || adev->in_s0ix) && (adev->flags & AMD_IS_APU))
-		return;
+		return 0;
 
-	if (amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM))
+	ret = amdgpu_ttm_evict_resources(adev, TTM_PL_VRAM);
+	if (ret)
 		DRM_WARN("evicting device resources failed\n");
-
+	return ret;
 }
 
 /*
@@ -4085,6 +4113,11 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 
 	adev->in_suspend = true;
 
+	/* Evict the majority of BOs before grabbing the full access */
+	r = amdgpu_device_evict_resources(adev);
+	if (r)
+		return r;
+
 	if (amdgpu_sriov_vf(adev)) {
 		amdgpu_virt_fini_data_exchange(adev);
 		r = amdgpu_virt_request_full_gpu(adev, false);
@@ -4109,7 +4142,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 	if (!adev->in_s0ix)
 		amdgpu_amdkfd_suspend(adev, adev->in_runpm);
 
-	amdgpu_device_evict_resources(adev);
+	r = amdgpu_device_evict_resources(adev);
+	if (r)
+		return r;
 
 	amdgpu_fence_driver_hw_fini(adev);
 
@@ -4157,21 +4192,15 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
 
 	r = amdgpu_device_ip_resume(adev);
 
-	/* no matter what r is, always need to properly release full GPU */
-	if (amdgpu_sriov_vf(adev)) {
-		amdgpu_virt_init_data_exchange(adev);
-		amdgpu_virt_release_full_gpu(adev, true);
-	}
-
 	if (r) {
 		dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
-		return r;
+		goto exit;
 	}
 	amdgpu_fence_driver_hw_init(adev);
 
 	r = amdgpu_device_ip_late_init(adev);
 	if (r)
-		return r;
+		goto exit;
 
 	queue_delayed_work(system_wq, &adev->delayed_init_work,
 			   msecs_to_jiffies(AMDGPU_RESUME_MS));
@@ -4179,12 +4208,28 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
 	if (!adev->in_s0ix) {
 		r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
 		if (r)
-			return r;
+			goto exit;
+	}
+
+exit:
+	if (amdgpu_sriov_vf(adev)) {
+		amdgpu_virt_init_data_exchange(adev);
+		amdgpu_virt_release_full_gpu(adev, true);
 	}
 
+	if (r)
+		return r;
+
 	/* Make sure IB tests flushed */
 	flush_delayed_work(&adev->delayed_init_work);
 
+	if (adev->in_s0ix) {
+		/* re-enable gfxoff after IP resume. This re-enables gfxoff after
+		 * it was disabled for IP resume in amdgpu_device_ip_resume_phase2().
+		 */
+		amdgpu_gfx_off_ctrl(adev, true);
+		DRM_DEBUG("will enable gfxoff for the mission mode\n");
+	}
 	if (fbcon)
 		drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, false);
 
@@ -4192,25 +4237,27 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
 
 	amdgpu_ras_resume(adev);
 
-	/*
-	 * Most of the connector probing functions try to acquire runtime pm
-	 * refs to ensure that the GPU is powered on when connector polling is
-	 * performed. Since we're calling this from a runtime PM callback,
-	 * trying to acquire rpm refs will cause us to deadlock.
-	 *
-	 * Since we're guaranteed to be holding the rpm lock, it's safe to
-	 * temporarily disable the rpm helpers so this doesn't deadlock us.
-	 */
+	if (adev->mode_info.num_crtc) {
+		/*
+		 * Most of the connector probing functions try to acquire runtime pm
+		 * refs to ensure that the GPU is powered on when connector polling is
+		 * performed. Since we're calling this from a runtime PM callback,
+		 * trying to acquire rpm refs will cause us to deadlock.
+		 *
+		 * Since we're guaranteed to be holding the rpm lock, it's safe to
+		 * temporarily disable the rpm helpers so this doesn't deadlock us.
+		 */
 #ifdef CONFIG_PM
-	dev->dev->power.disable_depth++;
+		dev->dev->power.disable_depth++;
 #endif
-	if (!amdgpu_device_has_dc_support(adev))
-		drm_helper_hpd_irq_event(dev);
-	else
-		drm_kms_helper_hotplug_event(dev);
+		if (!adev->dc_enabled)
+			drm_helper_hpd_irq_event(dev);
+		else
+			drm_kms_helper_hotplug_event(dev);
 #ifdef CONFIG_PM
-	dev->dev->power.disable_depth--;
+		dev->dev->power.disable_depth--;
 #endif
+	}
 	adev->in_suspend = false;
 
 	if (amdgpu_acpi_smart_shift_update(dev, AMDGPU_SS_DEV_D0))
@@ -4559,6 +4606,10 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
 	if (amdgpu_gpu_recovery == 0)
 		goto disabled;
 
+	/* Skip soft reset check in fatal error mode */
+	if (!amdgpu_ras_is_poison_mode_supported(adev))
+		return true;
+
 	if (!amdgpu_device_ip_check_soft_reset(adev)) {
 		dev_info(adev->dev,"Timeout, but no hardware hang detected.\n");
 		return false;
@@ -5006,6 +5057,8 @@ static void amdgpu_device_resume_display_audio(struct amdgpu_device *adev)
 		pm_runtime_enable(&(p->dev));
 		pm_runtime_resume(&(p->dev));
 	}
+
+	pci_dev_put(p);
 }
 
 static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
@@ -5044,6 +5097,7 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
 
 		if (expires < ktime_get_mono_fast_ns()) {
 			dev_warn(adev->dev, "failed to suspend display audio\n");
+			pci_dev_put(p);
 			/* TODO: abort the succeeding gpu reset? */
 			return -ETIMEDOUT;
 		}
@@ -5051,97 +5105,10 @@ static int amdgpu_device_suspend_display_audio(struct amdgpu_device *adev)
 
 	pm_runtime_disable(&(p->dev));
 
+	pci_dev_put(p);
 	return 0;
 }
 
-static void amdgpu_device_recheck_guilty_jobs(
-	struct amdgpu_device *adev, struct list_head *device_list_handle,
-	struct amdgpu_reset_context *reset_context)
-{
-	int i, r = 0;
-
-	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-		struct amdgpu_ring *ring = adev->rings[i];
-		int ret = 0;
-		struct drm_sched_job *s_job;
-
-		if (!ring || !ring->sched.thread)
-			continue;
-
-		s_job = list_first_entry_or_null(&ring->sched.pending_list,
-				struct drm_sched_job, list);
-		if (s_job == NULL)
-			continue;
-
-		/* clear job's guilty and depend the folowing step to decide the real one */
-		drm_sched_reset_karma(s_job);
-		drm_sched_resubmit_jobs_ext(&ring->sched, 1);
-
-		if (!s_job->s_fence->parent) {
-			DRM_WARN("Failed to get a HW fence for job!");
-			continue;
-		}
-
-		ret = dma_fence_wait_timeout(s_job->s_fence->parent, false, ring->sched.timeout);
-		if (ret == 0) { /* timeout */
-			DRM_ERROR("Found the real bad job! ring:%s, job_id:%llx\n",
-						ring->sched.name, s_job->id);
-
-
-			amdgpu_fence_driver_isr_toggle(adev, true);
-
-			/* Clear this failed job from fence array */
-			amdgpu_fence_driver_clear_job_fences(ring);
-
-			amdgpu_fence_driver_isr_toggle(adev, false);
-
-			/* Since the job won't signal and we go for
-			 * another resubmit drop this parent pointer
-			 */
-			dma_fence_put(s_job->s_fence->parent);
-			s_job->s_fence->parent = NULL;
-
-			/* set guilty */
-			drm_sched_increase_karma(s_job);
-			amdgpu_reset_prepare_hwcontext(adev, reset_context);
-retry:
-			/* do hw reset */
-			if (amdgpu_sriov_vf(adev)) {
-				amdgpu_virt_fini_data_exchange(adev);
-				r = amdgpu_device_reset_sriov(adev, false);
-				if (r)
-					adev->asic_reset_res = r;
-			} else {
-				clear_bit(AMDGPU_SKIP_HW_RESET,
-					  &reset_context->flags);
-				r = amdgpu_do_asic_reset(device_list_handle,
-							 reset_context);
-				if (r && r == -EAGAIN)
-					goto retry;
-			}
-
-			/*
-			 * add reset counter so that the following
-			 * resubmitted job could flush vmid
-			 */
-			atomic_inc(&adev->gpu_reset_counter);
-			continue;
-		}
-
-		/* got the hw fence, signal finished fence */
-		atomic_dec(ring->sched.score);
-		dma_fence_get(&s_job->s_fence->finished);
-		dma_fence_signal(&s_job->s_fence->finished);
-		dma_fence_put(&s_job->s_fence->finished);
-
-		/* remove node from list and free the job */
-		spin_lock(&ring->sched.job_list_lock);
-		list_del_init(&s_job->list);
-		spin_unlock(&ring->sched.job_list_lock);
-		ring->sched.ops->free_job(s_job);
-	}
-}
-
 static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
 {
 	struct amdgpu_ras *con = amdgpu_ras_get_context(adev);
@@ -5162,7 +5129,6 @@ static inline void amdgpu_device_stop_pending_resets(struct amdgpu_device *adev)
 
 }
 
-
 /**
  * amdgpu_device_gpu_recover - reset the asic and recover scheduler
  *
@@ -5185,7 +5151,6 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
 	int i, r = 0;
 	bool need_emergency_restart = false;
 	bool audio_suspended = false;
-	int tmp_vram_lost_counter;
 	bool gpu_reset_for_dev_remove = false;
 
 	gpu_reset_for_dev_remove =
@@ -5331,7 +5296,6 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 		amdgpu_device_stop_pending_resets(tmp_adev);
 	}
 
-	tmp_vram_lost_counter = atomic_read(&((adev)->vram_lost_counter));
 	/* Actual ASIC resets if needed.*/
 	/* Host driver will handle XGMI hive reset for SRIOV */
 	if (amdgpu_sriov_vf(adev)) {
@@ -5356,32 +5320,16 @@ skip_hw_reset:
 	/* Post ASIC reset for all devs .*/
 	list_for_each_entry(tmp_adev, device_list_handle, reset_list) {
 
-		/*
-		 * Sometimes a later bad compute job can block a good gfx job as gfx
-		 * and compute ring share internal GC HW mutually. We add an additional
-		 * guilty jobs recheck step to find the real guilty job, it synchronously
-		 * submits and pends for the first job being signaled. If it gets timeout,
-		 * we identify it as a real guilty job.
-		 */
-		if (amdgpu_gpu_recovery == 2 &&
-			!(tmp_vram_lost_counter < atomic_read(&adev->vram_lost_counter)))
-			amdgpu_device_recheck_guilty_jobs(
-				tmp_adev, device_list_handle, reset_context);
-
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = tmp_adev->rings[i];
 
 			if (!ring || !ring->sched.thread)
 				continue;
 
-			/* No point to resubmit jobs if we didn't HW reset*/
-			if (!tmp_adev->asic_reset_res && !job_signaled)
-				drm_sched_resubmit_jobs(&ring->sched);
-
-			drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
+			drm_sched_start(&ring->sched, true);
 		}
 
-		if (adev->enable_mes)
+		if (adev->enable_mes && adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3))
 			amdgpu_mes_self_test(tmp_adev);
 
 		if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) {
@@ -5420,6 +5368,8 @@ skip_sched_resume:
 			amdgpu_device_resume_display_audio(tmp_adev);
 
 		amdgpu_device_unset_mp1_state(tmp_adev);
+
+		amdgpu_ras_set_error_query_ready(tmp_adev, true);
 	}
 
 recover_end:
@@ -5831,8 +5781,6 @@ void amdgpu_pci_resume(struct pci_dev *pdev)
 		if (!ring || !ring->sched.thread)
 			continue;
 
-
-		drm_sched_resubmit_jobs(&ring->sched);
 		drm_sched_start(&ring->sched, true);
 	}
 
@@ -6023,3 +5971,44 @@ struct dma_fence *amdgpu_device_switch_gang(struct amdgpu_device *adev,
 	dma_fence_put(old);
 	return NULL;
 }
+
+bool amdgpu_device_has_display_hardware(struct amdgpu_device *adev)
+{
+	switch (adev->asic_type) {
+#ifdef CONFIG_DRM_AMDGPU_SI
+	case CHIP_HAINAN:
+#endif
+	case CHIP_TOPAZ:
+		/* chips with no display hardware */
+		return false;
+#ifdef CONFIG_DRM_AMDGPU_SI
+	case CHIP_TAHITI:
+	case CHIP_PITCAIRN:
+	case CHIP_VERDE:
+	case CHIP_OLAND:
+#endif
+#ifdef CONFIG_DRM_AMDGPU_CIK
+	case CHIP_BONAIRE:
+	case CHIP_HAWAII:
+	case CHIP_KAVERI:
+	case CHIP_KABINI:
+	case CHIP_MULLINS:
+#endif
+	case CHIP_TONGA:
+	case CHIP_FIJI:
+	case CHIP_POLARIS10:
+	case CHIP_POLARIS11:
+	case CHIP_POLARIS12:
+	case CHIP_VEGAM:
+	case CHIP_CARRIZO:
+	case CHIP_STONEY:
+		/* chips with display hardware */
+		return true;
+	default:
+		/* IP discovery */
+		if (!adev->ip_versions[DCE_HWIP][0] ||
+		    (adev->harvest_ip_mask & AMD_HARVEST_IP_DMU_MASK))
+			return false;
+		return true;
+	}
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
index 3993e6134914..1bbd56029a4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
@@ -305,8 +305,13 @@ static int amdgpu_discovery_init(struct amdgpu_device *adev)
 		goto out;
 	}
 
-	if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin)) {
-		dev_warn(adev->dev, "get invalid ip discovery binary signature from vram\n");
+	if (!amdgpu_discovery_verify_binary_signature(adev->mman.discovery_bin) || amdgpu_discovery == 2) {
+		/* ignore the discovery binary from vram if discovery=2 in kernel module parameter */
+		if (amdgpu_discovery == 2)
+			dev_info(adev->dev,"force read ip discovery binary from file");
+		else
+			dev_warn(adev->dev, "get invalid ip discovery binary signature from vram\n");
+
 		/* retry read ip discovery binary from file */
 		r = amdgpu_discovery_read_binary_from_file(adev, adev->mman.discovery_bin);
 		if (r) {
@@ -1507,6 +1512,7 @@ static int amdgpu_discovery_set_common_ip_blocks(struct amdgpu_device *adev)
 	case IP_VERSION(11, 0, 1):
 	case IP_VERSION(11, 0, 2):
 	case IP_VERSION(11, 0, 3):
+	case IP_VERSION(11, 0, 4):
 		amdgpu_device_ip_block_add(adev, &soc21_common_ip_block);
 		break;
 	default:
@@ -1551,6 +1557,7 @@ static int amdgpu_discovery_set_gmc_ip_blocks(struct amdgpu_device *adev)
 	case IP_VERSION(11, 0, 1):
 	case IP_VERSION(11, 0, 2):
 	case IP_VERSION(11, 0, 3):
+	case IP_VERSION(11, 0, 4):
 		amdgpu_device_ip_block_add(adev, &gmc_v11_0_ip_block);
 		break;
 	default:
@@ -1636,6 +1643,7 @@ static int amdgpu_discovery_set_psp_ip_blocks(struct amdgpu_device *adev)
 	case IP_VERSION(13, 0, 7):
 	case IP_VERSION(13, 0, 8):
 	case IP_VERSION(13, 0, 10):
+	case IP_VERSION(13, 0, 11):
 		amdgpu_device_ip_block_add(adev, &psp_v13_0_ip_block);
 		break;
 	case IP_VERSION(13, 0, 4):
@@ -1686,6 +1694,7 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
 	case IP_VERSION(13, 0, 7):
 	case IP_VERSION(13, 0, 8):
 	case IP_VERSION(13, 0, 10):
+	case IP_VERSION(13, 0, 11):
 		amdgpu_device_ip_block_add(adev, &smu_v13_0_ip_block);
 		break;
 	default:
@@ -1697,9 +1706,17 @@ static int amdgpu_discovery_set_smu_ip_blocks(struct amdgpu_device *adev)
 	return 0;
 }
 
+#if defined(CONFIG_DRM_AMD_DC)
+static void amdgpu_discovery_set_sriov_display(struct amdgpu_device *adev)
+{
+	amdgpu_device_set_sriov_virtual_display(adev);
+	amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
+}
+#endif
+
 static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
 {
-	if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) {
+	if (adev->enable_virtual_display) {
 		amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
 		return 0;
 	}
@@ -1727,7 +1744,10 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
 		case IP_VERSION(3, 1, 6):
 		case IP_VERSION(3, 2, 0):
 		case IP_VERSION(3, 2, 1):
-			amdgpu_device_ip_block_add(adev, &dm_ip_block);
+			if (amdgpu_sriov_vf(adev))
+				amdgpu_discovery_set_sriov_display(adev);
+			else
+				amdgpu_device_ip_block_add(adev, &dm_ip_block);
 			break;
 		default:
 			dev_err(adev->dev,
@@ -1740,7 +1760,10 @@ static int amdgpu_discovery_set_display_ip_blocks(struct amdgpu_device *adev)
 		case IP_VERSION(12, 0, 0):
 		case IP_VERSION(12, 0, 1):
 		case IP_VERSION(12, 1, 0):
-			amdgpu_device_ip_block_add(adev, &dm_ip_block);
+			if (amdgpu_sriov_vf(adev))
+				amdgpu_discovery_set_sriov_display(adev);
+			else
+				amdgpu_device_ip_block_add(adev, &dm_ip_block);
 			break;
 		default:
 			dev_err(adev->dev,
@@ -1785,6 +1808,7 @@ static int amdgpu_discovery_set_gc_ip_blocks(struct amdgpu_device *adev)
 	case IP_VERSION(11, 0, 1):
 	case IP_VERSION(11, 0, 2):
 	case IP_VERSION(11, 0, 3):
+	case IP_VERSION(11, 0, 4):
 		amdgpu_device_ip_block_add(adev, &gfx_v11_0_ip_block);
 		break;
 	default:
@@ -1948,6 +1972,7 @@ static int amdgpu_discovery_set_mes_ip_blocks(struct amdgpu_device *adev)
 	case IP_VERSION(11, 0, 1):
 	case IP_VERSION(11, 0, 2):
 	case IP_VERSION(11, 0, 3):
+	case IP_VERSION(11, 0, 4):
 		amdgpu_device_ip_block_add(adev, &mes_v11_0_ip_block);
 		adev->enable_mes = true;
 		adev->enable_mes_kiq = true;
@@ -2161,6 +2186,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 		break;
 	case IP_VERSION(10, 3, 1):
 		adev->family = AMDGPU_FAMILY_VGH;
+		adev->apu_flags |= AMD_APU_IS_VANGOGH;
 		break;
 	case IP_VERSION(10, 3, 3):
 		adev->family = AMDGPU_FAMILY_YC;
@@ -2177,6 +2203,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 		adev->family = AMDGPU_FAMILY_GC_11_0_0;
 		break;
 	case IP_VERSION(11, 0, 1):
+	case IP_VERSION(11, 0, 4):
 		adev->family = AMDGPU_FAMILY_GC_11_0_1;
 		break;
 	default:
@@ -2194,6 +2221,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 	case IP_VERSION(10, 3, 6):
 	case IP_VERSION(10, 3, 7):
 	case IP_VERSION(11, 0, 1):
+	case IP_VERSION(11, 0, 4):
 		adev->flags |= AMD_IS_APU;
 		break;
 	default:
@@ -2250,6 +2278,7 @@ int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev)
 		adev->nbio.hdp_flush_reg = &nbio_v4_3_hdp_flush_reg;
 		break;
 	case IP_VERSION(7, 7, 0):
+	case IP_VERSION(7, 7, 1):
 		adev->nbio.funcs = &nbio_v7_7_funcs;
 		adev->nbio.hdp_flush_reg = &nbio_v7_7_hdp_flush_reg;
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 1a06b8d724f3..b22471b3bd63 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -39,11 +39,46 @@
 #include <linux/pm_runtime.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_edid.h>
-#include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_fb_helper.h>
+#include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_vblank.h>
 
+/**
+ * amdgpu_display_hotplug_work_func - work handler for display hotplug event
+ *
+ * @work: work struct pointer
+ *
+ * This is the hotplug event work handler (all ASICs).
+ * The work gets scheduled from the IRQ handler if there
+ * was a hotplug interrupt.  It walks through the connector table
+ * and calls hotplug handler for each connector. After this, it sends
+ * a DRM hotplug event to alert userspace.
+ *
+ * This design approach is required in order to defer hotplug event handling
+ * from the IRQ handler to a work handler because hotplug handler has to use
+ * mutexes which cannot be locked in an IRQ handler (since &mutex_lock may
+ * sleep).
+ */
+void amdgpu_display_hotplug_work_func(struct work_struct *work)
+{
+	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
+						  hotplug_work);
+	struct drm_device *dev = adev_to_drm(adev);
+	struct drm_mode_config *mode_config = &dev->mode_config;
+	struct drm_connector *connector;
+	struct drm_connector_list_iter iter;
+
+	mutex_lock(&mode_config->mutex);
+	drm_connector_list_iter_begin(dev, &iter);
+	drm_for_each_connector_iter(connector, &iter)
+		amdgpu_connector_hotplug(connector);
+	drm_connector_list_iter_end(&iter);
+	mutex_unlock(&mode_config->mutex);
+	/* Just fire off a uevent and let userspace tell us what to do */
+	drm_helper_hpd_irq_event(dev);
+}
+
 static int amdgpu_display_framebuffer_init(struct drm_device *dev,
 					   struct amdgpu_framebuffer *rfb,
 					   const struct drm_mode_fb_cmd2 *mode_cmd,
@@ -514,7 +549,7 @@ uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
 	 */
 	if ((bo_flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) &&
 	    amdgpu_bo_support_uswc(bo_flags) &&
-	    amdgpu_device_asic_has_dc_support(adev->asic_type) &&
+	    adev->dc_enabled &&
 	    adev->mode_info.gpu_vm_support)
 		domain |= AMDGPU_GEM_DOMAIN_GTT;
 #endif
@@ -1214,7 +1249,6 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev,
 
 const struct drm_mode_config_funcs amdgpu_mode_funcs = {
 	.fb_create = amdgpu_display_user_framebuffer_create,
-	.output_poll_changed = drm_fb_helper_output_poll_changed,
 };
 
 static const struct drm_prop_enum_list amdgpu_underscan_enum_list[] =
@@ -1281,7 +1315,7 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
 					 "dither",
 					 amdgpu_dither_enum_list, sz);
 
-	if (amdgpu_device_has_dc_support(adev)) {
+	if (adev->dc_enabled) {
 		adev->mode_info.abm_level_property =
 			drm_property_create_range(adev_to_drm(adev), 0,
 						  "abm level", 0, 4);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
index 560352f7c317..9d19940f73c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
@@ -35,6 +35,7 @@
 #define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c))
 #define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r))
 
+void amdgpu_display_hotplug_work_func(struct work_struct *work);
 void amdgpu_display_update_priority(struct amdgpu_device *adev);
 uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev,
 					  uint64_t bo_flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
index 7bd8e33b14be..271e30e34d93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
@@ -328,7 +328,9 @@ amdgpu_dma_buf_create_obj(struct drm_device *dev, struct dma_buf *dma_buf)
 	if (dma_buf->ops == &amdgpu_dmabuf_ops) {
 		struct amdgpu_bo *other = gem_to_amdgpu_bo(dma_buf->priv);
 
-		flags |= other->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC;
+		flags |= other->flags & (AMDGPU_GEM_CREATE_CPU_GTT_USWC |
+					 AMDGPU_GEM_CREATE_COHERENT |
+					 AMDGPU_GEM_CREATE_UNCACHED);
 	}
 
 	ret = amdgpu_gem_object_create(adev, dma_buf->size, PAGE_SIZE,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 3c9fecdd6b2f..b4f2d61ea0d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -25,6 +25,7 @@
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_aperture.h>
 #include <drm/drm_drv.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_vblank.h>
 #include <drm/drm_managed.h>
@@ -230,17 +231,18 @@ module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);
 
 /**
  * DOC: gartsize (uint)
- * Restrict the size of GART in Mib (32, 64, etc.) for testing. The default is -1 (The size depends on asic).
+ * Restrict the size of GART (for kernel use) in Mib (32, 64, etc.) for testing.
+ * The default is -1 (The size depends on asic).
  */
-MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)");
+MODULE_PARM_DESC(gartsize, "Size of kernel GART to setup in megabytes (32, 64, etc., -1=auto)");
 module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
 
 /**
  * DOC: gttsize (int)
- * Restrict the size of GTT domain in MiB for testing. The default is -1 (It's VRAM size if 3GB < VRAM < 3/4 RAM,
- * otherwise 3/4 RAM size).
+ * Restrict the size of GTT domain (for userspace use) in MiB for testing.
+ * The default is -1 (Use 1/2 RAM, minimum value is 3GB).
  */
-MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)");
+MODULE_PARM_DESC(gttsize, "Size of the GTT userspace domain in megabytes (-1 = auto)");
 module_param_named(gttsize, amdgpu_gtt_size, int, 0600);
 
 /**
@@ -533,7 +535,7 @@ module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
  * DOC: gpu_recovery (int)
  * Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).
  */
-MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (2 = advanced tdr mode, 1 = enable, 0 = disable, -1 = auto)");
+MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
 module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
 
 /**
@@ -1924,9 +1926,6 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x73AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
 	{0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
 
-	/* Van Gogh */
-	{0x1002, 0x163F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VANGOGH|AMD_IS_APU},
-
 	/* Yellow Carp */
 	{0x1002, 0x164D, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},
 	{0x1002, 0x1681, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_YELLOW_CARP|AMD_IS_APU},
@@ -2040,6 +2039,15 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 			 "See modparam exp_hw_support\n");
 		return -ENODEV;
 	}
+	/* differentiate between P10 and P11 asics with the same DID */
+	if (pdev->device == 0x67FF &&
+	    (pdev->revision == 0xE3 ||
+	     pdev->revision == 0xE7 ||
+	     pdev->revision == 0xF3 ||
+	     pdev->revision == 0xF7)) {
+		flags &= ~AMD_ASIC_MASK;
+		flags |= CHIP_POLARIS10;
+	}
 
 	/* Due to hardware bugs, S/G Display on raven requires a 1:1 IOMMU mapping,
 	 * however, SME requires an indirect IOMMU mapping because the encryption
@@ -2109,12 +2117,12 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
 
 	pci_set_drvdata(pdev, ddev);
 
-	ret = amdgpu_driver_load_kms(adev, ent->driver_data);
+	ret = amdgpu_driver_load_kms(adev, flags);
 	if (ret)
 		goto err_pci;
 
 retry_init:
-	ret = drm_dev_register(ddev, ent->driver_data);
+	ret = drm_dev_register(ddev, flags);
 	if (ret == -EAGAIN && ++retry <= 3) {
 		DRM_INFO("retry init %d\n", retry);
 		/* Don't request EX mode too frequently which is attacking */
@@ -2201,7 +2209,8 @@ amdgpu_pci_remove(struct pci_dev *pdev)
 		pm_runtime_forbid(dev->dev);
 	}
 
-	if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2)) {
+	if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2) &&
+	    !amdgpu_sriov_vf(adev)) {
 		bool need_to_reset_gpu = false;
 
 		if (adev->gmc.xgmi.num_physical_nodes > 1) {
@@ -2470,7 +2479,7 @@ static int amdgpu_runtime_idle_check_display(struct device *dev)
 		if (ret)
 			return ret;
 
-		if (amdgpu_device_has_dc_support(adev)) {
+		if (adev->dc_enabled) {
 			struct drm_crtc *crtc;
 
 			drm_for_each_crtc(crtc, drm_dev) {
@@ -2571,6 +2580,8 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
 		amdgpu_device_baco_enter(drm_dev);
 	}
 
+	dev_dbg(&pdev->dev, "asic/device is runtime suspended\n");
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
index 4d9eb0137f8c..7d2a908438e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_eeprom.c
@@ -79,13 +79,15 @@
  * That is, for an I2C EEPROM driver everything is controlled by
  * the "eeprom_addr".
  *
+ * See also top of amdgpu_ras_eeprom.c.
+ *
  * P.S. If you need to write, lock and read the Identification Page,
  * (M24M02-DR device only, which we do not use), change the "7" to
  * "0xF" in the macro below, and let the client set bit 20 to 1 in
  * "eeprom_addr", and set A10 to 0 to write into it, and A10 and A1 to
  * 1 to lock it permanently.
  */
-#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 7))
+#define MAKE_I2C_ADDR(_aa) ((0xA << 3) | (((_aa) >> 16) & 0xF))
 
 static int __amdgpu_eeprom_xfer(struct i2c_adapter *i2c_adap, u32 eeprom_addr,
 				u8 *eeprom_buf, u16 buf_size, bool read)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index d0d99ed607dd..00444203220d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -55,6 +55,7 @@ struct amdgpu_fence {
 
 	/* RB, DMA, etc. */
 	struct amdgpu_ring		*ring;
+	ktime_t				start_timestamp;
 };
 
 static struct kmem_cache *amdgpu_fence_slab;
@@ -199,6 +200,8 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f, struct amd
 		}
 	}
 
+	to_amdgpu_fence(fence)->start_timestamp = ktime_get();
+
 	/* This function can't be called concurrently anyway, otherwise
 	 * emitting the fence would mess up the hardware ring buffer.
 	 */
@@ -407,6 +410,57 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
 }
 
 /**
+ * amdgpu_fence_last_unsignaled_time_us - the time fence emitted until now
+ * @ring: ring the fence is associated with
+ *
+ * Find the earliest fence unsignaled until now, calculate the time delta
+ * between the time fence emitted and now.
+ */
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring)
+{
+	struct amdgpu_fence_driver *drv = &ring->fence_drv;
+	struct dma_fence *fence;
+	uint32_t last_seq, sync_seq;
+
+	last_seq = atomic_read(&ring->fence_drv.last_seq);
+	sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+	if (last_seq == sync_seq)
+		return 0;
+
+	++last_seq;
+	last_seq &= drv->num_fences_mask;
+	fence = drv->fences[last_seq];
+	if (!fence)
+		return 0;
+
+	return ktime_us_delta(ktime_get(),
+		to_amdgpu_fence(fence)->start_timestamp);
+}
+
+/**
+ * amdgpu_fence_update_start_timestamp - update the timestamp of the fence
+ * @ring: ring the fence is associated with
+ * @seq: the fence seq number to update.
+ * @timestamp: the start timestamp to update.
+ *
+ * The function called at the time the fence and related ib is about to
+ * resubmit to gpu in MCBP scenario. Thus we do not consider race condition
+ * with amdgpu_fence_process to modify the same fence.
+ */
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq, ktime_t timestamp)
+{
+	struct amdgpu_fence_driver *drv = &ring->fence_drv;
+	struct dma_fence *fence;
+
+	seq &= drv->num_fences_mask;
+	fence = drv->fences[seq];
+	if (!fence)
+		return;
+
+	to_amdgpu_fence(fence)->start_timestamp = timestamp;
+}
+
+/**
  * amdgpu_fence_driver_start_ring - make the fence driver
  * ready for use on the requested ring.
  *
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
index e325150879df..4620c4712ce3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fru_eeprom.c
@@ -29,9 +29,10 @@
 #include "amdgpu_fru_eeprom.h"
 #include "amdgpu_eeprom.h"
 
-#define FRU_EEPROM_MADDR        0x60000
+#define FRU_EEPROM_MADDR_6      0x60000
+#define FRU_EEPROM_MADDR_8      0x80000
 
-static bool is_fru_eeprom_supported(struct amdgpu_device *adev)
+static bool is_fru_eeprom_supported(struct amdgpu_device *adev, u32 *fru_addr)
 {
 	/* Only server cards have the FRU EEPROM
 	 * TODO: See if we can figure this out dynamically instead of
@@ -45,6 +46,11 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev)
 	if (amdgpu_sriov_vf(adev))
 		return false;
 
+	/* The default I2C EEPROM address of the FRU.
+	 */
+	if (fru_addr)
+		*fru_addr = FRU_EEPROM_MADDR_8;
+
 	/* VBIOS is of the format ###-DXXXYYYY-##. For SKU identification,
 	 * we can use just the "DXXX" portion. If there were more models, we
 	 * could convert the 3 characters to a hex integer and use a switch
@@ -57,21 +63,31 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev)
 		if (strnstr(atom_ctx->vbios_version, "D161",
 			    sizeof(atom_ctx->vbios_version)) ||
 		    strnstr(atom_ctx->vbios_version, "D163",
-			    sizeof(atom_ctx->vbios_version)))
+			    sizeof(atom_ctx->vbios_version))) {
+			if (fru_addr)
+				*fru_addr = FRU_EEPROM_MADDR_6;
 			return true;
-		else
+		} else {
 			return false;
+		}
 	case CHIP_ALDEBARAN:
-		/* All Aldebaran SKUs have the FRU */
+		/* All Aldebaran SKUs have an FRU */
+		if (!strnstr(atom_ctx->vbios_version, "D673",
+			     sizeof(atom_ctx->vbios_version)))
+			if (fru_addr)
+				*fru_addr = FRU_EEPROM_MADDR_6;
 		return true;
 	case CHIP_SIENNA_CICHLID:
 		if (strnstr(atom_ctx->vbios_version, "D603",
-		    sizeof(atom_ctx->vbios_version))) {
+			    sizeof(atom_ctx->vbios_version))) {
 			if (strnstr(atom_ctx->vbios_version, "D603GLXE",
-			    sizeof(atom_ctx->vbios_version)))
+				    sizeof(atom_ctx->vbios_version))) {
 				return false;
-			else
+			} else {
+				if (fru_addr)
+					*fru_addr = FRU_EEPROM_MADDR_6;
 				return true;
+			}
 		} else {
 			return false;
 		}
@@ -80,41 +96,14 @@ static bool is_fru_eeprom_supported(struct amdgpu_device *adev)
 	}
 }
 
-static int amdgpu_fru_read_eeprom(struct amdgpu_device *adev, uint32_t addrptr,
-				  unsigned char *buf, size_t buf_size)
-{
-	int ret;
-	u8 size;
-
-	ret = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addrptr, buf, 1);
-	if (ret < 1) {
-		DRM_WARN("FRU: Failed to get size field");
-		return ret;
-	}
-
-	/* The size returned by the i2c requires subtraction of 0xC0 since the
-	 * size apparently always reports as 0xC0+actual size.
-	 */
-	size = buf[0] & 0x3F;
-	size = min_t(size_t, size, buf_size);
-
-	ret = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addrptr + 1,
-				 buf, size);
-	if (ret < 1) {
-		DRM_WARN("FRU: Failed to get data field");
-		return ret;
-	}
-
-	return size;
-}
-
 int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
 {
-	unsigned char buf[AMDGPU_PRODUCT_NAME_LEN];
-	u32 addrptr;
+	unsigned char buf[8], *pia;
+	u32 addr, fru_addr;
 	int size, len;
+	u8 csum;
 
-	if (!is_fru_eeprom_supported(adev))
+	if (!is_fru_eeprom_supported(adev, &fru_addr))
 		return 0;
 
 	/* If algo exists, it means that the i2c_adapter's initialized */
@@ -123,88 +112,102 @@ int amdgpu_fru_get_product_info(struct amdgpu_device *adev)
 		return -ENODEV;
 	}
 
-	/* There's a lot of repetition here. This is due to the FRU having
-	 * variable-length fields. To get the information, we have to find the
-	 * size of each field, and then keep reading along and reading along
-	 * until we get all of the data that we want. We use addrptr to track
-	 * the address as we go
-	 */
-
-	/* The first fields are all of size 1-byte, from 0-7 are offsets that
-	 * contain information that isn't useful to us.
-	 * Bytes 8-a are all 1-byte and refer to the size of the entire struct,
-	 * and the language field, so just start from 0xb, manufacturer size
-	 */
-	addrptr = FRU_EEPROM_MADDR + 0xb;
-	size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf));
-	if (size < 1) {
-		DRM_ERROR("Failed to read FRU Manufacturer, ret:%d", size);
-		return -EINVAL;
+	/* Read the IPMI Common header */
+	len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, fru_addr, buf,
+				 sizeof(buf));
+	if (len != 8) {
+		DRM_ERROR("Couldn't read the IPMI Common Header: %d", len);
+		return len < 0 ? len : -EIO;
 	}
 
-	/* Increment the addrptr by the size of the field, and 1 due to the
-	 * size field being 1 byte. This pattern continues below.
-	 */
-	addrptr += size + 1;
-	size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf));
-	if (size < 1) {
-		DRM_ERROR("Failed to read FRU product name, ret:%d", size);
-		return -EINVAL;
+	if (buf[0] != 1) {
+		DRM_ERROR("Bad IPMI Common Header version: 0x%02x", buf[0]);
+		return -EIO;
 	}
 
-	len = size;
-	if (len >= AMDGPU_PRODUCT_NAME_LEN) {
-		DRM_WARN("FRU Product Name is larger than %d characters. This is likely a mistake",
-				AMDGPU_PRODUCT_NAME_LEN);
-		len = AMDGPU_PRODUCT_NAME_LEN - 1;
-	}
-	memcpy(adev->product_name, buf, len);
-	adev->product_name[len] = '\0';
-
-	addrptr += size + 1;
-	size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf));
-	if (size < 1) {
-		DRM_ERROR("Failed to read FRU product number, ret:%d", size);
-		return -EINVAL;
+	for (csum = 0; len > 0; len--)
+		csum += buf[len - 1];
+	if (csum) {
+		DRM_ERROR("Bad IPMI Common Header checksum: 0x%02x", csum);
+		return -EIO;
 	}
 
-	len = size;
-	/* Product number should only be 16 characters. Any more,
-	 * and something could be wrong. Cap it at 16 to be safe
-	 */
-	if (len >= sizeof(adev->product_number)) {
-		DRM_WARN("FRU Product Number is larger than 16 characters. This is likely a mistake");
-		len = sizeof(adev->product_number) - 1;
-	}
-	memcpy(adev->product_number, buf, len);
-	adev->product_number[len] = '\0';
+	/* Get the offset to the Product Info Area (PIA). */
+	addr = buf[4] * 8;
+	if (!addr)
+		return 0;
 
-	addrptr += size + 1;
-	size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf));
+	/* Get the absolute address to the PIA. */
+	addr += fru_addr;
 
-	if (size < 1) {
-		DRM_ERROR("Failed to read FRU product version, ret:%d", size);
-		return -EINVAL;
+	/* Read the header of the PIA. */
+	len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, buf, 3);
+	if (len != 3) {
+		DRM_ERROR("Couldn't read the Product Info Area header: %d", len);
+		return len < 0 ? len : -EIO;
 	}
 
-	addrptr += size + 1;
-	size = amdgpu_fru_read_eeprom(adev, addrptr, buf, sizeof(buf));
+	if (buf[0] != 1) {
+		DRM_ERROR("Bad IPMI Product Info Area version: 0x%02x", buf[0]);
+		return -EIO;
+	}
 
-	if (size < 1) {
-		DRM_ERROR("Failed to read FRU serial number, ret:%d", size);
-		return -EINVAL;
+	size = buf[1] * 8;
+	pia = kzalloc(size, GFP_KERNEL);
+	if (!pia)
+		return -ENOMEM;
+
+	/* Read the whole PIA. */
+	len = amdgpu_eeprom_read(adev->pm.fru_eeprom_i2c_bus, addr, pia, size);
+	if (len != size) {
+		kfree(pia);
+		DRM_ERROR("Couldn't read the Product Info Area: %d", len);
+		return len < 0 ? len : -EIO;
 	}
 
-	len = size;
-	/* Serial number should only be 16 characters. Any more,
-	 * and something could be wrong. Cap it at 16 to be safe
-	 */
-	if (len >= sizeof(adev->serial)) {
-		DRM_WARN("FRU Serial Number is larger than 16 characters. This is likely a mistake");
-		len = sizeof(adev->serial) - 1;
+	for (csum = 0; size > 0; size--)
+		csum += pia[size - 1];
+	if (csum) {
+		DRM_ERROR("Bad Product Info Area checksum: 0x%02x", csum);
+		return -EIO;
 	}
-	memcpy(adev->serial, buf, len);
-	adev->serial[len] = '\0';
 
+	/* Now extract useful information from the PIA.
+	 *
+	 * Skip the Manufacturer Name at [3] and go directly to
+	 * the Product Name field.
+	 */
+	addr = 3 + 1 + (pia[3] & 0x3F);
+	if (addr + 1 >= len)
+		goto Out;
+	memcpy(adev->product_name, pia + addr + 1,
+	       min_t(size_t,
+		     sizeof(adev->product_name),
+		     pia[addr] & 0x3F));
+	adev->product_name[sizeof(adev->product_name) - 1] = '\0';
+
+	/* Go to the Product Part/Model Number field. */
+	addr += 1 + (pia[addr] & 0x3F);
+	if (addr + 1 >= len)
+		goto Out;
+	memcpy(adev->product_number, pia + addr + 1,
+	       min_t(size_t,
+		     sizeof(adev->product_number),
+		     pia[addr] & 0x3F));
+	adev->product_number[sizeof(adev->product_number) - 1] = '\0';
+
+	/* Go to the Product Version field. */
+	addr += 1 + (pia[addr] & 0x3F);
+
+	/* Go to the Product Serial Number field. */
+	addr += 1 + (pia[addr] & 0x3F);
+	if (addr + 1 >= len)
+		goto Out;
+	memcpy(adev->serial, pia + addr + 1, min_t(size_t,
+						   sizeof(adev->serial),
+						   pia[addr] & 0x3F));
+	adev->serial[sizeof(adev->serial) - 1] = '\0';
+Out:
+	kfree(pia);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 8ef31d687ef3..bb7350ea1d75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -38,6 +38,7 @@
 #include "amdgpu.h"
 #include "amdgpu_display.h"
 #include "amdgpu_dma_buf.h"
+#include "amdgpu_hmm.h"
 #include "amdgpu_xgmi.h"
 
 static const struct drm_gem_object_funcs amdgpu_gem_object_funcs;
@@ -87,7 +88,7 @@ static void amdgpu_gem_object_free(struct drm_gem_object *gobj)
 	struct amdgpu_bo *robj = gem_to_amdgpu_bo(gobj);
 
 	if (robj) {
-		amdgpu_mn_unregister(robj);
+		amdgpu_hmm_unregister(robj);
 		amdgpu_bo_unref(&robj);
 	}
 }
@@ -255,7 +256,7 @@ static int amdgpu_gem_object_mmap(struct drm_gem_object *obj, struct vm_area_str
 	 * becoming writable and makes is_cow_mapping(vm_flags) false.
 	 */
 	if (is_cow_mapping(vma->vm_flags) &&
-	    !(vma->vm_flags & (VM_READ | VM_WRITE | VM_EXEC)))
+	    !(vma->vm_flags & VM_ACCESS_FLAGS))
 		vma->vm_flags &= ~VM_MAYWRITE;
 
 	return drm_gem_ttm_mmap(obj, vma);
@@ -378,6 +379,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 	struct amdgpu_device *adev = drm_to_adev(dev);
 	struct drm_amdgpu_gem_userptr *args = data;
 	struct drm_gem_object *gobj;
+	struct hmm_range *range;
 	struct amdgpu_bo *bo;
 	uint32_t handle;
 	int r;
@@ -413,14 +415,13 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 	if (r)
 		goto release_object;
 
-	if (args->flags & AMDGPU_GEM_USERPTR_REGISTER) {
-		r = amdgpu_mn_register(bo, args->addr);
-		if (r)
-			goto release_object;
-	}
+	r = amdgpu_hmm_register(bo, args->addr);
+	if (r)
+		goto release_object;
 
 	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
-		r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
+		r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages,
+						 &range);
 		if (r)
 			goto release_object;
 
@@ -443,7 +444,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
 
 user_pages_done:
 	if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
-		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
+		amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm, range);
 
 release_object:
 	drm_gem_object_put(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index 9546adc8a76f..23692e5d4d13 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -583,10 +583,14 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
 		if (adev->gfx.gfx_off_req_count == 0 &&
 		    !adev->gfx.gfx_off_state) {
 			/* If going to s2idle, no need to wait */
-			if (adev->in_s0ix)
-				delay = GFX_OFF_NO_DELAY;
-			schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
+			if (adev->in_s0ix) {
+				if (!amdgpu_dpm_set_powergating_by_smu(adev,
+						AMD_IP_BLOCK_TYPE_GFX, true))
+					adev->gfx.gfx_off_state = true;
+			} else {
+				schedule_delayed_work(&adev->gfx.gfx_off_delay_work,
 					      delay);
+			}
 		}
 	} else {
 		if (adev->gfx.gfx_off_req_count == 0) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
index 832b3807f1d6..b3df4787877e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@@ -33,6 +33,7 @@
 #include "amdgpu_imu.h"
 #include "soc15.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_ring_mux.h"
 
 /* GFX current status */
 #define AMDGPU_GFX_NORMAL_MODE			0x00000000L
@@ -352,6 +353,9 @@ struct amdgpu_gfx {
 	struct amdgpu_gfx_ras		*ras;
 
 	bool				is_poweron;
+
+	struct amdgpu_ring		sw_gfx_ring[AMDGPU_MAX_SW_GFX_RINGS];
+	struct amdgpu_ring_mux          muxer;
 };
 
 #define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 34233a74248c..02a4c93673ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -479,6 +479,12 @@ int amdgpu_gmc_allocate_vm_inv_eng(struct amdgpu_device *adev)
 	unsigned i;
 	unsigned vmhub, inv_eng;
 
+	if (adev->enable_mes) {
+		/* reserve engine 5 for firmware */
+		for (vmhub = 0; vmhub < AMDGPU_MAX_VMHUBS; vmhub++)
+			vm_inv_engs[vmhub] &= ~(1 << 5);
+	}
+
 	for (i = 0; i < adev->num_rings; ++i) {
 		ring = adev->rings[i];
 		vmhub = ring->funcs->vmhub;
@@ -542,6 +548,8 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
 	case IP_VERSION(10, 3, 1):
 	/* YELLOW_CARP*/
 	case IP_VERSION(10, 3, 3):
+	case IP_VERSION(11, 0, 1):
+	case IP_VERSION(11, 0, 4):
 		/* Don't enable it by default yet.
 		 */
 		if (amdgpu_tmz < 1) {
@@ -656,7 +664,7 @@ void amdgpu_gmc_get_vbios_allocations(struct amdgpu_device *adev)
 	}
 
 	if (amdgpu_sriov_vf(adev) ||
-	    !amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_DCE)) {
+	    !amdgpu_device_has_display_hardware(adev)) {
 		size = 0;
 	} else {
 		size = amdgpu_gmc_get_vbios_fb_size(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
index b86c0b8252a5..2dadcfe43d03 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c
@@ -49,9 +49,12 @@
 
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
+#include "amdgpu_hmm.h"
+
+#define MAX_WALK_BYTE	(2UL << 30)
 
 /**
- * amdgpu_mn_invalidate_gfx - callback to notify about mm change
+ * amdgpu_hmm_invalidate_gfx - callback to notify about mm change
  *
  * @mni: the range (mm) is about to update
  * @range: details on the invalidation
@@ -60,9 +63,9 @@
  * Block for operations on BOs to finish and mark pages as accessed and
  * potentially dirty.
  */
-static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
-				     const struct mmu_notifier_range *range,
-				     unsigned long cur_seq)
+static bool amdgpu_hmm_invalidate_gfx(struct mmu_interval_notifier *mni,
+				      const struct mmu_notifier_range *range,
+				      unsigned long cur_seq)
 {
 	struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@@ -83,12 +86,12 @@ static bool amdgpu_mn_invalidate_gfx(struct mmu_interval_notifier *mni,
 	return true;
 }
 
-static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = {
-	.invalidate = amdgpu_mn_invalidate_gfx,
+static const struct mmu_interval_notifier_ops amdgpu_hmm_gfx_ops = {
+	.invalidate = amdgpu_hmm_invalidate_gfx,
 };
 
 /**
- * amdgpu_mn_invalidate_hsa - callback to notify about mm change
+ * amdgpu_hmm_invalidate_hsa - callback to notify about mm change
  *
  * @mni: the range (mm) is about to update
  * @range: details on the invalidation
@@ -97,32 +100,26 @@ static const struct mmu_interval_notifier_ops amdgpu_mn_gfx_ops = {
  * We temporarily evict the BO attached to this range. This necessitates
  * evicting all user-mode queues of the process.
  */
-static bool amdgpu_mn_invalidate_hsa(struct mmu_interval_notifier *mni,
-				     const struct mmu_notifier_range *range,
-				     unsigned long cur_seq)
+static bool amdgpu_hmm_invalidate_hsa(struct mmu_interval_notifier *mni,
+				      const struct mmu_notifier_range *range,
+				      unsigned long cur_seq)
 {
 	struct amdgpu_bo *bo = container_of(mni, struct amdgpu_bo, notifier);
-	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
 
 	if (!mmu_notifier_range_blockable(range))
 		return false;
 
-	mutex_lock(&adev->notifier_lock);
-
-	mmu_interval_set_seq(mni, cur_seq);
-
-	amdgpu_amdkfd_evict_userptr(bo->kfd_bo, bo->notifier.mm);
-	mutex_unlock(&adev->notifier_lock);
+	amdgpu_amdkfd_evict_userptr(mni, cur_seq, bo->kfd_bo);
 
 	return true;
 }
 
-static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = {
-	.invalidate = amdgpu_mn_invalidate_hsa,
+static const struct mmu_interval_notifier_ops amdgpu_hmm_hsa_ops = {
+	.invalidate = amdgpu_hmm_invalidate_hsa,
 };
 
 /**
- * amdgpu_mn_register - register a BO for notifier updates
+ * amdgpu_hmm_register - register a BO for notifier updates
  *
  * @bo: amdgpu buffer object
  * @addr: userptr addr we should monitor
@@ -130,25 +127,25 @@ static const struct mmu_interval_notifier_ops amdgpu_mn_hsa_ops = {
  * Registers a mmu_notifier for the given BO at the specified address.
  * Returns 0 on success, -ERRNO if anything goes wrong.
  */
-int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
+int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr)
 {
 	if (bo->kfd_bo)
 		return mmu_interval_notifier_insert(&bo->notifier, current->mm,
 						    addr, amdgpu_bo_size(bo),
-						    &amdgpu_mn_hsa_ops);
+						    &amdgpu_hmm_hsa_ops);
 	return mmu_interval_notifier_insert(&bo->notifier, current->mm, addr,
 					    amdgpu_bo_size(bo),
-					    &amdgpu_mn_gfx_ops);
+					    &amdgpu_hmm_gfx_ops);
 }
 
 /**
- * amdgpu_mn_unregister - unregister a BO for notifier updates
+ * amdgpu_hmm_unregister - unregister a BO for notifier updates
  *
  * @bo: amdgpu buffer object
  *
  * Remove any registration of mmu notifier updates from the buffer object.
  */
-void amdgpu_mn_unregister(struct amdgpu_bo *bo)
+void amdgpu_hmm_unregister(struct amdgpu_bo *bo)
 {
 	if (!bo->notifier.mm)
 		return;
@@ -157,12 +154,12 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
 }
 
 int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
-			       struct mm_struct *mm, struct page **pages,
-			       uint64_t start, uint64_t npages,
-			       struct hmm_range **phmm_range, bool readonly,
-			       bool mmap_locked, void *owner)
+			       uint64_t start, uint64_t npages, bool readonly,
+			       void *owner, struct page **pages,
+			       struct hmm_range **phmm_range)
 {
 	struct hmm_range *hmm_range;
+	unsigned long end;
 	unsigned long timeout;
 	unsigned long i;
 	unsigned long *pfns;
@@ -184,32 +181,42 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
 		hmm_range->default_flags |= HMM_PFN_REQ_WRITE;
 	hmm_range->hmm_pfns = pfns;
 	hmm_range->start = start;
-	hmm_range->end = start + npages * PAGE_SIZE;
+	end = start + npages * PAGE_SIZE;
 	hmm_range->dev_private_owner = owner;
 
-	/* Assuming 512MB takes maxmium 1 second to fault page address */
-	timeout = max(npages >> 17, 1ULL) * HMM_RANGE_DEFAULT_TIMEOUT;
-	timeout = jiffies + msecs_to_jiffies(timeout);
+	do {
+		hmm_range->end = min(hmm_range->start + MAX_WALK_BYTE, end);
+
+		pr_debug("hmm range: start = 0x%lx, end = 0x%lx",
+			hmm_range->start, hmm_range->end);
+
+		/* Assuming 512MB takes maxmium 1 second to fault page address */
+		timeout = max((hmm_range->end - hmm_range->start) >> 29, 1UL);
+		timeout *= HMM_RANGE_DEFAULT_TIMEOUT;
+		timeout = jiffies + msecs_to_jiffies(timeout);
 
 retry:
-	hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
-
-	if (likely(!mmap_locked))
-		mmap_read_lock(mm);
-
-	r = hmm_range_fault(hmm_range);
-
-	if (likely(!mmap_locked))
-		mmap_read_unlock(mm);
-	if (unlikely(r)) {
-		/*
-		 * FIXME: This timeout should encompass the retry from
-		 * mmu_interval_read_retry() as well.
-		 */
-		if (r == -EBUSY && !time_after(jiffies, timeout))
-			goto retry;
-		goto out_free_pfns;
-	}
+		hmm_range->notifier_seq = mmu_interval_read_begin(notifier);
+		r = hmm_range_fault(hmm_range);
+		if (unlikely(r)) {
+			/*
+			 * FIXME: This timeout should encompass the retry from
+			 * mmu_interval_read_retry() as well.
+			 */
+			if (r == -EBUSY && !time_after(jiffies, timeout))
+				goto retry;
+			goto out_free_pfns;
+		}
+
+		if (hmm_range->end == end)
+			break;
+		hmm_range->hmm_pfns += MAX_WALK_BYTE >> PAGE_SHIFT;
+		hmm_range->start = hmm_range->end;
+		schedule();
+	} while (hmm_range->end < end);
+
+	hmm_range->start = start;
+	hmm_range->hmm_pfns = pfns;
 
 	/*
 	 * Due to default_flags, all pages are HMM_PFN_VALID or
@@ -231,9 +238,9 @@ out_free_range:
 	return r;
 }
 
-int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)
+bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range)
 {
-	int r;
+	bool r;
 
 	r = mmu_interval_read_retry(hmm_range->notifier,
 				    hmm_range->notifier_seq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
index 14a3c1864085..e2edcd010ccc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.h
@@ -29,25 +29,25 @@
 #include <linux/rwsem.h>
 #include <linux/workqueue.h>
 #include <linux/interval_tree.h>
+#include <linux/mmu_notifier.h>
 
 int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier,
-			       struct mm_struct *mm, struct page **pages,
-			       uint64_t start, uint64_t npages,
-			       struct hmm_range **phmm_range, bool readonly,
-			       bool mmap_locked, void *owner);
-int amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
+			       uint64_t start, uint64_t npages, bool readonly,
+			       void *owner, struct page **pages,
+			       struct hmm_range **phmm_range);
+bool amdgpu_hmm_range_get_pages_done(struct hmm_range *hmm_range);
 
 #if defined(CONFIG_HMM_MIRROR)
-int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr);
-void amdgpu_mn_unregister(struct amdgpu_bo *bo);
+int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr);
+void amdgpu_hmm_unregister(struct amdgpu_bo *bo);
 #else
-static inline int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
+static inline int amdgpu_hmm_register(struct amdgpu_bo *bo, unsigned long addr)
 {
 	DRM_WARN_ONCE("HMM_MIRROR kernel config option is not enabled, "
 		      "add CONFIG_ZONE_DEVICE=y in config file to fix this\n");
 	return -ENODEV;
 }
-static inline void amdgpu_mn_unregister(struct amdgpu_bo *bo) {}
+static inline void amdgpu_hmm_unregister(struct amdgpu_bo *bo) {}
 #endif
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index 258cffe3c06a..bcccc348dbe2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -182,7 +182,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 
 	need_ctx_switch = ring->current_ctx != fence_ctx;
 	if (ring->funcs->emit_pipeline_sync && job &&
-	    ((tmp = amdgpu_sync_get_fence(&job->sched_sync)) ||
+	    ((tmp = amdgpu_sync_get_fence(&job->explicit_sync)) ||
 	     (amdgpu_sriov_vf(adev) && need_ctx_switch) ||
 	     amdgpu_vm_need_pipeline_sync(ring, job))) {
 		need_pipe_sync = true;
@@ -211,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		}
 	}
 
+	amdgpu_ring_ib_begin(ring);
 	if (job && ring->funcs->init_cond_exec)
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -285,6 +286,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	    ring->hw_prio == AMDGPU_GFX_PIPE_PRIO_HIGH)
 		ring->funcs->emit_wave_limit(ring, false);
 
+	amdgpu_ring_ib_end(ring);
 	amdgpu_ring_commit(ring);
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
index 03d115d2b5ed..fcb711a11a5b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
@@ -165,31 +165,52 @@ bool amdgpu_vmid_had_gpu_reset(struct amdgpu_device *adev,
 		atomic_read(&adev->gpu_reset_counter);
 }
 
+/* Check if we need to switch to another set of resources */
+static bool amdgpu_vmid_gds_switch_needed(struct amdgpu_vmid *id,
+					  struct amdgpu_job *job)
+{
+	return id->gds_base != job->gds_base ||
+		id->gds_size != job->gds_size ||
+		id->gws_base != job->gws_base ||
+		id->gws_size != job->gws_size ||
+		id->oa_base != job->oa_base ||
+		id->oa_size != job->oa_size;
+}
+
+/* Check if the id is compatible with the job */
+static bool amdgpu_vmid_compatible(struct amdgpu_vmid *id,
+				   struct amdgpu_job *job)
+{
+	return  id->pd_gpu_addr == job->vm_pd_addr &&
+		!amdgpu_vmid_gds_switch_needed(id, job);
+}
+
 /**
  * amdgpu_vmid_grab_idle - grab idle VMID
  *
  * @vm: vm to allocate id for
  * @ring: ring we want to submit job to
- * @sync: sync object where we add dependencies
  * @idle: resulting idle VMID
+ * @fence: fence to wait for if no id could be grabbed
  *
  * Try to find an idle VMID, if none is idle add a fence to wait to the sync
  * object. Returns -ENOMEM when we are out of memory.
  */
 static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
 				 struct amdgpu_ring *ring,
-				 struct amdgpu_sync *sync,
-				 struct amdgpu_vmid **idle)
+				 struct amdgpu_vmid **idle,
+				 struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
 	unsigned vmhub = ring->funcs->vmhub;
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	struct dma_fence **fences;
 	unsigned i;
-	int r;
 
-	if (!dma_fence_is_signaled(ring->vmid_wait))
-		return amdgpu_sync_fence(sync, ring->vmid_wait);
+	if (!dma_fence_is_signaled(ring->vmid_wait)) {
+		*fence = dma_fence_get(ring->vmid_wait);
+		return 0;
+	}
 
 	fences = kmalloc_array(id_mgr->num_ids, sizeof(void *), GFP_KERNEL);
 	if (!fences)
@@ -228,10 +249,10 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
 			return -ENOMEM;
 		}
 
-		r = amdgpu_sync_fence(sync, &array->base);
+		*fence = dma_fence_get(&array->base);
 		dma_fence_put(ring->vmid_wait);
 		ring->vmid_wait = &array->base;
-		return r;
+		return 0;
 	}
 	kfree(fences);
 
@@ -243,30 +264,29 @@ static int amdgpu_vmid_grab_idle(struct amdgpu_vm *vm,
  *
  * @vm: vm to allocate id for
  * @ring: ring we want to submit job to
- * @sync: sync object where we add dependencies
- * @fence: fence protecting ID from reuse
  * @job: job who wants to use the VMID
  * @id: resulting VMID
+ * @fence: fence to wait for if no id could be grabbed
  *
  * Try to assign a reserved VMID.
  */
 static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 				     struct amdgpu_ring *ring,
-				     struct amdgpu_sync *sync,
-				     struct dma_fence *fence,
 				     struct amdgpu_job *job,
-				     struct amdgpu_vmid **id)
+				     struct amdgpu_vmid **id,
+				     struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
 	unsigned vmhub = ring->funcs->vmhub;
+	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	uint64_t fence_context = adev->fence_context + ring->idx;
 	bool needs_flush = vm->use_cpu_for_update;
 	uint64_t updates = amdgpu_vm_tlb_seq(vm);
 	int r;
 
-	*id = vm->reserved_vmid[vmhub];
+	*id = id_mgr->reserved;
 	if ((*id)->owner != vm->immediate.fence_context ||
-	    (*id)->pd_gpu_addr != job->vm_pd_addr ||
+	    !amdgpu_vmid_compatible(*id, job) ||
 	    (*id)->flushed_updates < updates ||
 	    !(*id)->last_flush ||
 	    ((*id)->last_flush->context != fence_context &&
@@ -282,7 +302,8 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 		tmp = amdgpu_sync_peek_fence(&(*id)->active, ring);
 		if (tmp) {
 			*id = NULL;
-			return amdgpu_sync_fence(sync, tmp);
+			*fence = dma_fence_get(tmp);
+			return 0;
 		}
 		needs_flush = true;
 	}
@@ -290,12 +311,12 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
 	/* Good we can use this VMID. Remember this submission as
 	* user of the VMID.
 	*/
-	r = amdgpu_sync_fence(&(*id)->active, fence);
+	r = amdgpu_sync_fence(&(*id)->active, &job->base.s_fence->finished);
 	if (r)
 		return r;
 
-	(*id)->flushed_updates = updates;
 	job->vm_needs_flush = needs_flush;
+	job->spm_update_needed = true;
 	return 0;
 }
 
@@ -304,19 +325,17 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
  *
  * @vm: vm to allocate id for
  * @ring: ring we want to submit job to
- * @sync: sync object where we add dependencies
- * @fence: fence protecting ID from reuse
  * @job: job who wants to use the VMID
  * @id: resulting VMID
+ * @fence: fence to wait for if no id could be grabbed
  *
  * Try to reuse a VMID for this submission.
  */
 static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
 				 struct amdgpu_ring *ring,
-				 struct amdgpu_sync *sync,
-				 struct dma_fence *fence,
 				 struct amdgpu_job *job,
-				 struct amdgpu_vmid **id)
+				 struct amdgpu_vmid **id,
+				 struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
 	unsigned vmhub = ring->funcs->vmhub;
@@ -335,7 +354,7 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
 		if ((*id)->owner != vm->immediate.fence_context)
 			continue;
 
-		if ((*id)->pd_gpu_addr != job->vm_pd_addr)
+		if (!amdgpu_vmid_compatible(*id, job))
 			continue;
 
 		if (!(*id)->last_flush ||
@@ -352,11 +371,11 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
 		/* Good, we can use this VMID. Remember this submission as
 		 * user of the VMID.
 		 */
-		r = amdgpu_sync_fence(&(*id)->active, fence);
+		r = amdgpu_sync_fence(&(*id)->active,
+				      &job->base.s_fence->finished);
 		if (r)
 			return r;
 
-		(*id)->flushed_updates = updates;
 		job->vm_needs_flush |= needs_flush;
 		return 0;
 	}
@@ -370,15 +389,13 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
  *
  * @vm: vm to allocate id for
  * @ring: ring we want to submit job to
- * @sync: sync object where we add dependencies
- * @fence: fence protecting ID from reuse
  * @job: job who wants to use the VMID
+ * @fence: fence to wait for if no id could be grabbed
  *
  * Allocate an id for the vm, adding fences to the sync obj as necessary.
  */
 int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-		     struct amdgpu_sync *sync, struct dma_fence *fence,
-		     struct amdgpu_job *job)
+		     struct amdgpu_job *job, struct dma_fence **fence)
 {
 	struct amdgpu_device *adev = ring->adev;
 	unsigned vmhub = ring->funcs->vmhub;
@@ -388,16 +405,16 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	int r = 0;
 
 	mutex_lock(&id_mgr->lock);
-	r = amdgpu_vmid_grab_idle(vm, ring, sync, &idle);
+	r = amdgpu_vmid_grab_idle(vm, ring, &idle, fence);
 	if (r || !idle)
 		goto error;
 
 	if (vm->reserved_vmid[vmhub]) {
-		r = amdgpu_vmid_grab_reserved(vm, ring, sync, fence, job, &id);
+		r = amdgpu_vmid_grab_reserved(vm, ring, job, &id, fence);
 		if (r || !id)
 			goto error;
 	} else {
-		r = amdgpu_vmid_grab_used(vm, ring, sync, fence, job, &id);
+		r = amdgpu_vmid_grab_used(vm, ring, job, &id, fence);
 		if (r)
 			goto error;
 
@@ -406,26 +423,35 @@ int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 			id = idle;
 
 			/* Remember this submission as user of the VMID */
-			r = amdgpu_sync_fence(&id->active, fence);
+			r = amdgpu_sync_fence(&id->active,
+					      &job->base.s_fence->finished);
 			if (r)
 				goto error;
 
-			id->flushed_updates = amdgpu_vm_tlb_seq(vm);
 			job->vm_needs_flush = true;
 		}
 
 		list_move_tail(&id->list, &id_mgr->ids_lru);
 	}
 
-	id->pd_gpu_addr = job->vm_pd_addr;
-	id->owner = vm->immediate.fence_context;
-
+	job->gds_switch_needed = amdgpu_vmid_gds_switch_needed(id, job);
 	if (job->vm_needs_flush) {
+		id->flushed_updates = amdgpu_vm_tlb_seq(vm);
 		dma_fence_put(id->last_flush);
 		id->last_flush = NULL;
 	}
 	job->vmid = id - id_mgr->ids;
 	job->pasid = vm->pasid;
+
+	id->gds_base = job->gds_base;
+	id->gds_size = job->gds_size;
+	id->gws_base = job->gws_base;
+	id->gws_size = job->gws_size;
+	id->oa_base = job->oa_base;
+	id->oa_size = job->oa_size;
+	id->pd_gpu_addr = job->vm_pd_addr;
+	id->owner = vm->immediate.fence_context;
+
 	trace_amdgpu_vm_grab_id(vm, ring, job);
 
 error:
@@ -437,31 +463,27 @@ int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
 			       struct amdgpu_vm *vm,
 			       unsigned vmhub)
 {
-	struct amdgpu_vmid_mgr *id_mgr;
-	struct amdgpu_vmid *idle;
-	int r = 0;
+	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 
-	id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	mutex_lock(&id_mgr->lock);
 	if (vm->reserved_vmid[vmhub])
 		goto unlock;
-	if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
-	    AMDGPU_VM_MAX_RESERVED_VMID) {
-		DRM_ERROR("Over limitation of reserved vmid\n");
-		atomic_dec(&id_mgr->reserved_vmid_num);
-		r = -EINVAL;
-		goto unlock;
+
+	++id_mgr->reserved_use_count;
+	if (!id_mgr->reserved) {
+		struct amdgpu_vmid *id;
+
+		id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid,
+				      list);
+		/* Remove from normal round robin handling */
+		list_del_init(&id->list);
+		id_mgr->reserved = id;
 	}
-	/* Select the first entry VMID */
-	idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list);
-	list_del_init(&idle->list);
-	vm->reserved_vmid[vmhub] = idle;
-	mutex_unlock(&id_mgr->lock);
+	vm->reserved_vmid[vmhub] = true;
 
-	return 0;
 unlock:
 	mutex_unlock(&id_mgr->lock);
-	return r;
+	return 0;
 }
 
 void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
@@ -471,12 +493,12 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 
 	mutex_lock(&id_mgr->lock);
-	if (vm->reserved_vmid[vmhub]) {
-		list_add(&vm->reserved_vmid[vmhub]->list,
-			&id_mgr->ids_lru);
-		vm->reserved_vmid[vmhub] = NULL;
-		atomic_dec(&id_mgr->reserved_vmid_num);
+	if (vm->reserved_vmid[vmhub] &&
+	    !--id_mgr->reserved_use_count) {
+		/* give the reserved ID back to normal round robin */
+		list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);
 	}
+	vm->reserved_vmid[vmhub] = false;
 	mutex_unlock(&id_mgr->lock);
 }
 
@@ -543,7 +565,7 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
 
 		mutex_init(&id_mgr->lock);
 		INIT_LIST_HEAD(&id_mgr->ids_lru);
-		atomic_set(&id_mgr->reserved_vmid_num, 0);
+		id_mgr->reserved_use_count = 0;
 
 		/* manage only VMIDs not used by KFD */
 		id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
index 06c8a0034fa5..d1cc09b45da4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
@@ -67,7 +67,8 @@ struct amdgpu_vmid_mgr {
 	unsigned		num_ids;
 	struct list_head	ids_lru;
 	struct amdgpu_vmid	ids[AMDGPU_NUM_VMID];
-	atomic_t		reserved_vmid_num;
+	struct amdgpu_vmid	*reserved;
+	unsigned int		reserved_use_count;
 };
 
 int amdgpu_pasid_alloc(unsigned int bits);
@@ -84,8 +85,7 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
 			       struct amdgpu_vm *vm,
 			       unsigned vmhub);
 int amdgpu_vmid_grab(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
-		     struct amdgpu_sync *sync, struct dma_fence *fence,
-		     struct amdgpu_job *job);
+		     struct amdgpu_job *job, struct dma_fence **fence);
 void amdgpu_vmid_reset(struct amdgpu_device *adev, unsigned vmhub,
 		       unsigned vmid);
 void amdgpu_vmid_reset_all(struct amdgpu_device *adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 89011bae7588..a6aef488a822 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -101,41 +101,6 @@ const char *soc15_ih_clientid_name[] = {
 };
 
 /**
- * amdgpu_hotplug_work_func - work handler for display hotplug event
- *
- * @work: work struct pointer
- *
- * This is the hotplug event work handler (all ASICs).
- * The work gets scheduled from the IRQ handler if there
- * was a hotplug interrupt.  It walks through the connector table
- * and calls hotplug handler for each connector. After this, it sends
- * a DRM hotplug event to alert userspace.
- *
- * This design approach is required in order to defer hotplug event handling
- * from the IRQ handler to a work handler because hotplug handler has to use
- * mutexes which cannot be locked in an IRQ handler (since &mutex_lock may
- * sleep).
- */
-static void amdgpu_hotplug_work_func(struct work_struct *work)
-{
-	struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
-						  hotplug_work);
-	struct drm_device *dev = adev_to_drm(adev);
-	struct drm_mode_config *mode_config = &dev->mode_config;
-	struct drm_connector *connector;
-	struct drm_connector_list_iter iter;
-
-	mutex_lock(&mode_config->mutex);
-	drm_connector_list_iter_begin(dev, &iter);
-	drm_for_each_connector_iter(connector, &iter)
-		amdgpu_connector_hotplug(connector);
-	drm_connector_list_iter_end(&iter);
-	mutex_unlock(&mode_config->mutex);
-	/* Just fire off a uevent and let userspace tell us what to do */
-	drm_helper_hpd_irq_event(dev);
-}
-
-/**
  * amdgpu_irq_disable_all - disable *all* interrupts
  *
  * @adev: amdgpu device pointer
@@ -317,21 +282,6 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
 		}
 	}
 
-	if (!amdgpu_device_has_dc_support(adev)) {
-		if (!adev->enable_virtual_display)
-			/* Disable vblank IRQs aggressively for power-saving */
-			/* XXX: can this be enabled for DC? */
-			adev_to_drm(adev)->vblank_disable_immediate = true;
-
-		r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
-		if (r)
-			return r;
-
-		/* Pre-DCE11 */
-		INIT_WORK(&adev->hotplug_work,
-				amdgpu_hotplug_work_func);
-	}
-
 	INIT_WORK(&adev->irq.ih1_work, amdgpu_irq_handle_ih1);
 	INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2);
 	INIT_WORK(&adev->irq.ih_soft_work, amdgpu_irq_handle_ih_soft);
@@ -345,11 +295,8 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
 	/* PCI devices require shared interrupts. */
 	r = request_irq(irq, amdgpu_irq_handler, IRQF_SHARED, adev_to_drm(adev)->driver->name,
 			adev_to_drm(adev));
-	if (r) {
-		if (!amdgpu_device_has_dc_support(adev))
-			flush_work(&adev->hotplug_work);
+	if (r)
 		return r;
-	}
 	adev->irq.installed = true;
 	adev->irq.irq = irq;
 	adev_to_drm(adev)->max_vblank_count = 0x00ffffff;
@@ -366,9 +313,6 @@ void amdgpu_irq_fini_hw(struct amdgpu_device *adev)
 		adev->irq.installed = false;
 		if (adev->irq.msi_enabled)
 			pci_free_irq_vectors(adev->pdev);
-
-		if (!amdgpu_device_has_dc_support(adev))
-			flush_work(&adev->hotplug_work);
 	}
 
 	amdgpu_ih_ring_fini(adev, &adev->irq.ih_soft);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index cd968e781077..9e549923622b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -88,8 +88,9 @@ exit:
 	return DRM_GPU_SCHED_STAT_NOMINAL;
 }
 
-int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
-		     struct amdgpu_job **job, struct amdgpu_vm *vm)
+int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		     struct drm_sched_entity *entity, void *owner,
+		     unsigned int num_ibs, struct amdgpu_job **job)
 {
 	if (num_ibs == 0)
 		return -EINVAL;
@@ -105,28 +106,34 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 	(*job)->base.sched = &adev->rings[0]->sched;
 	(*job)->vm = vm;
 
-	amdgpu_sync_create(&(*job)->sync);
-	amdgpu_sync_create(&(*job)->sched_sync);
+	amdgpu_sync_create(&(*job)->explicit_sync);
 	(*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
 	(*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET;
 
-	return 0;
+	if (!entity)
+		return 0;
+
+	return drm_sched_job_init(&(*job)->base, entity, owner);
 }
 
-int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
-		enum amdgpu_ib_pool_type pool_type,
-		struct amdgpu_job **job)
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
+			     struct drm_sched_entity *entity, void *owner,
+			     size_t size, enum amdgpu_ib_pool_type pool_type,
+			     struct amdgpu_job **job)
 {
 	int r;
 
-	r = amdgpu_job_alloc(adev, 1, job, NULL);
+	r = amdgpu_job_alloc(adev, NULL, entity, owner, 1, job);
 	if (r)
 		return r;
 
 	(*job)->num_ibs = 1;
 	r = amdgpu_ib_get(adev, NULL, size, pool_type, &(*job)->ibs[0]);
-	if (r)
+	if (r) {
+		if (entity)
+			drm_sched_job_cleanup(&(*job)->base);
 		kfree(*job);
+	}
 
 	return r;
 }
@@ -166,10 +173,13 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
 
 	drm_sched_job_cleanup(s_job);
 
-	amdgpu_sync_free(&job->sync);
-	amdgpu_sync_free(&job->sched_sync);
+	amdgpu_sync_free(&job->explicit_sync);
 
-	dma_fence_put(&job->hw_fence);
+	/* only put the hw fence if has embedded fence */
+	if (!job->hw_fence.ops)
+		kfree(job);
+	else
+		dma_fence_put(&job->hw_fence);
 }
 
 void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
@@ -190,9 +200,11 @@ void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
 
 void amdgpu_job_free(struct amdgpu_job *job)
 {
+	if (job->base.entity)
+		drm_sched_job_cleanup(&job->base);
+
 	amdgpu_job_free_resources(job);
-	amdgpu_sync_free(&job->sync);
-	amdgpu_sync_free(&job->sched_sync);
+	amdgpu_sync_free(&job->explicit_sync);
 	if (job->gang_submit != &job->base.s_fence->scheduled)
 		dma_fence_put(job->gang_submit);
 
@@ -202,25 +214,16 @@ void amdgpu_job_free(struct amdgpu_job *job)
 		dma_fence_put(&job->hw_fence);
 }
 
-int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
-		      void *owner, struct dma_fence **f)
+struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job)
 {
-	int r;
-
-	if (!f)
-		return -EINVAL;
-
-	r = drm_sched_job_init(&job->base, entity, owner);
-	if (r)
-		return r;
+	struct dma_fence *f;
 
 	drm_sched_job_arm(&job->base);
-
-	*f = dma_fence_get(&job->base.s_fence->finished);
+	f = dma_fence_get(&job->base.s_fence->finished);
 	amdgpu_job_free_resources(job);
 	drm_sched_entity_push_job(&job->base);
 
-	return 0;
+	return f;
 }
 
 int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
@@ -238,35 +241,24 @@ int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
 	return 0;
 }
 
-static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
-					       struct drm_sched_entity *s_entity)
+static struct dma_fence *
+amdgpu_job_prepare_job(struct drm_sched_job *sched_job,
+		      struct drm_sched_entity *s_entity)
 {
 	struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
 	struct amdgpu_job *job = to_amdgpu_job(sched_job);
-	struct amdgpu_vm *vm = job->vm;
-	struct dma_fence *fence;
+	struct dma_fence *fence = NULL;
 	int r;
 
-	fence = amdgpu_sync_get_fence(&job->sync);
-	if (fence && drm_sched_dependency_optimized(fence, s_entity)) {
-		r = amdgpu_sync_fence(&job->sched_sync, fence);
-		if (r)
-			DRM_ERROR("Error adding fence (%d)\n", r);
-	}
+	if (!fence && job->gang_submit)
+		fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
 
-	while (fence == NULL && vm && !job->vmid) {
-		r = amdgpu_vmid_grab(vm, ring, &job->sync,
-				     &job->base.s_fence->finished,
-				     job);
+	while (!fence && job->vm && !job->vmid) {
+		r = amdgpu_vmid_grab(job->vm, ring, job, &fence);
 		if (r)
 			DRM_ERROR("Error getting VM ID (%d)\n", r);
-
-		fence = amdgpu_sync_get_fence(&job->sync);
 	}
 
-	if (!fence && job->gang_submit)
-		fence = amdgpu_device_switch_gang(ring->adev, job->gang_submit);
-
 	return fence;
 }
 
@@ -281,8 +273,6 @@ static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
 	job = to_amdgpu_job(sched_job);
 	finished = &job->base.s_fence->finished;
 
-	BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
-
 	trace_amdgpu_sched_run_job(job);
 
 	/* Skip job if VRAM is lost and never resubmit gangs */
@@ -341,7 +331,7 @@ void amdgpu_job_stop_all_jobs_on_sched(struct drm_gpu_scheduler *sched)
 }
 
 const struct drm_sched_backend_ops amdgpu_sched_ops = {
-	.dependency = amdgpu_job_dependency,
+	.prepare_job = amdgpu_job_prepare_job,
 	.run_job = amdgpu_job_run,
 	.timedout_job = amdgpu_job_timedout,
 	.free_job = amdgpu_job_free_cb
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
index ab7b150e5d50..52f2e313ea17 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h
@@ -47,13 +47,14 @@ enum amdgpu_ib_pool_type;
 struct amdgpu_job {
 	struct drm_sched_job    base;
 	struct amdgpu_vm	*vm;
-	struct amdgpu_sync	sync;
-	struct amdgpu_sync	sched_sync;
+	struct amdgpu_sync	explicit_sync;
 	struct dma_fence	hw_fence;
 	struct dma_fence	*gang_submit;
 	uint32_t		preamble_status;
 	uint32_t                preemption_status;
 	bool                    vm_needs_flush;
+	bool			gds_switch_needed;
+	bool			spm_update_needed;
 	uint64_t		vm_pd_addr;
 	unsigned		vmid;
 	unsigned		pasid;
@@ -78,18 +79,20 @@ static inline struct amdgpu_ring *amdgpu_job_ring(struct amdgpu_job *job)
 	return to_amdgpu_ring(job->base.entity->rq->sched);
 }
 
-int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
-		     struct amdgpu_job **job, struct amdgpu_vm *vm);
-int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
-		enum amdgpu_ib_pool_type pool, struct amdgpu_job **job);
+int amdgpu_job_alloc(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+		     struct drm_sched_entity *entity, void *owner,
+		     unsigned int num_ibs, struct amdgpu_job **job);
+int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev,
+			     struct drm_sched_entity *entity, void *owner,
+			     size_t size, enum amdgpu_ib_pool_type pool_type,
+			     struct amdgpu_job **job);
 void amdgpu_job_set_resources(struct amdgpu_job *job, struct amdgpu_bo *gds,
 			      struct amdgpu_bo *gws, struct amdgpu_bo *oa);
 void amdgpu_job_free_resources(struct amdgpu_job *job);
 void amdgpu_job_set_gang_leader(struct amdgpu_job *job,
 				struct amdgpu_job *leader);
 void amdgpu_job_free(struct amdgpu_job *job);
-int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
-		      void *owner, struct dma_fence **f);
+struct dma_fence *amdgpu_job_submit(struct amdgpu_job *job);
 int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
 			     struct dma_fence **fence);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
index 518eb0e40d32..6f81ed4fb0d9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.c
@@ -150,14 +150,15 @@ static int amdgpu_jpeg_dec_set_reg(struct amdgpu_ring *ring, uint32_t handle,
 	const unsigned ib_size_dw = 16;
 	int i, r;
 
-	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
-					AMDGPU_IB_POOL_DIRECT, &job);
+	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+				     AMDGPU_IB_POOL_DIRECT, &job);
 	if (r)
 		return r;
 
 	ib = &job->ibs[0];
 
-	ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0, PACKETJ_TYPE0);
+	ib->ptr[0] = PACKETJ(adev->jpeg.internal.jpeg_pitch, 0, 0,
+			     PACKETJ_TYPE0);
 	ib->ptr[1] = 0xDEADBEEF;
 	for (i = 2; i < 16; i += 2) {
 		ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
@@ -234,3 +235,20 @@ int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
 
 	return 0;
 }
+
+void jpeg_set_ras_funcs(struct amdgpu_device *adev)
+{
+	if (!adev->jpeg.ras)
+		return;
+
+	amdgpu_ras_register_ras_block(adev, &adev->jpeg.ras->ras_block);
+
+	strcpy(adev->jpeg.ras->ras_block.ras_comm.name, "jpeg");
+	adev->jpeg.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__JPEG;
+	adev->jpeg.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON;
+	adev->jpeg.ras_if = &adev->jpeg.ras->ras_block.ras_comm;
+
+	/* If don't define special ras_late_init function, use default ras_late_init */
+	if (!adev->jpeg.ras->ras_block.ras_late_init)
+		adev->jpeg.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
index 635dca59a70a..e8ca3e32ad52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_jpeg.h
@@ -72,5 +72,6 @@ int amdgpu_jpeg_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
 int amdgpu_jpeg_process_poison_irq(struct amdgpu_device *adev,
 				struct amdgpu_irq_src *source,
 				struct amdgpu_iv_entry *entry);
+void jpeg_set_ras_funcs(struct amdgpu_device *adev);
 
 #endif /*__AMDGPU_JPEG_H__*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index fe23e09eec98..7aa7e52ca784 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -29,6 +29,7 @@
 #include "amdgpu.h"
 #include <drm/amdgpu_drm.h>
 #include <drm/drm_drv.h>
+#include <drm/drm_fb_helper.h>
 #include "amdgpu_uvd.h"
 #include "amdgpu_vce.h"
 #include "atom.h"
@@ -337,11 +338,17 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
 		fw_info->feature = adev->psp.cap_feature_version;
 		break;
 	case AMDGPU_INFO_FW_MES_KIQ:
-		fw_info->ver = adev->mes.ucode_fw_version[0];
-		fw_info->feature = 0;
+		fw_info->ver = adev->mes.kiq_version & AMDGPU_MES_VERSION_MASK;
+		fw_info->feature = (adev->mes.kiq_version & AMDGPU_MES_FEAT_VERSION_MASK)
+					>> AMDGPU_MES_FEAT_VERSION_SHIFT;
 		break;
 	case AMDGPU_INFO_FW_MES:
-		fw_info->ver = adev->mes.ucode_fw_version[1];
+		fw_info->ver = adev->mes.sched_version & AMDGPU_MES_VERSION_MASK;
+		fw_info->feature = (adev->mes.sched_version & AMDGPU_MES_FEAT_VERSION_MASK)
+					>> AMDGPU_MES_FEAT_VERSION_SHIFT;
+		break;
+	case AMDGPU_INFO_FW_IMU:
+		fw_info->ver = adev->gfx.imu_fw_version;
 		fw_info->feature = 0;
 		break;
 	default:
@@ -424,7 +431,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 	case AMDGPU_HW_IP_VCN_DEC:
 		type = AMD_IP_BLOCK_TYPE_VCN;
 		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
-			if (adev->uvd.harvest_config & (1 << i))
+			if (adev->vcn.harvest_config & (1 << i))
 				continue;
 
 			if (adev->vcn.inst[i].ring_dec.sched.ready)
@@ -436,7 +443,7 @@ static int amdgpu_hw_ip_info(struct amdgpu_device *adev,
 	case AMDGPU_HW_IP_VCN_ENC:
 		type = AMD_IP_BLOCK_TYPE_VCN;
 		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
-			if (adev->uvd.harvest_config & (1 << i))
+			if (adev->vcn.harvest_config & (1 << i))
 				continue;
 
 			for (j = 0; j < adev->vcn.num_enc_rings; j++)
@@ -790,7 +797,7 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		dev_info->ids_flags = 0;
 		if (adev->flags & AMD_IS_APU)
 			dev_info->ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
-		if (amdgpu_mcbp || amdgpu_sriov_vf(adev))
+		if (amdgpu_mcbp)
 			dev_info->ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
 		if (amdgpu_is_tmz(adev))
 			dev_info->ids_flags |= AMDGPU_IDS_FLAGS_TMZ;
@@ -1166,7 +1173,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 		goto error_vm;
 	}
 
-	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
+	if (amdgpu_mcbp) {
 		uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
 
 		r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
@@ -1230,7 +1237,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 	if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL)
 		amdgpu_vce_free_handles(adev, file_priv);
 
-	if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
+	if (amdgpu_mcbp) {
 		/* TODO: how to handle reserve failure */
 		BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
 		amdgpu_vm_bo_del(adev, fpriv->csa_va);
@@ -1520,6 +1527,15 @@ static int amdgpu_debugfs_firmware_info_show(struct seq_file *m, void *unused)
 			   fw_info.feature, fw_info.ver);
 	}
 
+	/* IMU */
+	query_fw.fw_type = AMDGPU_INFO_FW_IMU;
+	query_fw.index = 0;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "IMU feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
 	/* PSP SOS */
 	query_fw.fw_type = AMDGPU_INFO_FW_SOS;
 	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index ad980f4b66e1..97c05d08a551 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -91,14 +91,12 @@ struct amdgpu_mes {
 	struct amdgpu_bo		*ucode_fw_obj[AMDGPU_MAX_MES_PIPES];
 	uint64_t			ucode_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
 	uint32_t			*ucode_fw_ptr[AMDGPU_MAX_MES_PIPES];
-	uint32_t                        ucode_fw_version[AMDGPU_MAX_MES_PIPES];
 	uint64_t                        uc_start_addr[AMDGPU_MAX_MES_PIPES];
 
 	/* mes ucode data */
 	struct amdgpu_bo		*data_fw_obj[AMDGPU_MAX_MES_PIPES];
 	uint64_t			data_fw_gpu_addr[AMDGPU_MAX_MES_PIPES];
 	uint32_t			*data_fw_ptr[AMDGPU_MAX_MES_PIPES];
-	uint32_t                        data_fw_version[AMDGPU_MAX_MES_PIPES];
 	uint64_t                        data_start_addr[AMDGPU_MAX_MES_PIPES];
 
 	/* eop gpu obj */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
index 37322550d750..8a39300b1a84 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h
@@ -36,7 +36,6 @@
 #include <drm/drm_encoder.h>
 #include <drm/drm_fixed.h>
 #include <drm/drm_crtc_helper.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_probe_helper.h>
 #include <linux/i2c.h>
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 974e85d8b6cc..4e684c2afc70 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -346,17 +346,16 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
  * @adev: amdgpu device object
  * @offset: offset of the BO
  * @size: size of the BO
- * @domain: where to place it
  * @bo_ptr:  used to initialize BOs in structures
  * @cpu_addr: optional CPU address mapping
  *
- * Creates a kernel BO at a specific offset in the address space of the domain.
+ * Creates a kernel BO at a specific offset in VRAM.
  *
  * Returns:
  * 0 on success, negative error code otherwise.
  */
 int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
-			       uint64_t offset, uint64_t size, uint32_t domain,
+			       uint64_t offset, uint64_t size,
 			       struct amdgpu_bo **bo_ptr, void **cpu_addr)
 {
 	struct ttm_operation_ctx ctx = { false, false };
@@ -366,8 +365,9 @@ int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
 	offset &= PAGE_MASK;
 	size = ALIGN(size, PAGE_SIZE);
 
-	r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE, domain, bo_ptr,
-				      NULL, cpu_addr);
+	r = amdgpu_bo_create_reserved(adev, size, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM, bo_ptr, NULL,
+				      cpu_addr);
 	if (r)
 		return r;
 
@@ -422,6 +422,8 @@ void amdgpu_bo_free_kernel(struct amdgpu_bo **bo, u64 *gpu_addr,
 	if (*bo == NULL)
 		return;
 
+	WARN_ON(amdgpu_ttm_adev((*bo)->tbo.bdev)->in_suspend);
+
 	if (likely(amdgpu_bo_reserve(*bo, true) == 0)) {
 		if (cpu_addr)
 			amdgpu_bo_kunmap(*bo);
@@ -446,27 +448,24 @@ static bool amdgpu_bo_validate_size(struct amdgpu_device *adev,
 
 	/*
 	 * If GTT is part of requested domains the check must succeed to
-	 * allow fall back to GTT
+	 * allow fall back to GTT.
 	 */
 	if (domain & AMDGPU_GEM_DOMAIN_GTT) {
 		man = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT);
 
-		if (size < man->size)
+		if (man && size < man->size)
 			return true;
-		else
-			goto fail;
-	}
-
-	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
+		else if (!man)
+			WARN_ON_ONCE("GTT domain requested but GTT mem manager uninitialized");
+		goto fail;
+	} else if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
 		man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
 
-		if (size < man->size)
+		if (man && size < man->size)
 			return true;
-		else
-			goto fail;
+		goto fail;
 	}
 
-
 	/* TODO add more domains checks, such as AMDGPU_GEM_DOMAIN_CPU */
 	return true;
 
@@ -1510,7 +1509,8 @@ u64 amdgpu_bo_gpu_offset_no_check(struct amdgpu_bo *bo)
 uint32_t amdgpu_bo_get_preferred_domain(struct amdgpu_device *adev,
 					    uint32_t domain)
 {
-	if (domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) {
+	if ((domain == (AMDGPU_GEM_DOMAIN_VRAM | AMDGPU_GEM_DOMAIN_GTT)) &&
+	    ((adev->asic_type == CHIP_CARRIZO) || (adev->asic_type == CHIP_STONEY))) {
 		domain = AMDGPU_GEM_DOMAIN_VRAM;
 		if (adev->gmc.real_vram_size <= AMDGPU_SG_THRESHOLD)
 			domain = AMDGPU_GEM_DOMAIN_GTT;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 147b79c10cbb..93207badf83f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -284,7 +284,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
 			    u32 domain, struct amdgpu_bo **bo_ptr,
 			    u64 *gpu_addr, void **cpu_addr);
 int amdgpu_bo_create_kernel_at(struct amdgpu_device *adev,
-			       uint64_t offset, uint64_t size, uint32_t domain,
+			       uint64_t offset, uint64_t size,
 			       struct amdgpu_bo **bo_ptr, void **cpu_addr);
 int amdgpu_bo_create_user(struct amdgpu_device *adev,
 			  struct amdgpu_bo_param *bp,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index effa7df3ddbf..7a2fc920739b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -52,6 +52,32 @@ static int psp_load_smu_fw(struct psp_context *psp);
 static int psp_rap_terminate(struct psp_context *psp);
 static int psp_securedisplay_terminate(struct psp_context *psp);
 
+static int psp_ring_init(struct psp_context *psp,
+			 enum psp_ring_type ring_type)
+{
+	int ret = 0;
+	struct psp_ring *ring;
+	struct amdgpu_device *adev = psp->adev;
+
+	ring = &psp->km_ring;
+
+	ring->ring_type = ring_type;
+
+	/* allocate 4k Page of Local Frame Buffer memory for ring */
+	ring->ring_size = 0x1000;
+	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
+				      AMDGPU_GEM_DOMAIN_VRAM,
+				      &adev->firmware.rbuf,
+				      &ring->ring_mem_mc_addr,
+				      (void **)&ring->ring_mem);
+	if (ret) {
+		ring->ring_size = 0;
+		return ret;
+	}
+
+	return 0;
+}
+
 /*
  * Due to DF Cstate management centralized to PMFW, the firmware
  * loading sequence will be updated as below:
@@ -139,6 +165,7 @@ static int psp_early_init(void *handle)
 	case IP_VERSION(13, 0, 5):
 	case IP_VERSION(13, 0, 8):
 	case IP_VERSION(13, 0, 10):
+	case IP_VERSION(13, 0, 11):
 		psp_v13_0_set_psp_funcs(psp);
 		psp->autoload_supported = true;
 		break;
@@ -172,6 +199,7 @@ void psp_ta_free_shared_buf(struct ta_mem_context *mem_ctx)
 {
 	amdgpu_bo_free_kernel(&mem_ctx->shared_bo, &mem_ctx->shared_mc_addr,
 			      &mem_ctx->shared_buf);
+	mem_ctx->shared_bo = NULL;
 }
 
 static void psp_free_shared_bufs(struct psp_context *psp)
@@ -182,6 +210,7 @@ static void psp_free_shared_bufs(struct psp_context *psp)
 	/* free TMR memory buffer */
 	pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
 	amdgpu_bo_free_kernel(&psp->tmr_bo, &psp->tmr_mc_addr, pptr);
+	psp->tmr_bo = NULL;
 
 	/* free xgmi shared memory */
 	psp_ta_free_shared_buf(&psp->xgmi_context.context.mem_context);
@@ -484,26 +513,22 @@ static int psp_sw_fini(void *handle)
 	struct psp_gfx_cmd_resp *cmd = psp->cmd;
 
 	psp_memory_training_fini(psp);
-	if (psp->sos_fw) {
-		release_firmware(psp->sos_fw);
-		psp->sos_fw = NULL;
-	}
-	if (psp->asd_fw) {
-		release_firmware(psp->asd_fw);
-		psp->asd_fw = NULL;
-	}
-	if (psp->ta_fw) {
-		release_firmware(psp->ta_fw);
-		psp->ta_fw = NULL;
-	}
-	if (psp->cap_fw) {
-		release_firmware(psp->cap_fw);
-		psp->cap_fw = NULL;
-	}
-	if (psp->toc_fw) {
-		release_firmware(psp->toc_fw);
-		psp->toc_fw = NULL;
-	}
+
+	release_firmware(psp->sos_fw);
+	psp->sos_fw = NULL;
+
+	release_firmware(psp->asd_fw);
+	psp->asd_fw = NULL;
+
+	release_firmware(psp->ta_fw);
+	psp->ta_fw = NULL;
+
+	release_firmware(psp->cap_fw);
+	psp->cap_fw = NULL;
+
+	release_firmware(psp->toc_fw);
+	psp->toc_fw = NULL;
+
 	if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 0) ||
 	    adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 7))
 		psp_sysfs_fini(adev);
@@ -743,7 +768,7 @@ static int psp_load_toc(struct psp_context *psp,
 /* Set up Trusted Memory Region */
 static int psp_tmr_init(struct psp_context *psp)
 {
-	int ret;
+	int ret = 0;
 	int tmr_size;
 	void *tmr_buf;
 	void **pptr;
@@ -770,10 +795,12 @@ static int psp_tmr_init(struct psp_context *psp)
 		}
 	}
 
-	pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
-	ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &psp->tmr_bo, &psp->tmr_mc_addr, pptr);
+	if (!psp->tmr_bo) {
+		pptr = amdgpu_sriov_vf(psp->adev) ? &tmr_buf : NULL;
+		ret = amdgpu_bo_create_kernel(psp->adev, tmr_size, PSP_TMR_ALIGNMENT,
+					      AMDGPU_GEM_DOMAIN_VRAM,
+					      &psp->tmr_bo, &psp->tmr_mc_addr, pptr);
+	}
 
 	return ret;
 }
@@ -831,7 +858,7 @@ static int psp_tmr_unload(struct psp_context *psp)
 	struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
 
 	psp_prep_tmr_unload_cmd_buf(psp, cmd);
-	dev_info(psp->adev->dev, "free PSP TMR buffer\n");
+	dev_dbg(psp->adev->dev, "free PSP TMR buffer\n");
 
 	ret = psp_cmd_submit_buf(psp, NULL, cmd,
 				 psp->fence_buf_mc_addr);
@@ -988,6 +1015,8 @@ int psp_ta_unload(struct psp_context *psp, struct ta_context *context)
 
 	ret = psp_cmd_submit_buf(psp, NULL, cmd, psp->fence_buf_mc_addr);
 
+	context->resp_status = cmd->resp.status;
+
 	release_psp_cmd_buf(psp);
 
 	return ret;
@@ -1069,42 +1098,6 @@ int psp_ta_init_shared_buf(struct psp_context *psp,
 				      &mem_ctx->shared_buf);
 }
 
-static void psp_prep_ta_invoke_indirect_cmd_buf(struct psp_gfx_cmd_resp *cmd,
-				       uint32_t ta_cmd_id,
-				       struct ta_context *context)
-{
-	cmd->cmd_id                         = GFX_CMD_ID_INVOKE_CMD;
-	cmd->cmd.cmd_invoke_cmd.session_id  = context->session_id;
-	cmd->cmd.cmd_invoke_cmd.ta_cmd_id   = ta_cmd_id;
-
-	cmd->cmd.cmd_invoke_cmd.buf.num_desc   = 1;
-	cmd->cmd.cmd_invoke_cmd.buf.total_size = context->mem_context.shared_mem_size;
-	cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_size = context->mem_context.shared_mem_size;
-	cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_phy_addr_lo =
-				     lower_32_bits(context->mem_context.shared_mc_addr);
-	cmd->cmd.cmd_invoke_cmd.buf.buf_desc[0].buf_phy_addr_hi =
-				     upper_32_bits(context->mem_context.shared_mc_addr);
-}
-
-int psp_ta_invoke_indirect(struct psp_context *psp,
-		  uint32_t ta_cmd_id,
-		  struct ta_context *context)
-{
-	int ret;
-	struct psp_gfx_cmd_resp *cmd = acquire_psp_cmd_buf(psp);
-
-	psp_prep_ta_invoke_indirect_cmd_buf(cmd, ta_cmd_id, context);
-
-	ret = psp_cmd_submit_buf(psp, NULL, cmd,
-				 psp->fence_buf_mc_addr);
-
-	context->resp_status = cmd->resp.status;
-
-	release_psp_cmd_buf(psp);
-
-	return ret;
-}
-
 static void psp_prep_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd,
 				       uint32_t ta_cmd_id,
 				       uint32_t session_id)
@@ -1547,7 +1540,7 @@ int psp_ras_terminate(struct psp_context *psp)
 	return ret;
 }
 
-static int psp_ras_initialize(struct psp_context *psp)
+int psp_ras_initialize(struct psp_context *psp)
 {
 	int ret;
 	uint32_t boot_cfg = 0xFF;
@@ -1610,7 +1603,7 @@ static int psp_ras_initialize(struct psp_context *psp)
 	psp->ras_context.context.mem_context.shared_mem_size = PSP_RAS_SHARED_MEM_SIZE;
 	psp->ras_context.context.ta_load_type = GFX_CMD_ID_LOAD_TA;
 
-	if (!psp->ras_context.context.initialized) {
+	if (!psp->ras_context.context.mem_context.shared_buf) {
 		ret = psp_ta_init_shared_buf(psp, &psp->ras_context.context.mem_context);
 		if (ret)
 			return ret;
@@ -1631,7 +1624,9 @@ static int psp_ras_initialize(struct psp_context *psp)
 	else {
 		if (ras_cmd->ras_status)
 			dev_warn(psp->adev->dev, "RAS Init Status: 0x%X\n", ras_cmd->ras_status);
-		amdgpu_ras_fini(psp->adev);
+
+		/* fail to load RAS TA */
+		psp->ras_context.context.initialized = false;
 	}
 
 	return ret;
@@ -1938,10 +1933,15 @@ static int psp_securedisplay_initialize(struct psp_context *psp)
 	} else
 		return ret;
 
+	mutex_lock(&psp->securedisplay_context.mutex);
+
 	psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
 			TA_SECUREDISPLAY_COMMAND__QUERY_TA);
 
 	ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA);
+
+	mutex_unlock(&psp->securedisplay_context.mutex);
+
 	if (ret) {
 		psp_securedisplay_terminate(psp);
 		/* free securedisplay shared memory */
@@ -1990,12 +1990,8 @@ int psp_securedisplay_invoke(struct psp_context *psp, uint32_t ta_cmd_id)
 	    ta_cmd_id != TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC)
 		return -EINVAL;
 
-	mutex_lock(&psp->securedisplay_context.mutex);
-
 	ret = psp_ta_invoke(psp, ta_cmd_id, &psp->securedisplay_context.context);
 
-	mutex_unlock(&psp->securedisplay_context.mutex);
-
 	return ret;
 }
 /* SECUREDISPLAY end */
@@ -2732,8 +2728,6 @@ static int psp_suspend(void *handle)
 	}
 
 out:
-	psp_free_shared_bufs(psp);
-
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 58ce3ebb446c..cf4f60c66122 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -118,7 +118,6 @@ struct psp_funcs
 	int (*bootloader_load_dbg_drv)(struct psp_context *psp);
 	int (*bootloader_load_ras_drv)(struct psp_context *psp);
 	int (*bootloader_load_sos)(struct psp_context *psp);
-	int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
 	int (*ring_create)(struct psp_context *psp,
 			   enum psp_ring_type ring_type);
 	int (*ring_stop)(struct psp_context *psp,
@@ -136,6 +135,12 @@ struct psp_funcs
 	int (*vbflash_stat)(struct psp_context *psp);
 };
 
+struct ta_funcs {
+	int (*fn_ta_initialize)(struct psp_context *psp);
+	int (*fn_ta_invoke)(struct psp_context *psp, uint32_t ta_cmd_id);
+	int (*fn_ta_terminate)(struct psp_context *psp);
+};
+
 #define AMDGPU_XGMI_MAX_CONNECTED_NODES		64
 struct psp_xgmi_node_info {
 	uint64_t				node_id;
@@ -309,6 +314,7 @@ struct psp_context
 	struct psp_gfx_cmd_resp		*cmd;
 
 	const struct psp_funcs		*funcs;
+	const struct ta_funcs		*ta_funcs;
 
 	/* firmware buffer */
 	struct amdgpu_bo		*fw_pri_bo;
@@ -389,7 +395,6 @@ struct amdgpu_psp_funcs {
 };
 
 
-#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
 #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
 #define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))
 #define psp_ring_destroy(psp, type) ((psp)->funcs->ring_destroy((psp), (type)))
@@ -463,9 +468,6 @@ int psp_ta_load(struct psp_context *psp, struct ta_context *context);
 int psp_ta_invoke(struct psp_context *psp,
 			uint32_t ta_cmd_id,
 			struct ta_context *context);
-int psp_ta_invoke_indirect(struct psp_context *psp,
-		  uint32_t ta_cmd_id,
-		  struct ta_context *context);
 
 int psp_xgmi_initialize(struct psp_context *psp, bool set_extended_data, bool load_ta);
 int psp_xgmi_terminate(struct psp_context *psp);
@@ -479,7 +481,7 @@ int psp_xgmi_get_topology_info(struct psp_context *psp,
 int psp_xgmi_set_topology_info(struct psp_context *psp,
 			       int number_devices,
 			       struct psp_xgmi_topology_info *topology);
-
+int psp_ras_initialize(struct psp_context *psp);
 int psp_ras_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
 int psp_ras_enable_features(struct psp_context *psp,
 		union ta_ras_cmd_input *info, bool enable);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
index 0988e00612e5..468a67b302d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.c
@@ -41,30 +41,46 @@ static uint32_t get_bin_version(const uint8_t *bin)
 	return hdr->ucode_version;
 }
 
-static void prep_ta_mem_context(struct psp_context *psp,
-					     struct ta_context *context,
+static int prep_ta_mem_context(struct ta_mem_context *mem_context,
 					     uint8_t *shared_buf,
 					     uint32_t shared_buf_len)
 {
-	context->mem_context.shared_mem_size = PAGE_ALIGN(shared_buf_len);
-	psp_ta_init_shared_buf(psp, &context->mem_context);
+	if (mem_context->shared_mem_size < shared_buf_len)
+		return -EINVAL;
+	memset(mem_context->shared_buf, 0, mem_context->shared_mem_size);
+	memcpy((void *)mem_context->shared_buf, shared_buf, shared_buf_len);
 
-	memcpy((void *)context->mem_context.shared_buf, shared_buf, shared_buf_len);
+	return 0;
 }
 
 static bool is_ta_type_valid(enum ta_type_id ta_type)
 {
-	bool ret = false;
+	switch (ta_type) {
+	case TA_TYPE_RAS:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static const struct ta_funcs ras_ta_funcs = {
+	.fn_ta_initialize = psp_ras_initialize,
+	.fn_ta_invoke    = psp_ras_invoke,
+	.fn_ta_terminate = psp_ras_terminate
+};
 
+static void set_ta_context_funcs(struct psp_context *psp,
+						      enum ta_type_id ta_type,
+						      struct ta_context **pcontext)
+{
 	switch (ta_type) {
 	case TA_TYPE_RAS:
-		ret = true;
+		*pcontext = &psp->ras_context.context;
+		psp->ta_funcs = &ras_ta_funcs;
 		break;
 	default:
 		break;
 	}
-
-	return ret;
 }
 
 static const struct file_operations ta_load_debugfs_fops = {
@@ -85,8 +101,7 @@ static const struct file_operations ta_invoke_debugfs_fops = {
 	.owner  = THIS_MODULE
 };
 
-
-/**
+/*
  * DOC: AMDGPU TA debugfs interfaces
  *
  * Three debugfs interfaces can be opened by a program to
@@ -111,15 +126,18 @@ static const struct file_operations ta_invoke_debugfs_fops = {
  *
  * - For TA invoke debugfs interface:
  *   Transmit buffer:
+ *    - TA type (4bytes)
  *    - TA ID (4bytes)
  *    - TA CMD ID (4bytes)
- *    - TA shard buf length (4bytes)
+ *    - TA shard buf length
+ *      (4bytes, value not beyond TA shared memory size)
  *    - TA shared buf
  *   Receive buffer:
  *    - TA shared buf
  *
  * - For TA unload debugfs interface:
  *   Transmit buffer:
+ *    - TA type (4bytes)
  *    - TA ID (4bytes)
  */
 
@@ -131,59 +149,92 @@ static ssize_t ta_if_load_debugfs_write(struct file *fp, const char *buf, size_t
 	uint32_t copy_pos   = 0;
 	int      ret        = 0;
 
-	struct amdgpu_device *adev   = (struct amdgpu_device *)file_inode(fp)->i_private;
-	struct psp_context   *psp    = &adev->psp;
-	struct ta_context    context = {0};
+	struct amdgpu_device *adev    = (struct amdgpu_device *)file_inode(fp)->i_private;
+	struct psp_context   *psp     = &adev->psp;
+	struct ta_context    *context = NULL;
 
 	if (!buf)
 		return -EINVAL;
 
 	ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t));
 	if (ret || (!is_ta_type_valid(ta_type)))
-		return -EINVAL;
+		return -EFAULT;
 
 	copy_pos += sizeof(uint32_t);
 
 	ret = copy_from_user((void *)&ta_bin_len, &buf[copy_pos], sizeof(uint32_t));
 	if (ret)
-		return -EINVAL;
+		return -EFAULT;
 
 	copy_pos += sizeof(uint32_t);
 
 	ta_bin = kzalloc(ta_bin_len, GFP_KERNEL);
 	if (!ta_bin)
-		ret = -ENOMEM;
+		return -ENOMEM;
 	if (copy_from_user((void *)ta_bin, &buf[copy_pos], ta_bin_len)) {
 		ret = -EFAULT;
 		goto err_free_bin;
 	}
 
-	ret = psp_ras_terminate(psp);
-	if (ret) {
-		dev_err(adev->dev, "Failed to unload embedded RAS TA\n");
+	/* Set TA context and functions */
+	set_ta_context_funcs(psp, ta_type, &context);
+
+	if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_terminate) {
+		dev_err(adev->dev, "Unsupported function to terminate TA\n");
+		ret = -EOPNOTSUPP;
 		goto err_free_bin;
 	}
 
-	context.ta_type             = ta_type;
-	context.ta_load_type        = GFX_CMD_ID_LOAD_TA;
-	context.bin_desc.fw_version = get_bin_version(ta_bin);
-	context.bin_desc.size_bytes = ta_bin_len;
-	context.bin_desc.start_addr = ta_bin;
+	/*
+	 * Allocate TA shared buf in case shared buf was freed
+	 * due to loading TA failed before.
+	 */
+	if (!context->mem_context.shared_buf) {
+		ret = psp_ta_init_shared_buf(psp, &context->mem_context);
+		if (ret) {
+			ret = -ENOMEM;
+			goto err_free_bin;
+		}
+	}
+
+	ret = psp_fn_ta_terminate(psp);
+	if (ret || context->resp_status) {
+		dev_err(adev->dev,
+			"Failed to unload embedded TA (%d) and status (0x%X)\n",
+			ret, context->resp_status);
+		if (!ret)
+			ret = -EINVAL;
+		goto err_free_ta_shared_buf;
+	}
+
+	/* Prepare TA context for TA initialization */
+	context->ta_type                     = ta_type;
+	context->bin_desc.fw_version         = get_bin_version(ta_bin);
+	context->bin_desc.size_bytes         = ta_bin_len;
+	context->bin_desc.start_addr         = ta_bin;
 
-	ret = psp_ta_load(psp, &context);
+	if (!psp->ta_funcs->fn_ta_initialize) {
+		dev_err(adev->dev, "Unsupported function to initialize TA\n");
+		ret = -EOPNOTSUPP;
+		goto err_free_ta_shared_buf;
+	}
 
-	if (ret || context.resp_status) {
-		dev_err(adev->dev, "TA load via debugfs failed (%d) status %d\n",
-			 ret, context.resp_status);
+	ret = psp_fn_ta_initialize(psp);
+	if (ret || context->resp_status) {
+		dev_err(adev->dev, "Failed to load TA via debugfs (%d) and status (0x%X)\n",
+			ret, context->resp_status);
 		if (!ret)
 			ret = -EINVAL;
-		goto err_free_bin;
+		goto err_free_ta_shared_buf;
 	}
 
-	context.initialized = true;
-	if (copy_to_user((char *)buf, (void *)&context.session_id, sizeof(uint32_t)))
+	if (copy_to_user((char *)buf, (void *)&context->session_id, sizeof(uint32_t)))
 		ret = -EFAULT;
 
+err_free_ta_shared_buf:
+	/* Only free TA shared buf when returns error code */
+	if (ret && context->mem_context.shared_buf)
+		psp_ta_free_shared_buf(&context->mem_context);
 err_free_bin:
 	kfree(ta_bin);
 
@@ -192,58 +243,85 @@ err_free_bin:
 
 static ssize_t ta_if_unload_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
 {
-	uint32_t ta_id  = 0;
-	int      ret    = 0;
+	uint32_t ta_type    = 0;
+	uint32_t ta_id      = 0;
+	uint32_t copy_pos   = 0;
+	int      ret        = 0;
 
-	struct amdgpu_device *adev   = (struct amdgpu_device *)file_inode(fp)->i_private;
-	struct psp_context   *psp    = &adev->psp;
-	struct ta_context    context = {0};
+	struct amdgpu_device *adev    = (struct amdgpu_device *)file_inode(fp)->i_private;
+	struct psp_context   *psp     = &adev->psp;
+	struct ta_context    *context = NULL;
 
 	if (!buf)
 		return -EINVAL;
 
-	ret = copy_from_user((void *)&ta_id, buf, sizeof(uint32_t));
+	ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t));
+	if (ret || (!is_ta_type_valid(ta_type)))
+		return -EFAULT;
+
+	copy_pos += sizeof(uint32_t);
+
+	ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t));
 	if (ret)
-		return -EINVAL;
+		return -EFAULT;
 
-	context.session_id = ta_id;
+	set_ta_context_funcs(psp, ta_type, &context);
+	context->session_id = ta_id;
 
-	ret = psp_ta_unload(psp, &context);
-	if (!ret)
-		context.initialized = false;
+	if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_terminate) {
+		dev_err(adev->dev, "Unsupported function to terminate TA\n");
+		return -EOPNOTSUPP;
+	}
+
+	ret = psp_fn_ta_terminate(psp);
+	if (ret || context->resp_status) {
+		dev_err(adev->dev, "Failed to unload TA via debugfs (%d) and status (0x%X)\n",
+			ret, context->resp_status);
+		if (!ret)
+			ret = -EINVAL;
+	}
+
+	if (context->mem_context.shared_buf)
+		psp_ta_free_shared_buf(&context->mem_context);
 
 	return ret;
 }
 
 static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size_t len, loff_t *off)
 {
+	uint32_t ta_type        = 0;
 	uint32_t ta_id          = 0;
 	uint32_t cmd_id         = 0;
 	uint32_t shared_buf_len = 0;
-	uint8_t	 *shared_buf    = NULL;
+	uint8_t *shared_buf     = NULL;
 	uint32_t copy_pos       = 0;
 	int      ret            = 0;
 
-	struct amdgpu_device *adev   = (struct amdgpu_device *)file_inode(fp)->i_private;
-	struct psp_context   *psp    = &adev->psp;
-	struct ta_context    context = {0};
+	struct amdgpu_device *adev    = (struct amdgpu_device *)file_inode(fp)->i_private;
+	struct psp_context   *psp     = &adev->psp;
+	struct ta_context    *context = NULL;
 
 	if (!buf)
 		return -EINVAL;
 
+	ret = copy_from_user((void *)&ta_type, &buf[copy_pos], sizeof(uint32_t));
+	if (ret)
+		return -EFAULT;
+	copy_pos += sizeof(uint32_t);
+
 	ret = copy_from_user((void *)&ta_id, &buf[copy_pos], sizeof(uint32_t));
 	if (ret)
-		return -EINVAL;
+		return -EFAULT;
 	copy_pos += sizeof(uint32_t);
 
 	ret = copy_from_user((void *)&cmd_id, &buf[copy_pos], sizeof(uint32_t));
 	if (ret)
-		return -EINVAL;
+		return -EFAULT;
 	copy_pos += sizeof(uint32_t);
 
 	ret = copy_from_user((void *)&shared_buf_len, &buf[copy_pos], sizeof(uint32_t));
 	if (ret)
-		return -EINVAL;
+		return -EFAULT;
 	copy_pos += sizeof(uint32_t);
 
 	shared_buf = kzalloc(shared_buf_len, GFP_KERNEL);
@@ -254,26 +332,39 @@ static ssize_t ta_if_invoke_debugfs_write(struct file *fp, const char *buf, size
 		goto err_free_shared_buf;
 	}
 
-	context.session_id = ta_id;
+	set_ta_context_funcs(psp, ta_type, &context);
+
+	if (!context->initialized) {
+		dev_err(adev->dev, "TA is not initialized\n");
+		ret = -EINVAL;
+		goto err_free_shared_buf;
+	}
+
+	if (!psp->ta_funcs || !psp->ta_funcs->fn_ta_invoke) {
+		dev_err(adev->dev, "Unsupported function to invoke TA\n");
+		ret = -EOPNOTSUPP;
+		goto err_free_shared_buf;
+	}
 
-	prep_ta_mem_context(psp, &context, shared_buf, shared_buf_len);
+	context->session_id = ta_id;
 
-	ret = psp_ta_invoke_indirect(psp, cmd_id, &context);
+	ret = prep_ta_mem_context(&context->mem_context, shared_buf, shared_buf_len);
+	if (ret)
+		goto err_free_shared_buf;
 
-	if (ret || context.resp_status) {
-		dev_err(adev->dev, "TA invoke via debugfs failed (%d) status %d\n",
-			 ret, context.resp_status);
-		if (!ret)
+	ret = psp_fn_ta_invoke(psp, cmd_id);
+	if (ret || context->resp_status) {
+		dev_err(adev->dev, "Failed to invoke TA via debugfs (%d) and status (0x%X)\n",
+			ret, context->resp_status);
+		if (!ret) {
 			ret = -EINVAL;
-		goto err_free_ta_shared_buf;
+			goto err_free_shared_buf;
+		}
 	}
 
-	if (copy_to_user((char *)buf, context.mem_context.shared_buf, shared_buf_len))
+	if (copy_to_user((char *)buf, context->mem_context.shared_buf, shared_buf_len))
 		ret = -EFAULT;
 
-err_free_ta_shared_buf:
-	psp_ta_free_shared_buf(&context.mem_context);
-
 err_free_shared_buf:
 	kfree(shared_buf);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h
index cfc1542f63ef..14cd1c81c3e6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp_ta.h
@@ -24,6 +24,11 @@
 #ifndef __AMDGPU_PSP_TA_H__
 #define __AMDGPU_PSP_TA_H__
 
+/* Calling set_ta_context_funcs is required before using the following macros */
+#define psp_fn_ta_initialize(psp) ((psp)->ta_funcs->fn_ta_initialize((psp)))
+#define psp_fn_ta_invoke(psp, ta_cmd_id) ((psp)->ta_funcs->fn_ta_invoke((psp), (ta_cmd_id)))
+#define psp_fn_ta_terminate(psp) ((psp)->ta_funcs->fn_ta_terminate((psp)))
+
 void amdgpu_ta_if_debugfs_init(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index a4b47e1bd111..ad490c1e2f57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1267,7 +1267,7 @@ static ssize_t amdgpu_ras_sysfs_features_read(struct device *dev,
 	struct amdgpu_ras *con =
 		container_of(attr, struct amdgpu_ras, features_attr);
 
-	return scnprintf(buf, PAGE_SIZE, "feature mask: 0x%x\n", con->features);
+	return sysfs_emit(buf, "feature mask: 0x%x\n", con->features);
 }
 
 static void amdgpu_ras_sysfs_remove_bad_page_node(struct amdgpu_device *adev)
@@ -1561,7 +1561,6 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
 {
 	bool poison_stat = false;
 	struct amdgpu_device *adev = obj->adev;
-	struct ras_err_data err_data = {0, 0, 0, NULL};
 	struct amdgpu_ras_block_object *block_obj =
 		amdgpu_ras_get_ras_block(adev, obj->head.block, 0);
 
@@ -1584,7 +1583,7 @@ static void amdgpu_ras_interrupt_poison_consumption_handler(struct ras_manager *
 	}
 
 	if (!adev->gmc.xgmi.connected_to_cpu)
-		amdgpu_umc_poison_handler(adev, &err_data, false);
+		amdgpu_umc_poison_handler(adev, false);
 
 	if (block_obj->hw_ops->handle_poison_consumption)
 		poison_stat = block_obj->hw_ops->handle_poison_consumption(adev);
@@ -1949,7 +1948,12 @@ static void amdgpu_ras_do_recovery(struct work_struct *work)
 
 		reset_context.method = AMD_RESET_METHOD_NONE;
 		reset_context.reset_req_dev = adev;
-		clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+
+		/* Perform full reset in fatal error mode */
+		if (!amdgpu_ras_is_poison_mode_supported(ras->adev))
+			set_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
+		else
+			clear_bit(AMDGPU_NEED_FULL_RESET, &reset_context.flags);
 
 		amdgpu_device_gpu_recover(ras->adev, NULL, &reset_context);
 	}
@@ -2344,7 +2348,8 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
 				adev->ras_hw_enabled |= ~(1 << AMDGPU_RAS_BLOCK__UMC |
 							    1 << AMDGPU_RAS_BLOCK__DF);
 
-				if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0))
+				if (adev->ip_versions[VCN_HWIP][0] == IP_VERSION(2, 6, 0) ||
+				    adev->ip_versions[VCN_HWIP][0] == IP_VERSION(4, 0, 0))
 					adev->ras_hw_enabled |= (1 << AMDGPU_RAS_BLOCK__VCN |
 							1 << AMDGPU_RAS_BLOCK__JPEG);
 				else
@@ -2848,7 +2853,6 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
 	struct amdgpu_device *adev = NULL;
 	uint32_t gpu_id = 0;
 	uint32_t umc_inst = 0, ch_inst = 0;
-	struct ras_err_data err_data = {0, 0, 0, NULL};
 
 	/*
 	 * If the error was generated in UMC_V2, which belongs to GPU UMCs,
@@ -2887,31 +2891,10 @@ static int amdgpu_bad_page_notifier(struct notifier_block *nb,
 	dev_info(adev->dev, "Uncorrectable error detected in UMC inst: %d, chan_idx: %d",
 			     umc_inst, ch_inst);
 
-	err_data.err_addr =
-		kcalloc(adev->umc.max_ras_err_cnt_per_query,
-			sizeof(struct eeprom_table_record), GFP_KERNEL);
-	if (!err_data.err_addr) {
-		dev_warn(adev->dev,
-			"Failed to alloc memory for umc error record in mca notifier!\n");
+	if (!amdgpu_umc_page_retirement_mca(adev, m->addr, ch_inst, umc_inst))
+		return NOTIFY_OK;
+	else
 		return NOTIFY_DONE;
-	}
-
-	/*
-	 * Translate UMC channel address to Physical address
-	 */
-	if (adev->umc.ras &&
-	    adev->umc.ras->convert_ras_error_address)
-		adev->umc.ras->convert_ras_error_address(adev,
-			&err_data, m->addr, ch_inst, umc_inst);
-
-	if (amdgpu_bad_page_threshold != 0) {
-		amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
-						err_data.err_addr_cnt);
-		amdgpu_ras_save_bad_pages(adev);
-	}
-
-	kfree(err_data.err_addr);
-	return NOTIFY_OK;
 }
 
 static struct notifier_block amdgpu_bad_page_nb = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
index 84c241b9a2a1..2d9f3f4cd79e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
@@ -33,12 +33,29 @@
 
 #include "amdgpu_reset.h"
 
-#define EEPROM_I2C_MADDR_VEGA20         0x0
-#define EEPROM_I2C_MADDR_ARCTURUS       0x40000
-#define EEPROM_I2C_MADDR_ARCTURUS_D342  0x0
-#define EEPROM_I2C_MADDR_SIENNA_CICHLID 0x0
-#define EEPROM_I2C_MADDR_ALDEBARAN      0x0
-#define EEPROM_I2C_MADDR_SMU_13_0_0     (0x54UL << 16)
+/* These are memory addresses as would be seen by one or more EEPROM
+ * chips strung on the I2C bus, usually by manipulating pins 1-3 of a
+ * set of EEPROM devices. They form a continuous memory space.
+ *
+ * The I2C device address includes the device type identifier, 1010b,
+ * which is a reserved value and indicates that this is an I2C EEPROM
+ * device. It also includes the top 3 bits of the 19 bit EEPROM memory
+ * address, namely bits 18, 17, and 16. This makes up the 7 bit
+ * address sent on the I2C bus with bit 0 being the direction bit,
+ * which is not represented here, and sent by the hardware directly.
+ *
+ * For instance,
+ *   50h = 1010000b => device type identifier 1010b, bits 18:16 = 000b, address 0.
+ *   54h = 1010100b => --"--, bits 18:16 = 100b, address 40000h.
+ *   56h = 1010110b => --"--, bits 18:16 = 110b, address 60000h.
+ * Depending on the size of the I2C EEPROM device(s), bits 18:16 may
+ * address memory in a device or a device on the I2C bus, depending on
+ * the status of pins 1-3. See top of amdgpu_eeprom.c.
+ *
+ * The RAS table lives either at address 0 or address 40000h of EEPROM.
+ */
+#define EEPROM_I2C_MADDR_0      0x0
+#define EEPROM_I2C_MADDR_4      0x40000
 
 /*
  * The 2 macros bellow represent the actual size in bytes that
@@ -90,6 +107,16 @@
 
 static bool __is_ras_eeprom_supported(struct amdgpu_device *adev)
 {
+	if (adev->asic_type == CHIP_IP_DISCOVERY) {
+		switch (adev->ip_versions[MP1_HWIP][0]) {
+		case IP_VERSION(13, 0, 0):
+		case IP_VERSION(13, 0, 10):
+			return true;
+		default:
+			return false;
+		}
+	}
+
 	return  adev->asic_type == CHIP_VEGA20 ||
 		adev->asic_type == CHIP_ARCTURUS ||
 		adev->asic_type == CHIP_SIENNA_CICHLID ||
@@ -107,16 +134,30 @@ static bool __get_eeprom_i2c_addr_arct(struct amdgpu_device *adev,
 	if (strnstr(atom_ctx->vbios_version,
 	            "D342",
 		    sizeof(atom_ctx->vbios_version)))
-		control->i2c_address = EEPROM_I2C_MADDR_ARCTURUS_D342;
+		control->i2c_address = EEPROM_I2C_MADDR_0;
 	else
-		control->i2c_address = EEPROM_I2C_MADDR_ARCTURUS;
+		control->i2c_address = EEPROM_I2C_MADDR_4;
 
 	return true;
 }
 
+static bool __get_eeprom_i2c_addr_ip_discovery(struct amdgpu_device *adev,
+				       struct amdgpu_ras_eeprom_control *control)
+{
+	switch (adev->ip_versions[MP1_HWIP][0]) {
+	case IP_VERSION(13, 0, 0):
+	case IP_VERSION(13, 0, 10):
+		control->i2c_address = EEPROM_I2C_MADDR_4;
+		return true;
+	default:
+		return false;
+	}
+}
+
 static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
 				  struct amdgpu_ras_eeprom_control *control)
 {
+	struct atom_context *atom_ctx = adev->mode_info.atom_context;
 	u8 i2c_addr;
 
 	if (!control)
@@ -139,27 +180,34 @@ static bool __get_eeprom_i2c_addr(struct amdgpu_device *adev,
 
 	switch (adev->asic_type) {
 	case CHIP_VEGA20:
-		control->i2c_address = EEPROM_I2C_MADDR_VEGA20;
+		control->i2c_address = EEPROM_I2C_MADDR_0;
 		break;
 
 	case CHIP_ARCTURUS:
 		return __get_eeprom_i2c_addr_arct(adev, control);
 
 	case CHIP_SIENNA_CICHLID:
-		control->i2c_address = EEPROM_I2C_MADDR_SIENNA_CICHLID;
+		control->i2c_address = EEPROM_I2C_MADDR_0;
 		break;
 
 	case CHIP_ALDEBARAN:
-		control->i2c_address = EEPROM_I2C_MADDR_ALDEBARAN;
+		if (strnstr(atom_ctx->vbios_version, "D673",
+			    sizeof(atom_ctx->vbios_version)))
+			control->i2c_address = EEPROM_I2C_MADDR_4;
+		else
+			control->i2c_address = EEPROM_I2C_MADDR_0;
 		break;
 
+	case CHIP_IP_DISCOVERY:
+		return __get_eeprom_i2c_addr_ip_discovery(adev, control);
+
 	default:
 		return false;
 	}
 
 	switch (adev->ip_versions[MP1_HWIP][0]) {
 	case IP_VERSION(13, 0, 0):
-		control->i2c_address = EEPROM_I2C_MADDR_SMU_13_0_0;
+		control->i2c_address = EEPROM_I2C_MADDR_4;
 		break;
 
 	default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index d3558c34d406..dc474b809604 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -569,3 +569,15 @@ int amdgpu_ring_init_mqd(struct amdgpu_ring *ring)
 
 	return mqd_mgr->init_mqd(adev, ring->mqd_ptr, &prop);
 }
+
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring)
+{
+	if (ring->is_sw_ring)
+		amdgpu_sw_ring_ib_begin(ring);
+}
+
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring)
+{
+	if (ring->is_sw_ring)
+		amdgpu_sw_ring_ib_end(ring);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 82c178a9033a..f752c7ae7f60 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -39,6 +39,7 @@ struct amdgpu_vm;
 #define AMDGPU_MAX_RINGS		28
 #define AMDGPU_MAX_HWIP_RINGS		8
 #define AMDGPU_MAX_GFX_RINGS		2
+#define AMDGPU_MAX_SW_GFX_RINGS         2
 #define AMDGPU_MAX_COMPUTE_RINGS	8
 #define AMDGPU_MAX_VCE_RINGS		3
 #define AMDGPU_MAX_UVD_ENC_RINGS	2
@@ -59,6 +60,7 @@ enum amdgpu_ring_priority_level {
 #define AMDGPU_FENCE_FLAG_64BIT         (1 << 0)
 #define AMDGPU_FENCE_FLAG_INT           (1 << 1)
 #define AMDGPU_FENCE_FLAG_TC_WB_ONLY    (1 << 2)
+#define AMDGPU_FENCE_FLAG_EXEC          (1 << 3)
 
 #define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
 
@@ -143,8 +145,13 @@ signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
 				      uint32_t wait_seq,
 				      signed long timeout);
 unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
+
 void amdgpu_fence_driver_isr_toggle(struct amdgpu_device *adev, bool stop);
 
+u64 amdgpu_fence_last_unsignaled_time_us(struct amdgpu_ring *ring);
+void amdgpu_fence_update_start_timestamp(struct amdgpu_ring *ring, uint32_t seq,
+					 ktime_t timestamp);
+
 /*
  * Rings.
  */
@@ -279,6 +286,10 @@ struct amdgpu_ring {
 	bool			is_mes_queue;
 	uint32_t		hw_queue_id;
 	struct amdgpu_mes_ctx_data *mes_ctx;
+
+	bool            is_sw_ring;
+	unsigned int    entry_index;
+
 };
 
 #define amdgpu_ring_parse_cs(r, p, job, ib) ((r)->funcs->parse_cs((p), (job), (ib)))
@@ -307,6 +318,9 @@ struct amdgpu_ring {
 #define amdgpu_ring_preempt_ib(r) (r)->funcs->preempt_ib(r)
 
 int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw);
+void amdgpu_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_ring_ib_end(struct amdgpu_ring *ring);
+
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
new file mode 100644
index 000000000000..62079f0e3ee8
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.c
@@ -0,0 +1,514 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+#include <linux/slab.h>
+#include <drm/drm_print.h>
+
+#include "amdgpu_ring_mux.h"
+#include "amdgpu_ring.h"
+#include "amdgpu.h"
+
+#define AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT (HZ / 2)
+#define AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US 10000
+
+static const struct ring_info {
+	unsigned int hw_pio;
+	const char *ring_name;
+} sw_ring_info[] = {
+	{ AMDGPU_RING_PRIO_DEFAULT, "gfx_low"},
+	{ AMDGPU_RING_PRIO_2, "gfx_high"},
+};
+
+static struct kmem_cache *amdgpu_mux_chunk_slab;
+
+static inline struct amdgpu_mux_entry *amdgpu_ring_mux_sw_entry(struct amdgpu_ring_mux *mux,
+								struct amdgpu_ring *ring)
+{
+	return ring->entry_index < mux->ring_entry_size ?
+			&mux->ring_entry[ring->entry_index] : NULL;
+}
+
+/* copy packages on sw ring range[begin, end) */
+static void amdgpu_ring_mux_copy_pkt_from_sw_ring(struct amdgpu_ring_mux *mux,
+						  struct amdgpu_ring *ring,
+						  u64 s_start, u64 s_end)
+{
+	u64 start, end;
+	struct amdgpu_ring *real_ring = mux->real_ring;
+
+	start = s_start & ring->buf_mask;
+	end = s_end & ring->buf_mask;
+
+	if (start == end) {
+		DRM_ERROR("no more data copied from sw ring\n");
+		return;
+	}
+	if (start > end) {
+		amdgpu_ring_alloc(real_ring, (ring->ring_size >> 2) + end - start);
+		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start],
+					   (ring->ring_size >> 2) - start);
+		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[0], end);
+	} else {
+		amdgpu_ring_alloc(real_ring, end - start);
+		amdgpu_ring_write_multiple(real_ring, (void *)&ring->ring[start], end - start);
+	}
+}
+
+static void amdgpu_mux_resubmit_chunks(struct amdgpu_ring_mux *mux)
+{
+	struct amdgpu_mux_entry *e = NULL;
+	struct amdgpu_mux_chunk *chunk;
+	uint32_t seq, last_seq;
+	int i;
+
+	/*find low priority entries:*/
+	if (!mux->s_resubmit)
+		return;
+
+	for (i = 0; i < mux->num_ring_entries; i++) {
+		if (mux->ring_entry[i].ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+			e = &mux->ring_entry[i];
+			break;
+		}
+	}
+
+	if (!e) {
+		DRM_ERROR("%s no low priority ring found\n", __func__);
+		return;
+	}
+
+	last_seq = atomic_read(&e->ring->fence_drv.last_seq);
+	seq = mux->seqno_to_resubmit;
+	if (last_seq < seq) {
+		/*resubmit all the fences between (last_seq, seq]*/
+		list_for_each_entry(chunk, &e->list, entry) {
+			if (chunk->sync_seq > last_seq && chunk->sync_seq <= seq) {
+				amdgpu_fence_update_start_timestamp(e->ring,
+								    chunk->sync_seq,
+								    ktime_get());
+				amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, e->ring,
+								      chunk->start,
+								      chunk->end);
+				mux->wptr_resubmit = chunk->end;
+				amdgpu_ring_commit(mux->real_ring);
+			}
+		}
+	}
+
+	del_timer(&mux->resubmit_timer);
+	mux->s_resubmit = false;
+}
+
+static void amdgpu_ring_mux_schedule_resubmit(struct amdgpu_ring_mux *mux)
+{
+	mod_timer(&mux->resubmit_timer, jiffies + AMDGPU_MUX_RESUBMIT_JIFFIES_TIMEOUT);
+}
+
+static void amdgpu_mux_resubmit_fallback(struct timer_list *t)
+{
+	struct amdgpu_ring_mux *mux = from_timer(mux, t, resubmit_timer);
+
+	if (!spin_trylock(&mux->lock)) {
+		amdgpu_ring_mux_schedule_resubmit(mux);
+		DRM_ERROR("reschedule resubmit\n");
+		return;
+	}
+	amdgpu_mux_resubmit_chunks(mux);
+	spin_unlock(&mux->lock);
+}
+
+int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+			 unsigned int entry_size)
+{
+	mux->real_ring = ring;
+	mux->num_ring_entries = 0;
+
+	mux->ring_entry = kcalloc(entry_size, sizeof(struct amdgpu_mux_entry), GFP_KERNEL);
+	if (!mux->ring_entry)
+		return -ENOMEM;
+
+	mux->ring_entry_size = entry_size;
+	mux->s_resubmit = false;
+
+	amdgpu_mux_chunk_slab = kmem_cache_create("amdgpu_mux_chunk",
+						  sizeof(struct amdgpu_mux_chunk), 0,
+						  SLAB_HWCACHE_ALIGN, NULL);
+	if (!amdgpu_mux_chunk_slab) {
+		DRM_ERROR("create amdgpu_mux_chunk cache failed\n");
+		return -ENOMEM;
+	}
+
+	spin_lock_init(&mux->lock);
+	timer_setup(&mux->resubmit_timer, amdgpu_mux_resubmit_fallback, 0);
+
+	return 0;
+}
+
+void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux)
+{
+	struct amdgpu_mux_entry *e;
+	struct amdgpu_mux_chunk *chunk, *chunk2;
+	int i;
+
+	for (i = 0; i < mux->num_ring_entries; i++) {
+		e = &mux->ring_entry[i];
+		list_for_each_entry_safe(chunk, chunk2, &e->list, entry) {
+			list_del(&chunk->entry);
+			kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
+		}
+	}
+	kmem_cache_destroy(amdgpu_mux_chunk_slab);
+	kfree(mux->ring_entry);
+	mux->ring_entry = NULL;
+	mux->num_ring_entries = 0;
+	mux->ring_entry_size = 0;
+}
+
+int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	struct amdgpu_mux_entry *e;
+
+	if (mux->num_ring_entries >= mux->ring_entry_size) {
+		DRM_ERROR("add sw ring exceeding max entry size\n");
+		return -ENOENT;
+	}
+
+	e = &mux->ring_entry[mux->num_ring_entries];
+	ring->entry_index = mux->num_ring_entries;
+	e->ring = ring;
+
+	INIT_LIST_HEAD(&e->list);
+	mux->num_ring_entries += 1;
+	return 0;
+}
+
+void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr)
+{
+	struct amdgpu_mux_entry *e;
+
+	spin_lock(&mux->lock);
+
+	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT)
+		amdgpu_mux_resubmit_chunks(mux);
+
+	e = amdgpu_ring_mux_sw_entry(mux, ring);
+	if (!e) {
+		DRM_ERROR("cannot find entry for sw ring\n");
+		spin_unlock(&mux->lock);
+		return;
+	}
+
+	/* We could skip this set wptr as preemption in process. */
+	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && mux->pending_trailing_fence_signaled) {
+		spin_unlock(&mux->lock);
+		return;
+	}
+
+	e->sw_cptr = e->sw_wptr;
+	/* Update cptr if the package already copied in resubmit functions */
+	if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT && e->sw_cptr < mux->wptr_resubmit)
+		e->sw_cptr = mux->wptr_resubmit;
+	e->sw_wptr = wptr;
+	e->start_ptr_in_hw_ring = mux->real_ring->wptr;
+
+	/* Skip copying for the packages already resubmitted.*/
+	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT || mux->wptr_resubmit < wptr) {
+		amdgpu_ring_mux_copy_pkt_from_sw_ring(mux, ring, e->sw_cptr, wptr);
+		e->end_ptr_in_hw_ring = mux->real_ring->wptr;
+		amdgpu_ring_commit(mux->real_ring);
+	} else {
+		e->end_ptr_in_hw_ring = mux->real_ring->wptr;
+	}
+	spin_unlock(&mux->lock);
+}
+
+u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	struct amdgpu_mux_entry *e;
+
+	e = amdgpu_ring_mux_sw_entry(mux, ring);
+	if (!e) {
+		DRM_ERROR("cannot find entry for sw ring\n");
+		return 0;
+	}
+
+	return e->sw_wptr;
+}
+
+/**
+ * amdgpu_ring_mux_get_rptr - get the readptr of the software ring
+ * @mux: the multiplexer the software rings attach to
+ * @ring: the software ring of which we calculate the readptr
+ *
+ * The return value of the readptr is not precise while the other rings could
+ * write data onto the real ring buffer.After overwriting on the real ring, we
+ * can not decide if our packages have been excuted or not read yet. However,
+ * this function is only called by the tools such as umr to collect the latest
+ * packages for the hang analysis. We assume the hang happens near our latest
+ * submit. Thus we could use the following logic to give the clue:
+ * If the readptr is between start and end, then we return the copy pointer
+ * plus the distance from start to readptr. If the readptr is before start, we
+ * return the copy pointer. Lastly, if the readptr is past end, we return the
+ * write pointer.
+ */
+u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	struct amdgpu_mux_entry *e;
+	u64 readp, offset, start, end;
+
+	e = amdgpu_ring_mux_sw_entry(mux, ring);
+	if (!e) {
+		DRM_ERROR("no sw entry found!\n");
+		return 0;
+	}
+
+	readp = amdgpu_ring_get_rptr(mux->real_ring);
+
+	start = e->start_ptr_in_hw_ring & mux->real_ring->buf_mask;
+	end = e->end_ptr_in_hw_ring & mux->real_ring->buf_mask;
+	if (start > end) {
+		if (readp <= end)
+			readp += mux->real_ring->ring_size >> 2;
+		end += mux->real_ring->ring_size >> 2;
+	}
+
+	if (start <= readp && readp <= end) {
+		offset = readp - start;
+		e->sw_rptr = (e->sw_cptr + offset) & ring->buf_mask;
+	} else if (readp < start) {
+		e->sw_rptr = e->sw_cptr;
+	} else {
+		/* end < readptr */
+		e->sw_rptr = e->sw_wptr;
+	}
+
+	return e->sw_rptr;
+}
+
+u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+	WARN_ON(!ring->is_sw_ring);
+	return amdgpu_ring_mux_get_rptr(mux, ring);
+}
+
+u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+	WARN_ON(!ring->is_sw_ring);
+	return amdgpu_ring_mux_get_wptr(mux, ring);
+}
+
+void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+	WARN_ON(!ring->is_sw_ring);
+	amdgpu_ring_mux_set_wptr(mux, ring, ring->wptr);
+}
+
+/* Override insert_nop to prevent emitting nops to the software rings */
+void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
+{
+	WARN_ON(!ring->is_sw_ring);
+}
+
+const char *amdgpu_sw_ring_name(int idx)
+{
+	return idx < ARRAY_SIZE(sw_ring_info) ?
+		sw_ring_info[idx].ring_name : NULL;
+}
+
+unsigned int amdgpu_sw_ring_priority(int idx)
+{
+	return idx < ARRAY_SIZE(sw_ring_info) ?
+		sw_ring_info[idx].hw_pio : AMDGPU_RING_PRIO_DEFAULT;
+}
+
+/*Scan on low prio rings to have unsignaled fence and high ring has no fence.*/
+static int amdgpu_mcbp_scan(struct amdgpu_ring_mux *mux)
+{
+	struct amdgpu_ring *ring;
+	int i, need_preempt;
+
+	need_preempt = 0;
+	for (i = 0; i < mux->num_ring_entries; i++) {
+		ring = mux->ring_entry[i].ring;
+		if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT &&
+		    amdgpu_fence_count_emitted(ring) > 0)
+			return 0;
+		if (ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT &&
+		    amdgpu_fence_last_unsignaled_time_us(ring) >
+		    AMDGPU_MAX_LAST_UNSIGNALED_THRESHOLD_US)
+			need_preempt = 1;
+	}
+	return need_preempt && !mux->s_resubmit;
+}
+
+/* Trigger Mid-Command Buffer Preemption (MCBP) and find if we need to resubmit. */
+static int amdgpu_mcbp_trigger_preempt(struct amdgpu_ring_mux *mux)
+{
+	int r;
+
+	spin_lock(&mux->lock);
+	mux->pending_trailing_fence_signaled = true;
+	r = amdgpu_ring_preempt_ib(mux->real_ring);
+	spin_unlock(&mux->lock);
+	return r;
+}
+
+void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+	WARN_ON(!ring->is_sw_ring);
+	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT) {
+		if (amdgpu_mcbp_scan(mux) > 0)
+			amdgpu_mcbp_trigger_preempt(mux);
+		return;
+	}
+
+	amdgpu_ring_mux_start_ib(mux, ring);
+}
+
+void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_ring_mux *mux = &adev->gfx.muxer;
+
+	WARN_ON(!ring->is_sw_ring);
+	if (ring->hw_prio > AMDGPU_RING_PRIO_DEFAULT)
+		return;
+	amdgpu_ring_mux_end_ib(mux, ring);
+}
+
+void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	struct amdgpu_mux_entry *e;
+	struct amdgpu_mux_chunk *chunk;
+
+	spin_lock(&mux->lock);
+	amdgpu_mux_resubmit_chunks(mux);
+	spin_unlock(&mux->lock);
+
+	e = amdgpu_ring_mux_sw_entry(mux, ring);
+	if (!e) {
+		DRM_ERROR("cannot find entry!\n");
+		return;
+	}
+
+	chunk = kmem_cache_alloc(amdgpu_mux_chunk_slab, GFP_KERNEL);
+	if (!chunk) {
+		DRM_ERROR("alloc amdgpu_mux_chunk_slab failed\n");
+		return;
+	}
+
+	chunk->start = ring->wptr;
+	list_add_tail(&chunk->entry, &e->list);
+}
+
+static void scan_and_remove_signaled_chunk(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	uint32_t last_seq = 0;
+	struct amdgpu_mux_entry *e;
+	struct amdgpu_mux_chunk *chunk, *tmp;
+
+	e = amdgpu_ring_mux_sw_entry(mux, ring);
+	if (!e) {
+		DRM_ERROR("cannot find entry!\n");
+		return;
+	}
+
+	last_seq = atomic_read(&ring->fence_drv.last_seq);
+
+	list_for_each_entry_safe(chunk, tmp, &e->list, entry) {
+		if (chunk->sync_seq <= last_seq) {
+			list_del(&chunk->entry);
+			kmem_cache_free(amdgpu_mux_chunk_slab, chunk);
+		}
+	}
+}
+
+void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring)
+{
+	struct amdgpu_mux_entry *e;
+	struct amdgpu_mux_chunk *chunk;
+
+	e = amdgpu_ring_mux_sw_entry(mux, ring);
+	if (!e) {
+		DRM_ERROR("cannot find entry!\n");
+		return;
+	}
+
+	chunk = list_last_entry(&e->list, struct amdgpu_mux_chunk, entry);
+	if (!chunk) {
+		DRM_ERROR("cannot find chunk!\n");
+		return;
+	}
+
+	chunk->end = ring->wptr;
+	chunk->sync_seq = READ_ONCE(ring->fence_drv.sync_seq);
+
+	scan_and_remove_signaled_chunk(mux, ring);
+}
+
+bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux)
+{
+	struct amdgpu_mux_entry *e;
+	struct amdgpu_ring *ring = NULL;
+	int i;
+
+	if (!mux->pending_trailing_fence_signaled)
+		return false;
+
+	if (mux->real_ring->trail_seq != le32_to_cpu(*mux->real_ring->trail_fence_cpu_addr))
+		return false;
+
+	for (i = 0; i < mux->num_ring_entries; i++) {
+		e = &mux->ring_entry[i];
+		if (e->ring->hw_prio <= AMDGPU_RING_PRIO_DEFAULT) {
+			ring = e->ring;
+			break;
+		}
+	}
+
+	if (!ring) {
+		DRM_ERROR("cannot find low priority ring\n");
+		return false;
+	}
+
+	amdgpu_fence_process(ring);
+	if (amdgpu_fence_count_emitted(ring) > 0) {
+		mux->s_resubmit = true;
+		mux->seqno_to_resubmit = ring->fence_drv.sync_seq;
+		amdgpu_ring_mux_schedule_resubmit(mux);
+	}
+
+	mux->pending_trailing_fence_signaled = false;
+	return true;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
new file mode 100644
index 000000000000..4be45fc14954
--- /dev/null
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring_mux.h
@@ -0,0 +1,103 @@
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#ifndef __AMDGPU_RING_MUX__
+#define __AMDGPU_RING_MUX__
+
+#include <linux/timer.h>
+#include <linux/spinlock.h>
+#include "amdgpu_ring.h"
+
+struct amdgpu_ring;
+
+/**
+ * struct amdgpu_mux_entry - the entry recording software rings copying information.
+ * @ring: the pointer to the software ring.
+ * @start_ptr_in_hw_ring: last start location copied to in the hardware ring.
+ * @end_ptr_in_hw_ring: last end location copied to in the hardware ring.
+ * @sw_cptr: the position of the copy pointer in the sw ring.
+ * @sw_rptr: the read pointer in software ring.
+ * @sw_wptr: the write pointer in software ring.
+ * @list: list head for amdgpu_mux_chunk
+ */
+struct amdgpu_mux_entry {
+	struct amdgpu_ring      *ring;
+	u64                     start_ptr_in_hw_ring;
+	u64                     end_ptr_in_hw_ring;
+	u64                     sw_cptr;
+	u64                     sw_rptr;
+	u64                     sw_wptr;
+	struct list_head        list;
+};
+
+struct amdgpu_ring_mux {
+	struct amdgpu_ring      *real_ring;
+
+	struct amdgpu_mux_entry *ring_entry;
+	unsigned int            num_ring_entries;
+	unsigned int            ring_entry_size;
+	/*the lock for copy data from different software rings*/
+	spinlock_t              lock;
+	bool                    s_resubmit;
+	uint32_t                seqno_to_resubmit;
+	u64                     wptr_resubmit;
+	struct timer_list       resubmit_timer;
+
+	bool                    pending_trailing_fence_signaled;
+};
+
+/**
+ * struct amdgpu_mux_chunk - save the location of indirect buffer's package on softare rings.
+ * @entry: the list entry.
+ * @sync_seq: the fence seqno related with the saved IB.
+ * @start:- start location on the software ring.
+ * @end:- end location on the software ring.
+ */
+struct amdgpu_mux_chunk {
+	struct list_head        entry;
+	uint32_t                sync_seq;
+	u64                     start;
+	u64                     end;
+};
+
+int amdgpu_ring_mux_init(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring,
+			 unsigned int entry_size);
+void amdgpu_ring_mux_fini(struct amdgpu_ring_mux *mux);
+int amdgpu_ring_mux_add_sw_ring(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_set_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring, u64 wptr);
+u64 amdgpu_ring_mux_get_wptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+u64 amdgpu_ring_mux_get_rptr(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_start_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+void amdgpu_ring_mux_end_ib(struct amdgpu_ring_mux *mux, struct amdgpu_ring *ring);
+bool amdgpu_mcbp_handle_trailing_fence_irq(struct amdgpu_ring_mux *mux);
+
+u64 amdgpu_sw_ring_get_rptr_gfx(struct amdgpu_ring *ring);
+u64 amdgpu_sw_ring_get_wptr_gfx(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_set_wptr_gfx(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
+void amdgpu_sw_ring_ib_begin(struct amdgpu_ring *ring);
+void amdgpu_sw_ring_ib_end(struct amdgpu_ring *ring);
+const char *amdgpu_sw_ring_name(int idx);
+unsigned int amdgpu_sw_ring_priority(int idx);
+
+#endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
index cc7597a15fe9..2c1d82fc4c34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_securedisplay.c
@@ -121,6 +121,7 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u
 
 	switch (op) {
 	case 1:
+		mutex_lock(&psp->securedisplay_context.mutex);
 		psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
 			TA_SECUREDISPLAY_COMMAND__QUERY_TA);
 		ret = psp_securedisplay_invoke(psp, TA_SECUREDISPLAY_COMMAND__QUERY_TA);
@@ -131,8 +132,10 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u
 			else
 				psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);
 		}
+		mutex_unlock(&psp->securedisplay_context.mutex);
 		break;
 	case 2:
+		mutex_lock(&psp->securedisplay_context.mutex);
 		psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
 			TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
 		securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id = phy_id;
@@ -146,6 +149,7 @@ static ssize_t amdgpu_securedisplay_debugfs_write(struct file *f, const char __u
 				psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);
 			}
 		}
+		mutex_unlock(&psp->securedisplay_context.mutex);
 		break;
 	default:
 		dev_err(adev->dev, "Invalid input: %s\n", str);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index 090e66a1b284..bac7976975bd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -259,6 +259,14 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
 	return 0;
 }
 
+/* Free the entry back to the slab */
+static void amdgpu_sync_entry_free(struct amdgpu_sync_entry *e)
+{
+	hash_del(&e->node);
+	dma_fence_put(e->fence);
+	kmem_cache_free(amdgpu_sync_slab, e);
+}
+
 /**
  * amdgpu_sync_peek_fence - get the next fence not signaled yet
  *
@@ -280,9 +288,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 		struct drm_sched_fence *s_fence = to_drm_sched_fence(f);
 
 		if (dma_fence_is_signaled(f)) {
-			hash_del(&e->node);
-			dma_fence_put(f);
-			kmem_cache_free(amdgpu_sync_slab, e);
+			amdgpu_sync_entry_free(e);
 			continue;
 		}
 		if (ring && s_fence) {
@@ -355,15 +361,42 @@ int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone)
 			if (r)
 				return r;
 		} else {
-			hash_del(&e->node);
-			dma_fence_put(f);
-			kmem_cache_free(amdgpu_sync_slab, e);
+			amdgpu_sync_entry_free(e);
 		}
 	}
 
 	return 0;
 }
 
+/**
+ * amdgpu_sync_push_to_job - push fences into job
+ * @sync: sync object to get the fences from
+ * @job: job to push the fences into
+ *
+ * Add all unsignaled fences from sync to job.
+ */
+int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job)
+{
+	struct amdgpu_sync_entry *e;
+	struct hlist_node *tmp;
+	struct dma_fence *f;
+	int i, r;
+
+	hash_for_each_safe(sync->fences, i, tmp, e, node) {
+		f = e->fence;
+		if (dma_fence_is_signaled(f)) {
+			amdgpu_sync_entry_free(e);
+			continue;
+		}
+
+		dma_fence_get(f);
+		r = drm_sched_job_add_dependency(&job->base, f);
+		if (r)
+			return r;
+	}
+	return 0;
+}
+
 int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
 {
 	struct amdgpu_sync_entry *e;
@@ -375,9 +408,7 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr)
 		if (r)
 			return r;
 
-		hash_del(&e->node);
-		dma_fence_put(e->fence);
-		kmem_cache_free(amdgpu_sync_slab, e);
+		amdgpu_sync_entry_free(e);
 	}
 
 	return 0;
@@ -396,11 +427,8 @@ void amdgpu_sync_free(struct amdgpu_sync *sync)
 	struct hlist_node *tmp;
 	unsigned int i;
 
-	hash_for_each_safe(sync->fences, i, tmp, e, node) {
-		hash_del(&e->node);
-		dma_fence_put(e->fence);
-		kmem_cache_free(amdgpu_sync_slab, e);
-	}
+	hash_for_each_safe(sync->fences, i, tmp, e, node)
+		amdgpu_sync_entry_free(e);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
index 2d5c613cda10..cf1e9e858efd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
@@ -30,6 +30,7 @@ struct dma_fence;
 struct dma_resv;
 struct amdgpu_device;
 struct amdgpu_ring;
+struct amdgpu_job;
 
 enum amdgpu_sync_mode {
 	AMDGPU_SYNC_ALWAYS,
@@ -54,6 +55,7 @@ struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
 				     struct amdgpu_ring *ring);
 struct dma_fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
 int amdgpu_sync_clone(struct amdgpu_sync *source, struct amdgpu_sync *clone);
+int amdgpu_sync_push_to_job(struct amdgpu_sync *sync, struct amdgpu_job *job);
 int amdgpu_sync_wait(struct amdgpu_sync *sync, bool intr);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
 int amdgpu_sync_init(void);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 7d647993fd7e..55e0284b2bdd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -58,6 +58,7 @@
 #include "amdgpu_amdkfd.h"
 #include "amdgpu_sdma.h"
 #include "amdgpu_ras.h"
+#include "amdgpu_hmm.h"
 #include "amdgpu_atomfirmware.h"
 #include "amdgpu_res_cursor.h"
 #include "bif/bif_4_1_d.h"
@@ -189,7 +190,6 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 	struct amdgpu_device *adev = ring->adev;
 	unsigned offset, num_pages, num_dw, num_bytes;
 	uint64_t src_addr, dst_addr;
-	struct dma_fence *fence;
 	struct amdgpu_job *job;
 	void *cpu_addr;
 	uint64_t flags;
@@ -229,7 +229,9 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
 	num_bytes = num_pages * 8 * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
 
-	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
+	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     num_dw * 4 + num_bytes,
 				     AMDGPU_IB_POOL_DELAYED, &job);
 	if (r)
 		return r;
@@ -269,18 +271,8 @@ static int amdgpu_ttm_map_buffer(struct ttm_buffer_object *bo,
 		}
 	}
 
-	r = amdgpu_job_submit(job, &adev->mman.entity,
-			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
-	if (r)
-		goto error_free;
-
-	dma_fence_put(fence);
-
-	return r;
-
-error_free:
-	amdgpu_job_free(job);
-	return r;
+	dma_fence_put(amdgpu_job_submit(job));
+	return 0;
 }
 
 /**
@@ -643,9 +635,6 @@ struct amdgpu_ttm_tt {
 	struct task_struct	*usertask;
 	uint32_t		userflags;
 	bool			bound;
-#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-	struct hmm_range	*range;
-#endif
 };
 
 #define ttm_to_amdgpu_ttm_tt(ptr)	container_of(ptr, struct amdgpu_ttm_tt, ttm)
@@ -658,7 +647,8 @@ struct amdgpu_ttm_tt {
  * Calling function must call amdgpu_ttm_tt_userptr_range_done() once and only
  * once afterwards to stop HMM tracking
  */
-int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
+				 struct hmm_range **range)
 {
 	struct ttm_tt *ttm = bo->tbo.ttm;
 	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
@@ -668,16 +658,15 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 	bool readonly;
 	int r = 0;
 
+	/* Make sure get_user_pages_done() can cleanup gracefully */
+	*range = NULL;
+
 	mm = bo->notifier.mm;
 	if (unlikely(!mm)) {
 		DRM_DEBUG_DRIVER("BO is not registered?\n");
 		return -EFAULT;
 	}
 
-	/* Another get_user_pages is running at the same time?? */
-	if (WARN_ON(gtt->range))
-		return -EFAULT;
-
 	if (!mmget_not_zero(mm)) /* Happens during process shutdown */
 		return -ESRCH;
 
@@ -694,9 +683,8 @@ int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages)
 	}
 
 	readonly = amdgpu_ttm_tt_is_readonly(ttm);
-	r = amdgpu_hmm_range_get_pages(&bo->notifier, mm, pages, start,
-				       ttm->num_pages, &gtt->range, readonly,
-				       true, NULL);
+	r = amdgpu_hmm_range_get_pages(&bo->notifier, start, ttm->num_pages,
+				       readonly, NULL, pages, range);
 out_unlock:
 	mmap_read_unlock(mm);
 	if (r)
@@ -707,36 +695,37 @@ out_unlock:
 	return r;
 }
 
+/* amdgpu_ttm_tt_discard_user_pages - Discard range and pfn array allocations
+ */
+void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
+				      struct hmm_range *range)
+{
+	struct amdgpu_ttm_tt *gtt = (void *)ttm;
+
+	if (gtt && gtt->userptr && range)
+		amdgpu_hmm_range_get_pages_done(range);
+}
+
 /*
- * amdgpu_ttm_tt_userptr_range_done - stop HMM track the CPU page table change
+ * amdgpu_ttm_tt_get_user_pages_done - stop HMM track the CPU page table change
  * Check if the pages backing this ttm range have been invalidated
  *
  * Returns: true if pages are still valid
  */
-bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
+				       struct hmm_range *range)
 {
 	struct amdgpu_ttm_tt *gtt = ttm_to_amdgpu_ttm_tt(ttm);
-	bool r = false;
 
-	if (!gtt || !gtt->userptr)
+	if (!gtt || !gtt->userptr || !range)
 		return false;
 
 	DRM_DEBUG_DRIVER("user_pages_done 0x%llx pages 0x%x\n",
 		gtt->userptr, ttm->num_pages);
 
-	WARN_ONCE(!gtt->range || !gtt->range->hmm_pfns,
-		"No user pages to check\n");
-
-	if (gtt->range) {
-		/*
-		 * FIXME: Must always hold notifier_lock for this, and must
-		 * not ignore the return code.
-		 */
-		r = amdgpu_hmm_range_get_pages_done(gtt->range);
-		gtt->range = NULL;
-	}
+	WARN_ONCE(!range->hmm_pfns, "No user pages to check\n");
 
-	return !r;
+	return !amdgpu_hmm_range_get_pages_done(range);
 }
 #endif
 
@@ -813,20 +802,6 @@ static void amdgpu_ttm_tt_unpin_userptr(struct ttm_device *bdev,
 	/* unmap the pages mapped to the device */
 	dma_unmap_sgtable(adev->dev, ttm->sg, direction, 0);
 	sg_free_table(ttm->sg);
-
-#if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-	if (gtt->range) {
-		unsigned long i;
-
-		for (i = 0; i < ttm->num_pages; i++) {
-			if (ttm->pages[i] !=
-			    hmm_pfn_to_page(gtt->range->hmm_pfns[i]))
-				break;
-		}
-
-		WARN((i == ttm->num_pages), "Missing get_user_page_done\n");
-	}
-#endif
 }
 
 static void amdgpu_ttm_gart_bind(struct amdgpu_device *adev,
@@ -1177,8 +1152,9 @@ int amdgpu_ttm_tt_get_userptr(const struct ttm_buffer_object *tbo,
  * @addr:  The address in the current tasks VM space to use
  * @flags: Requirements of userptr object.
  *
- * Called by amdgpu_gem_userptr_ioctl() to bind userptr pages
- * to current task
+ * Called by amdgpu_gem_userptr_ioctl() and kfd_ioctl_alloc_memory_of_gpu() to
+ * bind userptr pages to current task and by kfd_ioctl_acquire_vm() to
+ * initialize GPU VM for a KFD process.
  */
 int amdgpu_ttm_tt_set_userptr(struct ttm_buffer_object *bo,
 			      uint64_t addr, uint32_t flags)
@@ -1417,7 +1393,8 @@ static void amdgpu_ttm_vram_mm_access(struct amdgpu_device *adev, loff_t pos,
 }
 
 static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
-					unsigned long offset, void *buf, int len, int write)
+					unsigned long offset, void *buf,
+					int len, int write)
 {
 	struct amdgpu_bo *abo = ttm_to_amdgpu_bo(bo);
 	struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
@@ -1441,26 +1418,27 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
 		memcpy(adev->mman.sdma_access_ptr, buf, len);
 
 	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
-	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, AMDGPU_IB_POOL_DELAYED, &job);
+	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     num_dw * 4, AMDGPU_IB_POOL_DELAYED,
+				     &job);
 	if (r)
 		goto out;
 
 	amdgpu_res_first(abo->tbo.resource, offset, len, &src_mm);
-	src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) + src_mm.start;
+	src_addr = amdgpu_ttm_domain_start(adev, bo->resource->mem_type) +
+		src_mm.start;
 	dst_addr = amdgpu_bo_gpu_offset(adev->mman.sdma_access_bo);
 	if (write)
 		swap(src_addr, dst_addr);
 
-	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, PAGE_SIZE, false);
+	amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr,
+				PAGE_SIZE, false);
 
 	amdgpu_ring_pad_ib(adev->mman.buffer_funcs_ring, &job->ibs[0]);
 	WARN_ON(job->ibs[0].length_dw > num_dw);
 
-	r = amdgpu_job_submit(job, &adev->mman.entity, AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
-	if (r) {
-		amdgpu_job_free(job);
-		goto out;
-	}
+	fence = amdgpu_job_submit(job);
 
 	if (!dma_fence_wait_timeout(fence, false, adev->sdma_timeout))
 		r = -ETIMEDOUT;
@@ -1560,6 +1538,23 @@ static void amdgpu_ttm_fw_reserve_vram_fini(struct amdgpu_device *adev)
 		NULL, &adev->mman.fw_vram_usage_va);
 }
 
+/*
+ * Driver Reservation functions
+ */
+/**
+ * amdgpu_ttm_drv_reserve_vram_fini - free drv reserved vram
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * free drv reserved vram if it has been reserved.
+ */
+static void amdgpu_ttm_drv_reserve_vram_fini(struct amdgpu_device *adev)
+{
+	amdgpu_bo_free_kernel(&adev->mman.drv_vram_usage_reserved_bo,
+						  NULL,
+						  &adev->mman.drv_vram_usage_va);
+}
+
 /**
  * amdgpu_ttm_fw_reserve_vram_init - create bo vram reservation from fw
  *
@@ -1581,11 +1576,35 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
 	return amdgpu_bo_create_kernel_at(adev,
 					  adev->mman.fw_vram_usage_start_offset,
 					  adev->mman.fw_vram_usage_size,
-					  AMDGPU_GEM_DOMAIN_VRAM,
 					  &adev->mman.fw_vram_usage_reserved_bo,
 					  &adev->mman.fw_vram_usage_va);
 }
 
+/**
+ * amdgpu_ttm_drv_reserve_vram_init - create bo vram reservation from driver
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * create bo vram reservation from drv.
+ */
+static int amdgpu_ttm_drv_reserve_vram_init(struct amdgpu_device *adev)
+{
+	u64 vram_size = adev->gmc.visible_vram_size;
+
+	adev->mman.drv_vram_usage_va = NULL;
+	adev->mman.drv_vram_usage_reserved_bo = NULL;
+
+	if (adev->mman.drv_vram_usage_size == 0 ||
+	    adev->mman.drv_vram_usage_size > vram_size)
+		return 0;
+
+	return amdgpu_bo_create_kernel_at(adev,
+					  adev->mman.drv_vram_usage_start_offset,
+					  adev->mman.drv_vram_usage_size,
+					  &adev->mman.drv_vram_usage_reserved_bo,
+					  &adev->mman.drv_vram_usage_va);
+}
+
 /*
  * Memoy training reservation functions
  */
@@ -1662,7 +1681,6 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
 		ret = amdgpu_bo_create_kernel_at(adev,
 					 ctx->c2p_train_data_offset,
 					 ctx->train_data_size,
-					 AMDGPU_GEM_DOMAIN_VRAM,
 					 &ctx->c2p_bo,
 					 NULL);
 		if (ret) {
@@ -1676,7 +1694,6 @@ static int amdgpu_ttm_reserve_tmr(struct amdgpu_device *adev)
 	ret = amdgpu_bo_create_kernel_at(adev,
 				adev->gmc.real_vram_size - adev->mman.discovery_tmr_size,
 				adev->mman.discovery_tmr_size,
-				AMDGPU_GEM_DOMAIN_VRAM,
 				&adev->mman.discovery_memory,
 				NULL);
 	if (ret) {
@@ -1754,6 +1771,14 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	}
 
 	/*
+	 *The reserved vram for driver must be pinned to the specified
+	 *place on the VRAM, so reserve it early.
+	 */
+	r = amdgpu_ttm_drv_reserve_vram_init(adev);
+	if (r)
+		return r;
+
+	/*
 	 * only NAVI10 and onwards ASIC support for IP discovery.
 	 * If IP discovery enabled, a block of memory should be
 	 * reserved for IP discovey.
@@ -1769,21 +1794,18 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	 * avoid display artifacts while transitioning between pre-OS
 	 * and driver.  */
 	r = amdgpu_bo_create_kernel_at(adev, 0, adev->mman.stolen_vga_size,
-				       AMDGPU_GEM_DOMAIN_VRAM,
 				       &adev->mman.stolen_vga_memory,
 				       NULL);
 	if (r)
 		return r;
 	r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_vga_size,
 				       adev->mman.stolen_extended_size,
-				       AMDGPU_GEM_DOMAIN_VRAM,
 				       &adev->mman.stolen_extended_memory,
 				       NULL);
 	if (r)
 		return r;
 	r = amdgpu_bo_create_kernel_at(adev, adev->mman.stolen_reserved_offset,
 				       adev->mman.stolen_reserved_size,
-				       AMDGPU_GEM_DOMAIN_VRAM,
 				       &adev->mman.stolen_reserved_memory,
 				       NULL);
 	if (r)
@@ -1878,6 +1900,7 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev)
 	amdgpu_bo_free_kernel(&adev->mman.sdma_access_bo, NULL,
 					&adev->mman.sdma_access_ptr);
 	amdgpu_ttm_fw_reserve_vram_fini(adev);
+	amdgpu_ttm_drv_reserve_vram_fini(adev);
 
 	if (drm_dev_enter(adev_to_drm(adev), &idx)) {
 
@@ -1959,7 +1982,9 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
 		AMDGPU_IB_POOL_DELAYED;
 	int r;
 
-	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4, pool, job);
+	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     num_dw * 4, pool, job);
 	if (r)
 		return r;
 
@@ -1969,17 +1994,11 @@ static int amdgpu_ttm_prepare_job(struct amdgpu_device *adev,
 							adev->gart.bo);
 		(*job)->vm_needs_flush = true;
 	}
-	if (resv) {
-		r = amdgpu_sync_resv(adev, &(*job)->sync, resv,
-				     AMDGPU_SYNC_ALWAYS,
-				     AMDGPU_FENCE_OWNER_UNDEFINED);
-		if (r) {
-			DRM_ERROR("sync failed (%d).\n", r);
-			amdgpu_job_free(*job);
-			return r;
-		}
-	}
-	return 0;
+	if (!resv)
+		return 0;
+
+	return drm_sched_job_add_resv_dependencies(&(*job)->base, resv,
+						   DMA_RESV_USAGE_BOOKKEEP);
 }
 
 int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
@@ -2024,8 +2043,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
 	if (direct_submit)
 		r = amdgpu_job_submit_direct(job, ring, fence);
 	else
-		r = amdgpu_job_submit(job, &adev->mman.entity,
-				      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
+		*fence = amdgpu_job_submit(job);
 	if (r)
 		goto error_free;
 
@@ -2070,16 +2088,8 @@ static int amdgpu_ttm_fill_mem(struct amdgpu_ring *ring, uint32_t src_data,
 
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
 	WARN_ON(job->ibs[0].length_dw > num_dw);
-	r = amdgpu_job_submit(job, &adev->mman.entity,
-			      AMDGPU_FENCE_OWNER_UNDEFINED, fence);
-	if (r)
-		goto error_free;
-
+	*fence = amdgpu_job_submit(job);
 	return 0;
-
-error_free:
-	amdgpu_job_free(job);
-	return r;
 }
 
 int amdgpu_fill_buffer(struct amdgpu_bo *bo,
@@ -2295,9 +2305,9 @@ static ssize_t amdgpu_iomem_read(struct file *f, char __user *buf,
 		if (p->mapping != adev->mman.bdev.dev_mapping)
 			return -EPERM;
 
-		ptr = kmap(p);
+		ptr = kmap_local_page(p);
 		r = copy_to_user(buf, ptr + off, bytes);
-		kunmap(p);
+		kunmap_local(ptr);
 		if (r)
 			return -EFAULT;
 
@@ -2346,9 +2356,9 @@ static ssize_t amdgpu_iomem_write(struct file *f, const char __user *buf,
 		if (p->mapping != adev->mman.bdev.dev_mapping)
 			return -EPERM;
 
-		ptr = kmap(p);
+		ptr = kmap_local_page(p);
 		r = copy_from_user(ptr + off, buf, bytes);
-		kunmap(p);
+		kunmap_local(ptr);
 		if (r)
 			return -EFAULT;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
index 6a70818039dd..e2cd5894afc9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
@@ -39,6 +39,8 @@
 
 #define AMDGPU_POISON	0xd0bed0be
 
+struct hmm_range;
+
 struct amdgpu_gtt_mgr {
 	struct ttm_resource_manager manager;
 	struct drm_mm mm;
@@ -84,6 +86,12 @@ struct amdgpu_mman {
 	struct amdgpu_bo	*fw_vram_usage_reserved_bo;
 	void		*fw_vram_usage_va;
 
+	/* driver VRAM reservation */
+	u64		drv_vram_usage_start_offset;
+	u64		drv_vram_usage_size;
+	struct amdgpu_bo	*drv_vram_usage_reserved_bo;
+	void		*drv_vram_usage_va;
+
 	/* PAGE_SIZE'd BO for process memory r/w over SDMA. */
 	struct amdgpu_bo	*sdma_access_bo;
 	void			*sdma_access_ptr;
@@ -149,15 +157,25 @@ void amdgpu_ttm_recover_gart(struct ttm_buffer_object *tbo);
 uint64_t amdgpu_ttm_domain_start(struct amdgpu_device *adev, uint32_t type);
 
 #if IS_ENABLED(CONFIG_DRM_AMDGPU_USERPTR)
-int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages);
-bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm);
+int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo, struct page **pages,
+				 struct hmm_range **range);
+void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
+				      struct hmm_range *range);
+bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
+				       struct hmm_range *range);
 #else
 static inline int amdgpu_ttm_tt_get_user_pages(struct amdgpu_bo *bo,
-					       struct page **pages)
+					       struct page **pages,
+					       struct hmm_range **range)
 {
 	return -EPERM;
 }
-static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm)
+static inline void amdgpu_ttm_tt_discard_user_pages(struct ttm_tt *ttm,
+						    struct hmm_range *range)
+{
+}
+static inline bool amdgpu_ttm_tt_get_user_pages_done(struct ttm_tt *ttm,
+						     struct hmm_range *range)
 {
 	return false;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
index dd0bc649a57d..5cb62e6249c2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
@@ -698,6 +698,7 @@ FW_VERSION_ATTR(rlc_srlg_fw_version, 0444, gfx.rlc_srlg_fw_version);
 FW_VERSION_ATTR(rlc_srls_fw_version, 0444, gfx.rlc_srls_fw_version);
 FW_VERSION_ATTR(mec_fw_version, 0444, gfx.mec_fw_version);
 FW_VERSION_ATTR(mec2_fw_version, 0444, gfx.mec2_fw_version);
+FW_VERSION_ATTR(imu_fw_version, 0444, gfx.imu_fw_version);
 FW_VERSION_ATTR(sos_fw_version, 0444, psp.sos.fw_version);
 FW_VERSION_ATTR(asd_fw_version, 0444, psp.asd_context.bin_desc.fw_version);
 FW_VERSION_ATTR(ta_ras_fw_version, 0444, psp.ras_context.context.bin_desc.fw_version);
@@ -719,7 +720,8 @@ static struct attribute *fw_attrs[] = {
 	&dev_attr_ta_ras_fw_version.attr, &dev_attr_ta_xgmi_fw_version.attr,
 	&dev_attr_smc_fw_version.attr, &dev_attr_sdma_fw_version.attr,
 	&dev_attr_sdma2_fw_version.attr, &dev_attr_vcn_fw_version.attr,
-	&dev_attr_dmcu_fw_version.attr, NULL
+	&dev_attr_dmcu_fw_version.attr, &dev_attr_imu_fw_version.attr,
+	NULL
 };
 
 static const struct attribute_group fw_attr_group = {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 1c36235b4539..552e06929229 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -125,6 +125,7 @@ enum psp_fw_type {
 	PSP_FW_TYPE_PSP_INTF_DRV,
 	PSP_FW_TYPE_PSP_DBG_DRV,
 	PSP_FW_TYPE_PSP_RAS_DRV,
+	PSP_FW_TYPE_MAX_INDEX,
 };
 
 /* version_major=2, version_minor=0 */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
index aad3c8b4c810..f76c19fc0392 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.c
@@ -22,6 +22,59 @@
  */
 
 #include "amdgpu.h"
+#include "umc_v6_7.h"
+
+static int amdgpu_umc_convert_error_address(struct amdgpu_device *adev,
+				    struct ras_err_data *err_data, uint64_t err_addr,
+				    uint32_t ch_inst, uint32_t umc_inst)
+{
+	switch (adev->ip_versions[UMC_HWIP][0]) {
+	case IP_VERSION(6, 7, 0):
+		umc_v6_7_convert_error_address(adev,
+				err_data, err_addr, ch_inst, umc_inst);
+		break;
+	default:
+		dev_warn(adev->dev,
+			 "UMC address to Physical address translation is not supported\n");
+		return AMDGPU_RAS_FAIL;
+	}
+
+	return AMDGPU_RAS_SUCCESS;
+}
+
+int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
+			uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst)
+{
+	struct ras_err_data err_data = {0, 0, 0, NULL};
+	int ret = AMDGPU_RAS_FAIL;
+
+	err_data.err_addr =
+		kcalloc(adev->umc.max_ras_err_cnt_per_query,
+			sizeof(struct eeprom_table_record), GFP_KERNEL);
+	if (!err_data.err_addr) {
+		dev_warn(adev->dev,
+			"Failed to alloc memory for umc error record in MCA notifier!\n");
+		return AMDGPU_RAS_FAIL;
+	}
+
+	/*
+	 * Translate UMC channel address to Physical address
+	 */
+	ret = amdgpu_umc_convert_error_address(adev, &err_data, err_addr,
+					ch_inst, umc_inst);
+	if (ret)
+		goto out;
+
+	if (amdgpu_bad_page_threshold != 0) {
+		amdgpu_ras_add_bad_pages(adev, err_data.err_addr,
+						err_data.err_addr_cnt);
+		amdgpu_ras_save_bad_pages(adev);
+	}
+
+out:
+	kfree(err_data.err_addr);
+	return ret;
+}
 
 static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
 		void *ras_error_status,
@@ -112,23 +165,29 @@ static int amdgpu_umc_do_page_retirement(struct amdgpu_device *adev,
 	return AMDGPU_RAS_SUCCESS;
 }
 
-int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
-		void *ras_error_status,
-		bool reset)
+int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset)
 {
-	int ret;
-	struct ras_err_data *err_data = (struct ras_err_data *)ras_error_status;
-	struct ras_common_if head = {
-		.block = AMDGPU_RAS_BLOCK__UMC,
-	};
-	struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
+	int ret = AMDGPU_RAS_SUCCESS;
 
-	ret =
-		amdgpu_umc_do_page_retirement(adev, ras_error_status, NULL, reset);
+	if (!adev->gmc.xgmi.connected_to_cpu) {
+		struct ras_err_data err_data = {0, 0, 0, NULL};
+		struct ras_common_if head = {
+			.block = AMDGPU_RAS_BLOCK__UMC,
+		};
+		struct ras_manager *obj = amdgpu_ras_find_obj(adev, &head);
 
-	if (ret == AMDGPU_RAS_SUCCESS && obj) {
-		obj->err_data.ue_count += err_data->ue_count;
-		obj->err_data.ce_count += err_data->ce_count;
+		ret = amdgpu_umc_do_page_retirement(adev, &err_data, NULL, reset);
+
+		if (ret == AMDGPU_RAS_SUCCESS && obj) {
+			obj->err_data.ue_count += err_data.ue_count;
+			obj->err_data.ce_count += err_data.ce_count;
+		}
+	} else if (reset) {
+		/* MCA poison handler is only responsible for GPU reset,
+		 * let MCA notifier do page retirement.
+		 */
+		kgd2kfd_set_sram_ecc_flag(adev->kfd.dev);
+		amdgpu_ras_reset_gpu(adev);
 	}
 
 	return ret;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
index e46439274f3a..a6951160f13a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_umc.h
@@ -51,9 +51,6 @@ struct amdgpu_umc_ras {
 	struct amdgpu_ras_block_object ras_block;
 	void (*err_cnt_init)(struct amdgpu_device *adev);
 	bool (*query_ras_poison_mode)(struct amdgpu_device *adev);
-	void (*convert_ras_error_address)(struct amdgpu_device *adev,
-				struct ras_err_data *err_data, uint64_t err_addr,
-				uint32_t ch_inst, uint32_t umc_inst);
 	void (*ecc_info_query_ras_error_count)(struct amdgpu_device *adev,
 				      void *ras_error_status);
 	void (*ecc_info_query_ras_error_address)(struct amdgpu_device *adev,
@@ -86,9 +83,7 @@ struct amdgpu_umc {
 };
 
 int amdgpu_umc_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block);
-int amdgpu_umc_poison_handler(struct amdgpu_device *adev,
-		void *ras_error_status,
-		bool reset);
+int amdgpu_umc_poison_handler(struct amdgpu_device *adev, bool reset);
 int amdgpu_umc_process_ecc_irq(struct amdgpu_device *adev,
 		struct amdgpu_irq_src *source,
 		struct amdgpu_iv_entry *entry);
@@ -101,4 +96,6 @@ void amdgpu_umc_fill_error_record(struct ras_err_data *err_data,
 int amdgpu_umc_process_ras_data_cb(struct amdgpu_device *adev,
 		void *ras_error_status,
 		struct amdgpu_iv_entry *entry);
+int amdgpu_umc_page_retirement_mca(struct amdgpu_device *adev,
+			uint64_t err_addr, uint32_t ch_inst, uint32_t umc_inst);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 6eac649499d3..e00bb654e24b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1132,7 +1132,9 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 	unsigned offset_idx = 0;
 	unsigned offset[3] = { UVD_BASE_SI, 0, 0 };
 
-	r = amdgpu_job_alloc_with_ib(adev, 64, direct ? AMDGPU_IB_POOL_DIRECT :
+	r = amdgpu_job_alloc_with_ib(ring->adev, &adev->uvd.entity,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     64, direct ? AMDGPU_IB_POOL_DIRECT :
 				     AMDGPU_IB_POOL_DELAYED, &job);
 	if (r)
 		return r;
@@ -1175,16 +1177,13 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
 		if (r)
 			goto err_free;
 	} else {
-		r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.base.resv,
-				     AMDGPU_SYNC_ALWAYS,
-				     AMDGPU_FENCE_OWNER_UNDEFINED);
+		r = drm_sched_job_add_resv_dependencies(&job->base,
+							bo->tbo.base.resv,
+							DMA_RESV_USAGE_KERNEL);
 		if (r)
 			goto err_free;
 
-		r = amdgpu_job_submit(job, &adev->uvd.entity,
-				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
-		if (r)
-			goto err_free;
+		f = amdgpu_job_submit(job);
 	}
 
 	amdgpu_bo_reserve(bo, true);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 02cb3a12dd76..b239e874f2d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -450,8 +450,10 @@ static int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 	uint64_t addr;
 	int i, r;
 
-	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
-				     AMDGPU_IB_POOL_DIRECT, &job);
+	r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+				     &job);
 	if (r)
 		return r;
 
@@ -538,7 +540,9 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	struct dma_fence *f = NULL;
 	int i, r;
 
-	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
+	r = amdgpu_job_alloc_with_ib(ring->adev, &ring->adev->vce.entity,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     ib_size_dw * 4,
 				     direct ? AMDGPU_IB_POOL_DIRECT :
 				     AMDGPU_IB_POOL_DELAYED, &job);
 	if (r)
@@ -570,8 +574,7 @@ static int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	if (direct)
 		r = amdgpu_job_submit_direct(job, ring, &f);
 	else
-		r = amdgpu_job_submit(job, &ring->adev->vce.entity,
-				      AMDGPU_FENCE_OWNER_UNDEFINED, &f);
+		f = amdgpu_job_submit(job);
 	if (r)
 		goto err;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 0b52af415b28..b1622ac9949f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -156,6 +156,9 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 		break;
 	case IP_VERSION(3, 0, 2):
 		fw_name = FIRMWARE_VANGOGH;
+		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
+		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
+			adev->vcn.indirect_sram = true;
 		break;
 	case IP_VERSION(3, 0, 16):
 		fw_name = FIRMWARE_DIMGREY_CAVEFISH;
@@ -600,15 +603,16 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
 				   struct amdgpu_ib *ib_msg,
 				   struct dma_fence **fence)
 {
+	u64 addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
 	struct amdgpu_device *adev = ring->adev;
 	struct dma_fence *f = NULL;
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
-	uint64_t addr = AMDGPU_GPU_PAGE_ALIGN(ib_msg->gpu_addr);
 	int i, r;
 
-	r = amdgpu_job_alloc_with_ib(adev, 64,
-					AMDGPU_IB_POOL_DIRECT, &job);
+	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+				     64, AMDGPU_IB_POOL_DIRECT,
+				     &job);
 	if (r)
 		goto err;
 
@@ -787,8 +791,9 @@ static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
 	if (sq)
 		ib_size_dw += 8;
 
-	r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
-				AMDGPU_IB_POOL_DIRECT, &job);
+	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+				     ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+				     &job);
 	if (r)
 		goto err;
 
@@ -916,8 +921,9 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
 	if (sq)
 		ib_size_dw += 8;
 
-	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
-					AMDGPU_IB_POOL_DIRECT, &job);
+	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+				     ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+				     &job);
 	if (r)
 		return r;
 
@@ -982,8 +988,9 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
 	if (sq)
 		ib_size_dw += 8;
 
-	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
-					AMDGPU_IB_POOL_DIRECT, &job);
+	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL,
+				     ib_size_dw * 4, AMDGPU_IB_POOL_DIRECT,
+				     &job);
 	if (r)
 		return r;
 
@@ -1248,3 +1255,20 @@ int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
 
 	return 0;
 }
+
+void amdgpu_vcn_set_ras_funcs(struct amdgpu_device *adev)
+{
+	if (!adev->vcn.ras)
+		return;
+
+	amdgpu_ras_register_ras_block(adev, &adev->vcn.ras->ras_block);
+
+	strcpy(adev->vcn.ras->ras_block.ras_comm.name, "vcn");
+	adev->vcn.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN;
+	adev->vcn.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON;
+	adev->vcn.ras_if = &adev->vcn.ras->ras_block.ras_comm;
+
+	/* If don't define special ras_late_init function, use default ras_late_init */
+	if (!adev->vcn.ras->ras_block.ras_late_init)
+		adev->vcn.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
index 253ea6b159df..dbb8d68a30c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
@@ -399,5 +399,6 @@ void amdgpu_debugfs_vcn_fwlog_init(struct amdgpu_device *adev,
 int amdgpu_vcn_process_poison_irq(struct amdgpu_device *adev,
 			struct amdgpu_irq_src *source,
 			struct amdgpu_iv_entry *entry);
+void amdgpu_vcn_set_ras_funcs(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 9c765b04aae3..2994b9db196f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -64,6 +64,10 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
 	ddev->driver_features &= ~DRIVER_ATOMIC;
 	adev->cg_flags = 0;
 	adev->pg_flags = 0;
+
+	/* enable mcbp for sriov asic_type before soc21 */
+	amdgpu_mcbp = (adev->asic_type < CHIP_IP_DISCOVERY) ? 1 : 0;
+
 }
 
 void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev,
@@ -391,7 +395,6 @@ static void amdgpu_virt_ras_reserve_bps(struct amdgpu_device *adev)
 		 */
 		if (amdgpu_bo_create_kernel_at(adev, bp << AMDGPU_GPU_PAGE_SHIFT,
 					       AMDGPU_GPU_PAGE_SIZE,
-					       AMDGPU_GEM_DOMAIN_VRAM,
 					       &bo, NULL))
 			DRM_DEBUG("RAS WARN: reserve vram for retired page %llx fail\n", bp);
 
@@ -424,11 +427,17 @@ static void amdgpu_virt_add_bad_page(struct amdgpu_device *adev,
 	struct eeprom_table_record bp;
 	uint64_t retired_page;
 	uint32_t bp_idx, bp_cnt;
+	void *vram_usage_va = NULL;
+
+	if (adev->mman.fw_vram_usage_va)
+		vram_usage_va = adev->mman.fw_vram_usage_va;
+	else
+		vram_usage_va = adev->mman.drv_vram_usage_va;
 
 	if (bp_block_size) {
 		bp_cnt = bp_block_size / sizeof(uint64_t);
 		for (bp_idx = 0; bp_idx < bp_cnt; bp_idx++) {
-			retired_page = *(uint64_t *)(adev->mman.fw_vram_usage_va +
+			retired_page = *(uint64_t *)(vram_usage_va +
 					bp_block_offset + bp_idx * sizeof(uint64_t));
 			bp.retired_page = retired_page;
 
@@ -547,6 +556,7 @@ static void amdgpu_virt_populate_vf2pf_ucode_info(struct amdgpu_device *adev)
 	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_RLC_SRLS, adev->gfx.rlc_srls_fw_version);
 	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC,      adev->gfx.mec_fw_version);
 	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_MEC2,     adev->gfx.mec2_fw_version);
+	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_IMU,      adev->gfx.imu_fw_version);
 	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_SOS,      adev->psp.sos.fw_version);
 	POPULATE_UCODE_INFO(vf2pf_info, AMD_SRIOV_UCODE_ID_ASD,
 			    adev->psp.asd_context.bin_desc.fw_version);
@@ -638,7 +648,9 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev)
 	adev->virt.fw_reserve.p_vf2pf = NULL;
 	adev->virt.vf2pf_update_interval_ms = 0;
 
-	if (adev->mman.fw_vram_usage_va != NULL) {
+	if (adev->mman.fw_vram_usage_va && adev->mman.drv_vram_usage_va) {
+		DRM_WARN("Currently fw_vram and drv_vram should not have values at the same time!");
+	} else if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
 		/* go through this logic in ip_init and reset to init workqueue*/
 		amdgpu_virt_exchange_data(adev);
 
@@ -661,32 +673,40 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
 	uint32_t bp_block_size = 0;
 	struct amd_sriov_msg_pf2vf_info *pf2vf_v2 = NULL;
 
-	if (adev->mman.fw_vram_usage_va != NULL) {
-
-		adev->virt.fw_reserve.p_pf2vf =
-			(struct amd_sriov_msg_pf2vf_info_header *)
-			(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
-		adev->virt.fw_reserve.p_vf2pf =
-			(struct amd_sriov_msg_vf2pf_info_header *)
-			(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+	if (adev->mman.fw_vram_usage_va || adev->mman.drv_vram_usage_va) {
+		if (adev->mman.fw_vram_usage_va) {
+			adev->virt.fw_reserve.p_pf2vf =
+				(struct amd_sriov_msg_pf2vf_info_header *)
+				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+			adev->virt.fw_reserve.p_vf2pf =
+				(struct amd_sriov_msg_vf2pf_info_header *)
+				(adev->mman.fw_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+		} else if (adev->mman.drv_vram_usage_va) {
+			adev->virt.fw_reserve.p_pf2vf =
+				(struct amd_sriov_msg_pf2vf_info_header *)
+				(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_PF2VF_OFFSET_KB << 10));
+			adev->virt.fw_reserve.p_vf2pf =
+				(struct amd_sriov_msg_vf2pf_info_header *)
+				(adev->mman.drv_vram_usage_va + (AMD_SRIOV_MSG_VF2PF_OFFSET_KB << 10));
+		}
 
 		amdgpu_virt_read_pf2vf_data(adev);
 		amdgpu_virt_write_vf2pf_data(adev);
 
 		/* bad page handling for version 2 */
 		if (adev->virt.fw_reserve.p_pf2vf->version == 2) {
-				pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
+			pf2vf_v2 = (struct amd_sriov_msg_pf2vf_info *)adev->virt.fw_reserve.p_pf2vf;
 
-				bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
-						((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
-				bp_block_size = pf2vf_v2->bp_block_size;
+			bp_block_offset = ((uint64_t)pf2vf_v2->bp_block_offset_low & 0xFFFFFFFF) |
+				((((uint64_t)pf2vf_v2->bp_block_offset_high) << 32) & 0xFFFFFFFF00000000);
+			bp_block_size = pf2vf_v2->bp_block_size;
 
-				if (bp_block_size && !adev->virt.ras_init_done)
-					amdgpu_virt_init_ras_err_handler_data(adev);
+			if (bp_block_size && !adev->virt.ras_init_done)
+				amdgpu_virt_init_ras_err_handler_data(adev);
 
-				if (adev->virt.ras_init_done)
-					amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
-			}
+			if (adev->virt.ras_init_done)
+				amdgpu_virt_add_bad_page(adev, bp_block_offset, bp_block_size);
+		}
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
index 49c4347d154c..2b9d806e23af 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
@@ -75,6 +75,8 @@ struct amdgpu_vf_error_buffer {
 	uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
 };
 
+enum idh_request;
+
 /**
  * struct amdgpu_virt_ops - amdgpu device virt operations
  */
@@ -84,7 +86,8 @@ struct amdgpu_virt_ops {
 	int (*req_init_data)(struct amdgpu_device *adev);
 	int (*reset_gpu)(struct amdgpu_device *adev);
 	int (*wait_reset)(struct amdgpu_device *adev);
-	void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1, u32 data2, u32 data3);
+	void (*trans_msg)(struct amdgpu_device *adev, enum idh_request req,
+			  u32 data1, u32 data2, u32 data3);
 };
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
index 09dec2561adf..53ff91fc6cf6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vkms.c
@@ -498,6 +498,10 @@ static int amdgpu_vkms_sw_init(void *handle)
 	adev_to_drm(adev)->mode_config.preferred_depth = 24;
 	adev_to_drm(adev)->mode_config.prefer_shadow = 1;
 
+	adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
+
+	adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
+
 	r = amdgpu_display_modeset_create_props(adev);
 	if (r)
 		return r;
@@ -509,6 +513,10 @@ static int amdgpu_vkms_sw_init(void *handle)
 			return r;
 	}
 
+	r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+	if (r)
+		return r;
+
 	drm_kms_helper_poll_init(adev_to_drm(adev));
 
 	adev->mode_info.mode_config_initialized = true;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 2291aa14d888..dc379dc22c77 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -45,22 +45,43 @@
 /**
  * DOC: GPUVM
  *
- * GPUVM is similar to the legacy gart on older asics, however
- * rather than there being a single global gart table
- * for the entire GPU, there are multiple VM page tables active
- * at any given time.  The VM page tables can contain a mix
- * vram pages and system memory pages and system memory pages
+ * GPUVM is the MMU functionality provided on the GPU.
+ * GPUVM is similar to the legacy GART on older asics, however
+ * rather than there being a single global GART table
+ * for the entire GPU, there can be multiple GPUVM page tables active
+ * at any given time.  The GPUVM page tables can contain a mix
+ * VRAM pages and system pages (both memory and MMIO) and system pages
  * can be mapped as snooped (cached system pages) or unsnooped
  * (uncached system pages).
- * Each VM has an ID associated with it and there is a page table
- * associated with each VMID.  When executing a command buffer,
- * the kernel tells the ring what VMID to use for that command
+ *
+ * Each active GPUVM has an ID associated with it and there is a page table
+ * linked with each VMID.  When executing a command buffer,
+ * the kernel tells the engine what VMID to use for that command
  * buffer.  VMIDs are allocated dynamically as commands are submitted.
  * The userspace drivers maintain their own address space and the kernel
  * sets up their pages tables accordingly when they submit their
  * command buffers and a VMID is assigned.
- * Cayman/Trinity support up to 8 active VMs at any given time;
- * SI supports 16.
+ * The hardware supports up to 16 active GPUVMs at any given time.
+ *
+ * Each GPUVM is represented by a 1-2 or 1-5 level page table, depending
+ * on the ASIC family.  GPUVM supports RWX attributes on each page as well
+ * as other features such as encryption and caching attributes.
+ *
+ * VMID 0 is special.  It is the GPUVM used for the kernel driver.  In
+ * addition to an aperture managed by a page table, VMID 0 also has
+ * several other apertures.  There is an aperture for direct access to VRAM
+ * and there is a legacy AGP aperture which just forwards accesses directly
+ * to the matching system physical addresses (or IOVAs when an IOMMU is
+ * present).  These apertures provide direct access to these memories without
+ * incurring the overhead of a page table.  VMID 0 is used by the kernel
+ * driver for tasks like memory management.
+ *
+ * GPU clients (i.e., engines on the GPU) use GPUVM VMIDs to access memory.
+ * For user applications, each application can have their own unique GPUVM
+ * address space.  The application manages the address space and the kernel
+ * driver manages the GPUVM page tables for each process.  If an GPU client
+ * accesses an invalid page, it will generate a GPU page fault, similar to
+ * accessing an invalid page on a CPU.
  */
 
 #define START(node) ((node)->start)
@@ -143,32 +164,6 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	return 0;
 }
 
-/*
- * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
- * happens while holding this lock anywhere to prevent deadlocks when
- * an MMU notifier runs in reclaim-FS context.
- */
-static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
-{
-	mutex_lock(&vm->eviction_lock);
-	vm->saved_flags = memalloc_noreclaim_save();
-}
-
-static inline int amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
-{
-	if (mutex_trylock(&vm->eviction_lock)) {
-		vm->saved_flags = memalloc_noreclaim_save();
-		return 1;
-	}
-	return 0;
-}
-
-static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
-{
-	memalloc_noreclaim_restore(vm->saved_flags);
-	mutex_unlock(&vm->eviction_lock);
-}
-
 /**
  * amdgpu_vm_bo_evicted - vm_bo is evicted
  *
@@ -489,25 +484,20 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
 	struct amdgpu_device *adev = ring->adev;
 	unsigned vmhub = ring->funcs->vmhub;
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
-	struct amdgpu_vmid *id;
-	bool gds_switch_needed;
-	bool vm_flush_needed = job->vm_needs_flush || ring->has_compute_vm_bug;
 
 	if (job->vmid == 0)
 		return false;
-	id = &id_mgr->ids[job->vmid];
-	gds_switch_needed = ring->funcs->emit_gds_switch && (
-		id->gds_base != job->gds_base ||
-		id->gds_size != job->gds_size ||
-		id->gws_base != job->gws_base ||
-		id->gws_size != job->gws_size ||
-		id->oa_base != job->oa_base ||
-		id->oa_size != job->oa_size);
-
-	if (amdgpu_vmid_had_gpu_reset(adev, id))
+
+	if (job->vm_needs_flush || ring->has_compute_vm_bug)
+		return true;
+
+	if (ring->funcs->emit_gds_switch && job->gds_switch_needed)
+		return true;
+
+	if (amdgpu_vmid_had_gpu_reset(adev, &id_mgr->ids[job->vmid]))
 		return true;
 
-	return vm_flush_needed || gds_switch_needed;
+	return false;
 }
 
 /**
@@ -529,27 +519,20 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	unsigned vmhub = ring->funcs->vmhub;
 	struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
 	struct amdgpu_vmid *id = &id_mgr->ids[job->vmid];
-	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
-		id->gds_base != job->gds_base ||
-		id->gds_size != job->gds_size ||
-		id->gws_base != job->gws_base ||
-		id->gws_size != job->gws_size ||
-		id->oa_base != job->oa_base ||
-		id->oa_size != job->oa_size);
+	bool spm_update_needed = job->spm_update_needed;
+	bool gds_switch_needed = ring->funcs->emit_gds_switch &&
+		job->gds_switch_needed;
 	bool vm_flush_needed = job->vm_needs_flush;
 	struct dma_fence *fence = NULL;
 	bool pasid_mapping_needed = false;
 	unsigned patch_offset = 0;
-	bool update_spm_vmid_needed = (job->vm && (job->vm->reserved_vmid[vmhub] != NULL));
 	int r;
 
-	if (update_spm_vmid_needed && adev->gfx.rlc.funcs->update_spm_vmid)
-		adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
-
 	if (amdgpu_vmid_had_gpu_reset(adev, id)) {
 		gds_switch_needed = true;
 		vm_flush_needed = true;
 		pasid_mapping_needed = true;
+		spm_update_needed = true;
 	}
 
 	mutex_lock(&id_mgr->lock);
@@ -567,6 +550,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	if (!vm_flush_needed && !gds_switch_needed && !need_pipe_sync)
 		return 0;
 
+	amdgpu_ring_ib_begin(ring);
 	if (ring->funcs->init_cond_exec)
 		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
@@ -581,6 +565,17 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	if (pasid_mapping_needed)
 		amdgpu_gmc_emit_pasid_mapping(ring, job->vmid, job->pasid);
 
+	if (spm_update_needed && adev->gfx.rlc.funcs->update_spm_vmid)
+		adev->gfx.rlc.funcs->update_spm_vmid(adev, job->vmid);
+
+	if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
+	    gds_switch_needed) {
+		amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
+					    job->gds_size, job->gws_base,
+					    job->gws_size, job->oa_base,
+					    job->oa_size);
+	}
+
 	if (vm_flush_needed || pasid_mapping_needed) {
 		r = amdgpu_fence_emit(ring, &fence, NULL, 0);
 		if (r)
@@ -605,20 +600,6 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 	}
 	dma_fence_put(fence);
 
-	if (!ring->is_mes_queue && ring->funcs->emit_gds_switch &&
-	    gds_switch_needed) {
-		id->gds_base = job->gds_base;
-		id->gds_size = job->gds_size;
-		id->gws_base = job->gws_base;
-		id->gws_size = job->gws_size;
-		id->oa_base = job->oa_base;
-		id->oa_size = job->oa_size;
-		amdgpu_ring_emit_gds_switch(ring, job->vmid, job->gds_base,
-					    job->gds_size, job->gws_base,
-					    job->gws_size, job->oa_base,
-					    job->oa_size);
-	}
-
 	if (ring->funcs->patch_cond_exec)
 		amdgpu_ring_patch_cond_exec(ring, patch_offset);
 
@@ -627,6 +608,7 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job,
 		amdgpu_ring_emit_switch_buffer(ring);
 		amdgpu_ring_emit_switch_buffer(ring);
 	}
+	amdgpu_ring_ib_end(ring);
 	return 0;
 }
 
@@ -2386,7 +2368,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 	union drm_amdgpu_vm *args = data;
 	struct amdgpu_device *adev = drm_to_adev(dev);
 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
-	long timeout = msecs_to_jiffies(2000);
 	int r;
 
 	switch (args->in.op) {
@@ -2398,21 +2379,6 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 			return r;
 		break;
 	case AMDGPU_VM_OP_UNRESERVE_VMID:
-		if (amdgpu_sriov_runtime(adev))
-			timeout = 8 * timeout;
-
-		/* Wait vm idle to make sure the vmid set in SPM_VMID is
-		 * not referenced anymore.
-		 */
-		r = amdgpu_bo_reserve(fpriv->vm.root.bo, true);
-		if (r)
-			return r;
-
-		r = amdgpu_vm_wait_idle(&fpriv->vm, timeout);
-		if (r < 0)
-			return r;
-
-		amdgpu_bo_unreserve(fpriv->vm.root.bo);
 		amdgpu_vmid_free_reserved(adev, &fpriv->vm, AMDGPU_GFXHUB_0);
 		break;
 	default:
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 83acb7bd80fe..094bb4807303 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -119,9 +119,6 @@ struct amdgpu_bo_vm;
 /* Reserve 2MB at top/bottom of address space for kernel use */
 #define AMDGPU_VA_RESERVED_SIZE			(2ULL << 20)
 
-/* max vmids dedicated for process */
-#define AMDGPU_VM_MAX_RESERVED_VMID	1
-
 /* See vm_update_mode */
 #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
 #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
@@ -298,8 +295,7 @@ struct amdgpu_vm {
 	struct dma_fence	*last_unlocked;
 
 	unsigned int		pasid;
-	/* dedicated to vm */
-	struct amdgpu_vmid	*reserved_vmid[AMDGPU_MAX_VMHUBS];
+	bool			reserved_vmid[AMDGPU_MAX_VMHUBS];
 
 	/* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
 	bool					use_cpu_for_update;
@@ -492,7 +488,48 @@ void amdgpu_debugfs_vm_bo_info(struct amdgpu_vm *vm, struct seq_file *m);
  */
 static inline uint64_t amdgpu_vm_tlb_seq(struct amdgpu_vm *vm)
 {
+	unsigned long flags;
+	spinlock_t *lock;
+
+	/*
+	 * Workaround to stop racing between the fence signaling and handling
+	 * the cb. The lock is static after initially setting it up, just make
+	 * sure that the dma_fence structure isn't freed up.
+	 */
+	rcu_read_lock();
+	lock = vm->last_tlb_flush->lock;
+	rcu_read_unlock();
+
+	spin_lock_irqsave(lock, flags);
+	spin_unlock_irqrestore(lock, flags);
+
 	return atomic64_read(&vm->tlb_seq);
 }
 
+/*
+ * vm eviction_lock can be taken in MMU notifiers. Make sure no reclaim-FS
+ * happens while holding this lock anywhere to prevent deadlocks when
+ * an MMU notifier runs in reclaim-FS context.
+ */
+static inline void amdgpu_vm_eviction_lock(struct amdgpu_vm *vm)
+{
+	mutex_lock(&vm->eviction_lock);
+	vm->saved_flags = memalloc_noreclaim_save();
+}
+
+static inline bool amdgpu_vm_eviction_trylock(struct amdgpu_vm *vm)
+{
+	if (mutex_trylock(&vm->eviction_lock)) {
+		vm->saved_flags = memalloc_noreclaim_save();
+		return true;
+	}
+	return false;
+}
+
+static inline void amdgpu_vm_eviction_unlock(struct amdgpu_vm *vm)
+{
+	memalloc_noreclaim_restore(vm->saved_flags);
+	mutex_unlock(&vm->eviction_lock);
+}
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index 358b91243e37..b5f3bba851db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -597,7 +597,9 @@ static int amdgpu_vm_pt_alloc(struct amdgpu_device *adev,
 	if (entry->bo)
 		return 0;
 
+	amdgpu_vm_eviction_unlock(vm);
 	r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
+	amdgpu_vm_eviction_lock(vm);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
index 69e105fa41f6..535cd6569bcc 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
@@ -47,6 +47,32 @@ static int amdgpu_vm_sdma_map_table(struct amdgpu_bo_vm *table)
 	return r;
 }
 
+/* Allocate a new job for @count PTE updates */
+static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p,
+				    unsigned int count)
+{
+	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
+		: AMDGPU_IB_POOL_DELAYED;
+	struct drm_sched_entity *entity = p->immediate ? &p->vm->immediate
+		: &p->vm->delayed;
+	unsigned int ndw;
+	int r;
+
+	/* estimate how many dw we need */
+	ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
+	if (p->pages_addr)
+		ndw += count * 2;
+	ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);
+
+	r = amdgpu_job_alloc_with_ib(p->adev, entity, AMDGPU_FENCE_OWNER_VM,
+				     ndw * 4, pool, &p->job);
+	if (r)
+		return r;
+
+	p->num_dw_left = ndw;
+	return 0;
+}
+
 /**
  * amdgpu_vm_sdma_prepare - prepare SDMA command submission
  *
@@ -61,21 +87,22 @@ static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
 				  struct dma_resv *resv,
 				  enum amdgpu_sync_mode sync_mode)
 {
-	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
-		: AMDGPU_IB_POOL_DELAYED;
-	unsigned int ndw = AMDGPU_VM_SDMA_MIN_NUM_DW;
+	struct amdgpu_sync sync;
 	int r;
 
-	r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool, &p->job);
+	r = amdgpu_vm_sdma_alloc_job(p, 0);
 	if (r)
 		return r;
 
-	p->num_dw_left = ndw;
-
 	if (!resv)
 		return 0;
 
-	return amdgpu_sync_resv(p->adev, &p->job->sync, resv, sync_mode, p->vm);
+	amdgpu_sync_create(&sync);
+	r = amdgpu_sync_resv(p->adev, &sync, resv, sync_mode, p->vm);
+	if (!r)
+		r = amdgpu_sync_push_to_job(&sync, p->job);
+	amdgpu_sync_free(&sync);
+	return r;
 }
 
 /**
@@ -91,20 +118,16 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
 				 struct dma_fence **fence)
 {
 	struct amdgpu_ib *ib = p->job->ibs;
-	struct drm_sched_entity *entity;
 	struct amdgpu_ring *ring;
 	struct dma_fence *f;
-	int r;
 
-	entity = p->immediate ? &p->vm->immediate : &p->vm->delayed;
-	ring = container_of(entity->rq->sched, struct amdgpu_ring, sched);
+	ring = container_of(p->vm->delayed.rq->sched, struct amdgpu_ring,
+			    sched);
 
 	WARN_ON(ib->length_dw == 0);
 	amdgpu_ring_pad_ib(ring, ib);
 	WARN_ON(ib->length_dw > p->num_dw_left);
-	r = amdgpu_job_submit(p->job, entity, AMDGPU_FENCE_OWNER_VM, &f);
-	if (r)
-		goto error;
+	f = amdgpu_job_submit(p->job);
 
 	if (p->unlocked) {
 		struct dma_fence *tmp = dma_fence_get(f);
@@ -127,10 +150,6 @@ static int amdgpu_vm_sdma_commit(struct amdgpu_vm_update_params *p,
 	}
 	dma_fence_put(f);
 	return 0;
-
-error:
-	amdgpu_job_free(p->job);
-	return r;
 }
 
 /**
@@ -210,8 +229,6 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
 				 uint64_t flags)
 {
 	struct amdgpu_bo *bo = &vmbo->bo;
-	enum amdgpu_ib_pool_type pool = p->immediate ? AMDGPU_IB_POOL_IMMEDIATE
-		: AMDGPU_IB_POOL_DELAYED;
 	struct dma_resv_iter cursor;
 	unsigned int i, ndw, nptes;
 	struct dma_fence *fence;
@@ -221,8 +238,10 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
 	/* Wait for PD/PT moves to be completed */
 	dma_resv_iter_begin(&cursor, bo->tbo.base.resv, DMA_RESV_USAGE_KERNEL);
 	dma_resv_for_each_fence_unlocked(&cursor, fence) {
-		r = amdgpu_sync_fence(&p->job->sync, fence);
+		dma_fence_get(fence);
+		r = drm_sched_job_add_dependency(&p->job->base, fence);
 		if (r) {
+			dma_fence_put(fence);
 			dma_resv_iter_end(&cursor);
 			return r;
 		}
@@ -238,19 +257,9 @@ static int amdgpu_vm_sdma_update(struct amdgpu_vm_update_params *p,
 			if (r)
 				return r;
 
-			/* estimate how many dw we need */
-			ndw = 32;
-			if (p->pages_addr)
-				ndw += count * 2;
-			ndw = max(ndw, AMDGPU_VM_SDMA_MIN_NUM_DW);
-			ndw = min(ndw, AMDGPU_VM_SDMA_MAX_NUM_DW);
-
-			r = amdgpu_job_alloc_with_ib(p->adev, ndw * 4, pool,
-						     &p->job);
+			r = amdgpu_vm_sdma_alloc_job(p, count);
 			if (r)
 				return r;
-
-			p->num_dw_left = ndw;
 		}
 
 		if (!p->pages_addr) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 18c1a173d187..faa12146635c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -435,7 +435,7 @@ static int amdgpu_vram_mgr_new(struct ttm_resource_manager *man,
 	if (place->flags & TTM_PL_FLAG_TOPDOWN)
 		vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
 
-	if (fpfn || lpfn != man->size)
+	if (fpfn || lpfn != mgr->mm.size)
 		/* Allocate blocks in desired range */
 		vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 47159e9a0884..4b9e7b050ccd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -386,7 +386,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
 	if (ret) {
 		dev_err(adev->dev, "XGMI: failed initializing kobject for xgmi hive\n");
 		kobject_put(&hive->kobj);
-		kfree(hive);
 		hive = NULL;
 		goto pro_end;
 	}
@@ -410,7 +409,6 @@ struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
 				dev_err(adev->dev, "XGMI: failed initializing reset domain for xgmi hive\n");
 				ret = -ENOMEM;
 				kobject_put(&hive->kobj);
-				kfree(hive);
 				hive = NULL;
 				goto pro_end;
 			}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
index e78e4c27b62a..6c97148ca0ed 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgv_sriovmsg.h
@@ -70,6 +70,7 @@ enum amd_sriov_ucode_engine_id {
 	AMD_SRIOV_UCODE_ID_RLC_SRLS,
 	AMD_SRIOV_UCODE_ID_MEC,
 	AMD_SRIOV_UCODE_ID_MEC2,
+	AMD_SRIOV_UCODE_ID_IMU,
 	AMD_SRIOV_UCODE_ID_SOS,
 	AMD_SRIOV_UCODE_ID_ASD,
 	AMD_SRIOV_UCODE_ID_TA_RAS,
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
index 6be9ac2b9c5b..18ae9433e463 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@@ -2081,8 +2081,11 @@ amdgpu_atombios_encoder_get_lcd_info(struct amdgpu_encoder *encoder)
 						}
 					}
 					record += fake_edid_record->ucFakeEDIDLength ?
-						fake_edid_record->ucFakeEDIDLength + 2 :
-						sizeof(ATOM_FAKE_EDID_PATCH_RECORD);
+						  struct_size(fake_edid_record,
+							      ucFakeEDIDString,
+							      fake_edid_record->ucFakeEDIDLength) :
+						  /* empty fake edid record must be 3 bytes long */
+						  sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1;
 					break;
 				case LCD_PANEL_RESOLUTION_RECORD_TYPE:
 					panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index 05051d5d2ec3..248f1a4e915f 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -21,6 +21,7 @@
  *
  */
 
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_vblank.h>
 
@@ -2828,6 +2829,17 @@ static int dce_v10_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	/* Disable vblank IRQs aggressively for power-saving */
+	/* XXX: can this be enabled for DC? */
+	adev_to_drm(adev)->vblank_disable_immediate = true;
+
+	r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+	if (r)
+		return r;
+
+	INIT_WORK(&adev->hotplug_work,
+		  amdgpu_display_hotplug_work_func);
+
 	drm_kms_helper_poll_init(adev_to_drm(adev));
 
 	adev->mode_info.mode_config_initialized = true;
@@ -2890,6 +2902,8 @@ static int dce_v10_0_hw_fini(void *handle)
 
 	dce_v10_0_pageflip_interrupt_fini(adev);
 
+	flush_work(&adev->hotplug_work);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index c928bc9eb202..cd9c19060d89 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -21,6 +21,7 @@
  *
  */
 
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_vblank.h>
 
@@ -2947,6 +2948,17 @@ static int dce_v11_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	/* Disable vblank IRQs aggressively for power-saving */
+	/* XXX: can this be enabled for DC? */
+	adev_to_drm(adev)->vblank_disable_immediate = true;
+
+	r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+	if (r)
+		return r;
+
+	INIT_WORK(&adev->hotplug_work,
+		  amdgpu_display_hotplug_work_func);
+
 	drm_kms_helper_poll_init(adev_to_drm(adev));
 
 	adev->mode_info.mode_config_initialized = true;
@@ -3020,6 +3032,8 @@ static int dce_v11_0_hw_fini(void *handle)
 
 	dce_v11_0_pageflip_interrupt_fini(adev);
 
+	flush_work(&adev->hotplug_work);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 62315fd5a05f..76323deecc58 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -23,6 +23,7 @@
 
 #include <linux/pci.h>
 
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_vblank.h>
 
@@ -2705,6 +2706,18 @@ static int dce_v6_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	/* Disable vblank IRQs aggressively for power-saving */
+	/* XXX: can this be enabled for DC? */
+	adev_to_drm(adev)->vblank_disable_immediate = true;
+
+	r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+	if (r)
+		return r;
+
+	/* Pre-DCE11 */
+	INIT_WORK(&adev->hotplug_work,
+		  amdgpu_display_hotplug_work_func);
+
 	drm_kms_helper_poll_init(adev_to_drm(adev));
 
 	return r;
@@ -2763,6 +2776,8 @@ static int dce_v6_0_hw_fini(void *handle)
 
 	dce_v6_0_pageflip_interrupt_fini(adev);
 
+	flush_work(&adev->hotplug_work);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 87d5e4c21cb3..01cf3ab111cb 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -21,6 +21,7 @@
  *
  */
 
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_vblank.h>
 
@@ -2729,6 +2730,18 @@ static int dce_v8_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	/* Disable vblank IRQs aggressively for power-saving */
+	/* XXX: can this be enabled for DC? */
+	adev_to_drm(adev)->vblank_disable_immediate = true;
+
+	r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
+	if (r)
+		return r;
+
+	/* Pre-DCE11 */
+	INIT_WORK(&adev->hotplug_work,
+		  amdgpu_display_hotplug_work_func);
+
 	drm_kms_helper_poll_init(adev_to_drm(adev));
 
 	adev->mode_info.mode_config_initialized = true;
@@ -2789,6 +2802,8 @@ static int dce_v8_0_hw_fini(void *handle)
 
 	dce_v8_0_pageflip_interrupt_fini(adev);
 
+	flush_work(&adev->hotplug_work);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index af94ac580d3e..49d34c7bbf20 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -4453,8 +4453,6 @@ static void gfx_v10_0_gpu_early_init(struct amdgpu_device *adev)
 {
 	u32 gb_addr_config;
 
-	adev->gfx.funcs = &gfx_v10_0_gfx_funcs;
-
 	switch (adev->ip_versions[GC_HWIP][0]) {
 	case IP_VERSION(10, 1, 10):
 	case IP_VERSION(10, 1, 1):
@@ -6911,6 +6909,8 @@ static int gfx_v10_0_kiq_init_queue(struct amdgpu_ring *ring)
 		mutex_unlock(&adev->srbm_mutex);
 	} else {
 		memset((void *)mqd, 0, sizeof(*mqd));
+		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+			amdgpu_ring_clear_ring(ring);
 		mutex_lock(&adev->srbm_mutex);
 		nv_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		amdgpu_ring_init_mqd(ring);
@@ -7593,6 +7593,8 @@ static int gfx_v10_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	adev->gfx.funcs = &gfx_v10_0_gfx_funcs;
+
 	switch (adev->ip_versions[GC_HWIP][0]) {
 	case IP_VERSION(10, 1, 10):
 	case IP_VERSION(10, 1, 1):
@@ -8489,7 +8491,7 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 
 	control |= ib->length_dw | (vmid << 24);
 
-	if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+	if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
 		control |= INDIRECT_BUFFER_PRE_ENB(1);
 
 		if (flags & AMDGPU_IB_PREEMPTED)
@@ -8664,7 +8666,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
 {
 	uint32_t dw2 = 0;
 
-	if (amdgpu_mcbp || amdgpu_sriov_vf(ring->adev))
+	if (amdgpu_mcbp)
 		gfx_v10_0_ring_emit_ce_meta(ring,
 				    (!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 671ca5a0f208..a56c6e106d00 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -77,6 +77,10 @@ MODULE_FIRMWARE("amdgpu/gc_11_0_3_pfp.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_3_me.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_3_mec.bin");
 MODULE_FIRMWARE("amdgpu/gc_11_0_3_rlc.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_pfp.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_me.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_mec.bin");
+MODULE_FIRMWARE("amdgpu/gc_11_0_4_rlc.bin");
 
 static const struct soc15_reg_golden golden_settings_gc_11_0_1[] =
 {
@@ -262,6 +266,7 @@ static void gfx_v11_0_init_golden_registers(struct amdgpu_device *adev)
 {
 	switch (adev->ip_versions[GC_HWIP][0]) {
 	case IP_VERSION(11, 0, 1):
+	case IP_VERSION(11, 0, 4):
 		soc15_program_register_sequence(adev,
 						golden_settings_gc_11_0_1,
 						(const u32)ARRAY_SIZE(golden_settings_gc_11_0_1));
@@ -843,7 +848,6 @@ static const struct amdgpu_gfx_funcs gfx_v11_0_gfx_funcs = {
 
 static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
 {
-	adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
 
 	switch (adev->ip_versions[GC_HWIP][0]) {
 	case IP_VERSION(11, 0, 0):
@@ -856,6 +860,7 @@ static int gfx_v11_0_gpu_early_init(struct amdgpu_device *adev)
 		adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0;
 		break;
 	case IP_VERSION(11, 0, 1):
+	case IP_VERSION(11, 0, 4):
 		adev->gfx.config.max_hw_contexts = 8;
 		adev->gfx.config.sc_prim_fifo_size_frontend = 0x20;
 		adev->gfx.config.sc_prim_fifo_size_backend = 0x100;
@@ -1285,6 +1290,7 @@ static int gfx_v11_0_sw_init(void *handle)
 	case IP_VERSION(11, 0, 1):
 	case IP_VERSION(11, 0, 2):
 	case IP_VERSION(11, 0, 3):
+	case IP_VERSION(11, 0, 4):
 		adev->gfx.me.num_me = 1;
 		adev->gfx.me.num_pipe_per_me = 1;
 		adev->gfx.me.num_queue_per_pipe = 1;
@@ -1626,7 +1632,8 @@ static void gfx_v11_0_constants_init(struct amdgpu_device *adev)
 	u32 tmp;
 	int i;
 
-	WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
+	if (!amdgpu_sriov_vf(adev))
+		WREG32_FIELD15_PREREG(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
 
 	gfx_v11_0_setup_rb(adev);
 	gfx_v11_0_get_cu_info(adev, &adev->gfx.cu_info);
@@ -2486,7 +2493,8 @@ static int gfx_v11_0_wait_for_rlc_autoload_complete(struct amdgpu_device *adev)
 	for (i = 0; i < adev->usec_timeout; i++) {
 		cp_status = RREG32_SOC15(GC, 0, regCP_STAT);
 
-		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1))
+		if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 1) ||
+				adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 4))
 			bootload_status = RREG32_SOC15(GC, 0,
 					regRLC_RLCS_BOOTLOAD_STATUS_gc_11_0_1);
 		else
@@ -4004,6 +4012,8 @@ static int gfx_v11_0_kiq_init_queue(struct amdgpu_ring *ring)
 		mutex_unlock(&adev->srbm_mutex);
 	} else {
 		memset((void *)mqd, 0, sizeof(*mqd));
+		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+			amdgpu_ring_clear_ring(ring);
 		mutex_lock(&adev->srbm_mutex);
 		soc21_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		amdgpu_ring_init_mqd(ring);
@@ -4390,7 +4400,6 @@ static int gfx_v11_0_hw_fini(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int r;
-	uint32_t tmp;
 
 	amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0);
 	amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0);
@@ -4409,15 +4418,14 @@ static int gfx_v11_0_hw_fini(void *handle)
 		amdgpu_mes_kiq_hw_fini(adev);
 	}
 
-	if (amdgpu_sriov_vf(adev)) {
-		gfx_v11_0_cp_gfx_enable(adev, false);
-		/* Program KIQ position of RLC_CP_SCHEDULERS during destroy */
-		tmp = RREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS);
-		tmp &= 0xffffff00;
-		WREG32_SOC15(GC, 0, regRLC_CP_SCHEDULERS, tmp);
-
+	if (amdgpu_sriov_vf(adev))
+		/* Remove the steps disabling CPG and clearing KIQ position,
+		 * so that CP could perform IDLE-SAVE during switch. Those
+		 * steps are necessary to avoid a DMAR error in gfx9 but it is
+		 * not reproduced on gfx11.
+		 */
 		return 0;
-	}
+
 	gfx_v11_0_cp_enable(adev, false);
 	gfx_v11_0_enable_gui_idle_interrupt(adev, false);
 
@@ -4656,6 +4664,8 @@ static int gfx_v11_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	adev->gfx.funcs = &gfx_v11_0_gfx_funcs;
+
 	adev->gfx.num_gfx_rings = GFX11_NUM_GFX_RINGS;
 	adev->gfx.num_compute_rings = min(amdgpu_gfx_get_num_kcq(adev),
 					  AMDGPU_MAX_COMPUTE_RINGS);
@@ -4673,6 +4683,26 @@ static int gfx_v11_0_early_init(void *handle)
 	return 0;
 }
 
+static int gfx_v11_0_ras_late_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	struct ras_common_if *gfx_common_if;
+	int ret;
+
+	gfx_common_if = kzalloc(sizeof(struct ras_common_if), GFP_KERNEL);
+	if (!gfx_common_if)
+		return -ENOMEM;
+
+	gfx_common_if->block = AMDGPU_RAS_BLOCK__GFX;
+
+	ret = amdgpu_ras_feature_enable(adev, gfx_common_if, true);
+	if (ret)
+		dev_warn(adev->dev, "Failed to enable gfx11 ras feature\n");
+
+	kfree(gfx_common_if);
+	return 0;
+}
+
 static int gfx_v11_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -4686,6 +4716,12 @@ static int gfx_v11_0_late_init(void *handle)
 	if (r)
 		return r;
 
+	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(11, 0, 3)) {
+		r = gfx_v11_0_ras_late_init(handle);
+		if (r)
+			return r;
+	}
+
 	return 0;
 }
 
@@ -5022,6 +5058,7 @@ static void gfx_v11_cntl_power_gating(struct amdgpu_device *adev, bool enable)
 	if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
 		switch (adev->ip_versions[GC_HWIP][0]) {
 		case IP_VERSION(11, 0, 1):
+		case IP_VERSION(11, 0, 4):
 			WREG32_SOC15(GC, 0, regRLC_PG_DELAY_3, RLC_PG_DELAY_3_DEFAULT_GC_11_0_1);
 			break;
 		default:
@@ -5051,9 +5088,11 @@ static int gfx_v11_0_set_powergating_state(void *handle,
 	switch (adev->ip_versions[GC_HWIP][0]) {
 	case IP_VERSION(11, 0, 0):
 	case IP_VERSION(11, 0, 2):
+	case IP_VERSION(11, 0, 3):
 		amdgpu_gfx_off_ctrl(adev, enable);
 		break;
 	case IP_VERSION(11, 0, 1):
+	case IP_VERSION(11, 0, 4):
 		gfx_v11_cntl_pg(adev, enable);
 		amdgpu_gfx_off_ctrl(adev, enable);
 		break;
@@ -5077,6 +5116,7 @@ static int gfx_v11_0_set_clockgating_state(void *handle,
 	case IP_VERSION(11, 0, 1):
 	case IP_VERSION(11, 0, 2):
 	case IP_VERSION(11, 0, 3):
+	case IP_VERSION(11, 0, 4):
 	        gfx_v11_0_update_gfx_clock_gating(adev,
 	                        state ==  AMD_CG_STATE_GATE);
 	        break;
@@ -5298,7 +5338,7 @@ static void gfx_v11_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 
 	control |= ib->length_dw | (vmid << 24);
 
-	if ((amdgpu_sriov_vf(ring->adev) || amdgpu_mcbp) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+	if (amdgpu_mcbp && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
 		control |= INDIRECT_BUFFER_PRE_ENB(1);
 
 		if (flags & AMDGPU_IB_PREEMPTED)
@@ -6059,6 +6099,7 @@ static const struct amdgpu_ring_funcs gfx_v11_0_ring_funcs_gfx = {
 	.align_mask = 0xff,
 	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
 	.support_64bit_ptrs = true,
+	.secure_submission_supported = true,
 	.vmhub = AMDGPU_GFXHUB_0,
 	.get_rptr = gfx_v11_0_ring_get_rptr_gfx,
 	.get_wptr = gfx_v11_0_ring_get_wptr_gfx,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 7f0b18b0d4c4..d47135606e3e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4643,6 +4643,8 @@ static int gfx_v8_0_kiq_init_queue(struct amdgpu_ring *ring)
 		memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation));
 		((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
 		((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+			amdgpu_ring_clear_ring(ring);
 		mutex_lock(&adev->srbm_mutex);
 		vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		gfx_v8_0_mqd_init(ring);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 0320be4a5fc6..f202b45c413c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -47,6 +47,7 @@
 
 #include "amdgpu_ras.h"
 
+#include "amdgpu_ring_mux.h"
 #include "gfx_v9_4.h"
 #include "gfx_v9_0.h"
 #include "gfx_v9_4_2.h"
@@ -56,6 +57,7 @@
 #include "asic_reg/gc/gc_9_0_default.h"
 
 #define GFX9_NUM_GFX_RINGS     1
+#define GFX9_NUM_SW_GFX_RINGS  2
 #define GFX9_MEC_HPD_SIZE 4096
 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L
 #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L
@@ -753,7 +755,7 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev);
 static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 				struct amdgpu_cu_info *cu_info);
 static uint64_t gfx_v9_0_get_gpu_clock_counter(struct amdgpu_device *adev);
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring);
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume);
 static u64 gfx_v9_0_ring_get_rptr_compute(struct amdgpu_ring *ring);
 static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 					  void *ras_error_status);
@@ -826,9 +828,10 @@ static void gfx_v9_0_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
 			PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
 
 	if (action == PREEMPT_QUEUES_NO_UNMAP) {
-		amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
-		amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
-		amdgpu_ring_write(kiq_ring, seq);
+		amdgpu_ring_write(kiq_ring, lower_32_bits(ring->wptr & ring->buf_mask));
+		amdgpu_ring_write(kiq_ring, 0);
+		amdgpu_ring_write(kiq_ring, 0);
+
 	} else {
 		amdgpu_ring_write(kiq_ring, 0);
 		amdgpu_ring_write(kiq_ring, 0);
@@ -1564,7 +1567,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
 			mask = 1;
 			cu_bitmap = 0;
 			counter = 0;
-			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
 
 			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
 				if (cu_info->bitmap[i][j] & mask) {
@@ -1583,7 +1586,7 @@ static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev)
 			cu_info->ao_cu_bitmap[i][j] = cu_bitmap;
 		}
 	}
-	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 }
 
@@ -1605,7 +1608,7 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev)
 
 	mutex_lock(&adev->grbm_idx_mutex);
 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
-	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
 
 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
@@ -1654,7 +1657,7 @@ static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev)
 
 	mutex_lock(&adev->grbm_idx_mutex);
 	/* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/
-	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff);
 
 	/* set mmRLC_LB_PARAMS = 0x003F_1006 */
@@ -1919,8 +1922,6 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev)
 	u32 gb_addr_config;
 	int err;
 
-	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
-
 	switch (adev->ip_versions[GC_HWIP][0]) {
 	case IP_VERSION(9, 0, 1):
 		adev->gfx.config.max_hw_contexts = 8;
@@ -2105,6 +2106,7 @@ static int gfx_v9_0_sw_init(void *handle)
 	struct amdgpu_ring *ring;
 	struct amdgpu_kiq *kiq;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	unsigned int hw_prio;
 
 	switch (adev->ip_versions[GC_HWIP][0]) {
 	case IP_VERSION(9, 0, 1):
@@ -2188,6 +2190,9 @@ static int gfx_v9_0_sw_init(void *handle)
 			sprintf(ring->name, "gfx_%d", i);
 		ring->use_doorbell = true;
 		ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+
+		/* disable scheduler on the real ring */
+		ring->no_scheduler = true;
 		r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
 				     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP,
 				     AMDGPU_RING_PRIO_DEFAULT, NULL);
@@ -2195,6 +2200,41 @@ static int gfx_v9_0_sw_init(void *handle)
 			return r;
 	}
 
+	/* set up the software rings */
+	if (adev->gfx.num_gfx_rings) {
+		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
+			ring = &adev->gfx.sw_gfx_ring[i];
+			ring->ring_obj = NULL;
+			sprintf(ring->name, amdgpu_sw_ring_name(i));
+			ring->use_doorbell = true;
+			ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1;
+			ring->is_sw_ring = true;
+			hw_prio = amdgpu_sw_ring_priority(i);
+			r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq,
+					     AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP, hw_prio,
+					     NULL);
+			if (r)
+				return r;
+			ring->wptr = 0;
+		}
+
+		/* init the muxer and add software rings */
+		r = amdgpu_ring_mux_init(&adev->gfx.muxer, &adev->gfx.gfx_ring[0],
+					 GFX9_NUM_SW_GFX_RINGS);
+		if (r) {
+			DRM_ERROR("amdgpu_ring_mux_init failed(%d)\n", r);
+			return r;
+		}
+		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++) {
+			r = amdgpu_ring_mux_add_sw_ring(&adev->gfx.muxer,
+							&adev->gfx.sw_gfx_ring[i]);
+			if (r) {
+				DRM_ERROR("amdgpu_ring_mux_add_sw_ring failed(%d)\n", r);
+				return r;
+			}
+		}
+	}
+
 	/* set up the compute queues - allocate horizontally across pipes */
 	ring_id = 0;
 	for (i = 0; i < adev->gfx.mec.num_mec; ++i) {
@@ -2245,6 +2285,12 @@ static int gfx_v9_0_sw_fini(void *handle)
 	int i;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	if (adev->gfx.num_gfx_rings) {
+		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+			amdgpu_ring_fini(&adev->gfx.sw_gfx_ring[i]);
+		amdgpu_ring_mux_fini(&adev->gfx.muxer);
+	}
+
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
 		amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
@@ -2324,13 +2370,13 @@ static void gfx_v9_0_setup_rb(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
 			data = gfx_v9_0_get_rb_active_bitmap(adev);
 			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
 					       rb_bitmap_width_per_sh);
 		}
 	}
-	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	adev->gfx.config.backend_enable_mask = active_rbs;
@@ -2467,14 +2513,14 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
 			for (k = 0; k < adev->usec_timeout; k++) {
 				if (RREG32_SOC15(GC, 0, mmRLC_SERDES_CU_MASTER_BUSY) == 0)
 					break;
 				udelay(1);
 			}
 			if (k == adev->usec_timeout) {
-				gfx_v9_0_select_se_sh(adev, 0xffffffff,
+				amdgpu_gfx_select_se_sh(adev, 0xffffffff,
 						      0xffffffff, 0xffffffff);
 				mutex_unlock(&adev->grbm_idx_mutex);
 				DRM_INFO("Timeout wait for RLC serdes %u,%u\n",
@@ -2483,7 +2529,7 @@ static void gfx_v9_0_wait_for_rlc_serdes(struct amdgpu_device *adev)
 			}
 		}
 	}
-	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	mask = RLC_SERDES_NONCU_MASTER_BUSY__SE_MASTER_BUSY_MASK |
@@ -3583,6 +3629,8 @@ static int gfx_v9_0_kiq_init_queue(struct amdgpu_ring *ring)
 		memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation));
 		((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF;
 		((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF;
+		if (amdgpu_sriov_vf(adev) && adev->in_suspend)
+			amdgpu_ring_clear_ring(ring);
 		mutex_lock(&adev->srbm_mutex);
 		soc15_grbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 		gfx_v9_0_mqd_init(ring);
@@ -4539,6 +4587,8 @@ static int gfx_v9_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
+	adev->gfx.funcs = &gfx_v9_0_gfx_funcs;
+
 	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
 	    adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
 		adev->gfx.num_gfx_rings = 0;
@@ -5155,11 +5205,17 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
 
 	control |= ib->length_dw | (vmid << 24);
 
-	if (amdgpu_sriov_vf(ring->adev) && (ib->flags & AMDGPU_IB_FLAG_PREEMPT)) {
+	if (ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
 		control |= INDIRECT_BUFFER_PRE_ENB(1);
 
+		if (flags & AMDGPU_IB_PREEMPTED)
+			control |= INDIRECT_BUFFER_PRE_RESUME(1);
+
 		if (!(ib->flags & AMDGPU_IB_FLAG_CE) && vmid)
-			gfx_v9_0_ring_emit_de_meta(ring);
+			gfx_v9_0_ring_emit_de_meta(ring,
+						   (!amdgpu_sriov_vf(ring->adev) &&
+						   flags & AMDGPU_IB_PREEMPTED) ?
+						   true : false);
 	}
 
 	amdgpu_ring_write(ring, header);
@@ -5214,17 +5270,24 @@ static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
 	bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
 	bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
 	bool writeback = flags & AMDGPU_FENCE_FLAG_TC_WB_ONLY;
+	bool exec = flags & AMDGPU_FENCE_FLAG_EXEC;
+	uint32_t dw2 = 0;
 
 	/* RELEASE_MEM - flush caches, send int */
 	amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
-	amdgpu_ring_write(ring, ((writeback ? (EOP_TC_WB_ACTION_EN |
-					       EOP_TC_NC_ACTION_EN) :
-					      (EOP_TCL1_ACTION_EN |
-					       EOP_TC_ACTION_EN |
-					       EOP_TC_WB_ACTION_EN |
-					       EOP_TC_MD_ACTION_EN)) |
-				 EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
-				 EVENT_INDEX(5)));
+
+	if (writeback) {
+		dw2 = EOP_TC_NC_ACTION_EN;
+	} else {
+		dw2 = EOP_TCL1_ACTION_EN | EOP_TC_ACTION_EN |
+				EOP_TC_MD_ACTION_EN;
+	}
+	dw2 |= EOP_TC_WB_ACTION_EN | EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+				EVENT_INDEX(5);
+	if (exec)
+		dw2 |= EOP_EXEC;
+
+	amdgpu_ring_write(ring, dw2);
 	amdgpu_ring_write(ring, DATA_SEL(write64bit ? 2 : 1) | INT_SEL(int_sel ? 2 : 0));
 
 	/*
@@ -5329,33 +5392,135 @@ static void gfx_v9_ring_emit_sb(struct amdgpu_ring *ring)
 	amdgpu_ring_write(ring, 0);
 }
 
-static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
 {
+	struct amdgpu_device *adev = ring->adev;
 	struct v9_ce_ib_state ce_payload = {0};
-	uint64_t csa_addr;
+	uint64_t offset, ce_payload_gpu_addr;
+	void *ce_payload_cpu_addr;
 	int cnt;
 
 	cnt = (sizeof(ce_payload) >> 2) + 4 - 2;
-	csa_addr = amdgpu_csa_vaddr(ring->adev);
+
+	if (ring->is_mes_queue) {
+		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+				  gfx[0].gfx_meta_data) +
+			offsetof(struct v9_gfx_meta_data, ce_payload);
+		ce_payload_gpu_addr =
+			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+		ce_payload_cpu_addr =
+			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+	} else {
+		offset = offsetof(struct v9_gfx_meta_data, ce_payload);
+		ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+		ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+	}
 
 	amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
 	amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
 				 WRITE_DATA_DST_SEL(8) |
 				 WR_CONFIRM) |
 				 WRITE_DATA_CACHE_POLICY(0));
-	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
-	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, ce_payload)));
-	amdgpu_ring_write_multiple(ring, (void *)&ce_payload, sizeof(ce_payload) >> 2);
+	amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
+
+	if (resume)
+		amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
+					   sizeof(ce_payload) >> 2);
+	else
+		amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
+					   sizeof(ce_payload) >> 2);
+}
+
+static int gfx_v9_0_ring_preempt_ib(struct amdgpu_ring *ring)
+{
+	int i, r = 0;
+	struct amdgpu_device *adev = ring->adev;
+	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
+	struct amdgpu_ring *kiq_ring = &kiq->ring;
+	unsigned long flags;
+
+	if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
+		return -EINVAL;
+
+	spin_lock_irqsave(&kiq->ring_lock, flags);
+
+	if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size)) {
+		spin_unlock_irqrestore(&kiq->ring_lock, flags);
+		return -ENOMEM;
+	}
+
+	/* assert preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, false);
+
+	ring->trail_seq += 1;
+	amdgpu_ring_alloc(ring, 13);
+	gfx_v9_0_ring_emit_fence(ring, ring->trail_fence_gpu_addr,
+				 ring->trail_seq, AMDGPU_FENCE_FLAG_EXEC | AMDGPU_FENCE_FLAG_INT);
+	/*reset the CP_VMID_PREEMPT after trailing fence*/
+	amdgpu_ring_emit_wreg(ring,
+			      SOC15_REG_OFFSET(GC, 0, mmCP_VMID_PREEMPT),
+			      0x0);
+
+	/* assert IB preemption, emit the trailing fence */
+	kiq->pmf->kiq_unmap_queues(kiq_ring, ring, PREEMPT_QUEUES_NO_UNMAP,
+				   ring->trail_fence_gpu_addr,
+				   ring->trail_seq);
+
+	amdgpu_ring_commit(kiq_ring);
+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
+
+	/* poll the trailing fence */
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (ring->trail_seq ==
+			le32_to_cpu(*ring->trail_fence_cpu_addr))
+			break;
+		udelay(1);
+	}
+
+	if (i >= adev->usec_timeout) {
+		r = -EINVAL;
+		DRM_WARN("ring %d timeout to preempt ib\n", ring->idx);
+	}
+
+	amdgpu_ring_commit(ring);
+
+	/* deassert preemption condition */
+	amdgpu_ring_set_preempt_cond_exec(ring, true);
+	return r;
 }
 
-static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
+static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
 {
+	struct amdgpu_device *adev = ring->adev;
 	struct v9_de_ib_state de_payload = {0};
-	uint64_t csa_addr, gds_addr;
+	uint64_t offset, gds_addr, de_payload_gpu_addr;
+	void *de_payload_cpu_addr;
 	int cnt;
 
-	csa_addr = amdgpu_csa_vaddr(ring->adev);
-	gds_addr = csa_addr + 4096;
+	if (ring->is_mes_queue) {
+		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+				  gfx[0].gfx_meta_data) +
+			offsetof(struct v9_gfx_meta_data, de_payload);
+		de_payload_gpu_addr =
+			amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+		de_payload_cpu_addr =
+			amdgpu_mes_ctx_get_offs_cpu_addr(ring, offset);
+
+		offset = offsetof(struct amdgpu_mes_ctx_meta_data,
+				  gfx[0].gds_backup) +
+			offsetof(struct v9_gfx_meta_data, de_payload);
+		gds_addr = amdgpu_mes_ctx_get_offs_gpu_addr(ring, offset);
+	} else {
+		offset = offsetof(struct v9_gfx_meta_data, de_payload);
+		de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
+		de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+
+		gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
+				 AMDGPU_CSA_SIZE - adev->gds.gds_size,
+				 PAGE_SIZE);
+	}
+
 	de_payload.gds_backup_addrlo = lower_32_bits(gds_addr);
 	de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
 
@@ -5365,9 +5530,15 @@ static void gfx_v9_0_ring_emit_de_meta(struct amdgpu_ring *ring)
 				 WRITE_DATA_DST_SEL(8) |
 				 WR_CONFIRM) |
 				 WRITE_DATA_CACHE_POLICY(0));
-	amdgpu_ring_write(ring, lower_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
-	amdgpu_ring_write(ring, upper_32_bits(csa_addr + offsetof(struct v9_gfx_meta_data, de_payload)));
-	amdgpu_ring_write_multiple(ring, (void *)&de_payload, sizeof(de_payload) >> 2);
+	amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
+	amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+
+	if (resume)
+		amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
+					   sizeof(de_payload) >> 2);
+	else
+		amdgpu_ring_write_multiple(ring, (void *)&de_payload,
+					   sizeof(de_payload) >> 2);
 }
 
 static void gfx_v9_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
@@ -5383,8 +5554,9 @@ static void gfx_v9_ring_emit_cntxcntl(struct amdgpu_ring *ring, uint32_t flags)
 {
 	uint32_t dw2 = 0;
 
-	if (amdgpu_sriov_vf(ring->adev))
-		gfx_v9_0_ring_emit_ce_meta(ring);
+	gfx_v9_0_ring_emit_ce_meta(ring,
+				   (!amdgpu_sriov_vf(ring->adev) &&
+				   flags & AMDGPU_IB_PREEMPTED) ? true : false);
 
 	dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
 	if (flags & AMDGPU_HAVE_CTX_SWITCH) {
@@ -5710,7 +5882,12 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev,
 
 	switch (me_id) {
 	case 0:
-		amdgpu_fence_process(&adev->gfx.gfx_ring[0]);
+		if (adev->gfx.num_gfx_rings &&
+		    !amdgpu_mcbp_handle_trailing_fence_irq(&adev->gfx.muxer)) {
+			/* Fence signals are handled on the software rings*/
+			for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+				amdgpu_fence_process(&adev->gfx.sw_gfx_ring[i]);
+		}
 		break;
 	case 1:
 	case 2:
@@ -6482,7 +6659,7 @@ static void gfx_v9_0_reset_ras_error_count(struct amdgpu_device *adev)
 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
-				gfx_v9_0_select_se_sh(adev, j, 0x0, k);
+				amdgpu_gfx_select_se_sh(adev, j, 0x0, k);
 				RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
 			}
 		}
@@ -6544,7 +6721,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 	for (i = 0; i < ARRAY_SIZE(gfx_v9_0_edc_counter_regs); i++) {
 		for (j = 0; j < gfx_v9_0_edc_counter_regs[i].se_num; j++) {
 			for (k = 0; k < gfx_v9_0_edc_counter_regs[i].instance; k++) {
-				gfx_v9_0_select_se_sh(adev, j, 0, k);
+				amdgpu_gfx_select_se_sh(adev, j, 0, k);
 				reg_value =
 					RREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_0_edc_counter_regs[i]));
 				if (reg_value)
@@ -6559,7 +6736,7 @@ static void gfx_v9_0_query_ras_error_count(struct amdgpu_device *adev,
 	err_data->ce_count += sec_count;
 	err_data->ue_count += ded_count;
 
-	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	gfx_v9_0_query_utc_edc_status(adev, err_data);
@@ -6707,6 +6884,62 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = {
 	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
 	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
 	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
+	.preempt_ib = gfx_v9_0_ring_preempt_ib,
+	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
+	.emit_wreg = gfx_v9_0_ring_emit_wreg,
+	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
+	.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
+	.soft_recovery = gfx_v9_0_ring_soft_recovery,
+	.emit_mem_sync = gfx_v9_0_emit_mem_sync,
+};
+
+static const struct amdgpu_ring_funcs gfx_v9_0_sw_ring_funcs_gfx = {
+	.type = AMDGPU_RING_TYPE_GFX,
+	.align_mask = 0xff,
+	.nop = PACKET3(PACKET3_NOP, 0x3FFF),
+	.support_64bit_ptrs = true,
+	.secure_submission_supported = true,
+	.vmhub = AMDGPU_GFXHUB_0,
+	.get_rptr = amdgpu_sw_ring_get_rptr_gfx,
+	.get_wptr = amdgpu_sw_ring_get_wptr_gfx,
+	.set_wptr = amdgpu_sw_ring_set_wptr_gfx,
+	.emit_frame_size = /* totally 242 maximum if 16 IBs */
+		5 +  /* COND_EXEC */
+		7 +  /* PIPELINE_SYNC */
+		SOC15_FLUSH_GPU_TLB_NUM_WREG * 5 +
+		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
+		2 + /* VM_FLUSH */
+		8 +  /* FENCE for VM_FLUSH */
+		20 + /* GDS switch */
+		4 + /* double SWITCH_BUFFER,
+		     * the first COND_EXEC jump to the place just
+		     * prior to this double SWITCH_BUFFER
+		     */
+		5 + /* COND_EXEC */
+		7 +	 /*	HDP_flush */
+		4 +	 /*	VGT_flush */
+		14 + /*	CE_META */
+		31 + /*	DE_META */
+		3 + /* CNTX_CTRL */
+		5 + /* HDP_INVL */
+		8 + 8 + /* FENCE x2 */
+		2 + /* SWITCH_BUFFER */
+		7, /* gfx_v9_0_emit_mem_sync */
+	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_gfx */
+	.emit_ib = gfx_v9_0_ring_emit_ib_gfx,
+	.emit_fence = gfx_v9_0_ring_emit_fence,
+	.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
+	.emit_vm_flush = gfx_v9_0_ring_emit_vm_flush,
+	.emit_gds_switch = gfx_v9_0_ring_emit_gds_switch,
+	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
+	.test_ring = gfx_v9_0_ring_test_ring,
+	.test_ib = gfx_v9_0_ring_test_ib,
+	.insert_nop = amdgpu_sw_ring_insert_nop,
+	.pad_ib = amdgpu_ring_generic_pad_ib,
+	.emit_switch_buffer = gfx_v9_ring_emit_sb,
+	.emit_cntxcntl = gfx_v9_ring_emit_cntxcntl,
+	.init_cond_exec = gfx_v9_0_ring_emit_init_cond_exec,
+	.patch_cond_exec = gfx_v9_0_ring_emit_patch_cond_exec,
 	.emit_frame_cntl = gfx_v9_0_ring_emit_frame_cntl,
 	.emit_wreg = gfx_v9_0_ring_emit_wreg,
 	.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
@@ -6792,6 +7025,11 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev)
 	for (i = 0; i < adev->gfx.num_gfx_rings; i++)
 		adev->gfx.gfx_ring[i].funcs = &gfx_v9_0_ring_funcs_gfx;
 
+	if (adev->gfx.num_gfx_rings) {
+		for (i = 0; i < GFX9_NUM_SW_GFX_RINGS; i++)
+			adev->gfx.sw_gfx_ring[i].funcs = &gfx_v9_0_sw_ring_funcs_gfx;
+	}
+
 	for (i = 0; i < adev->gfx.num_compute_rings; i++)
 		adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute;
 }
@@ -6963,7 +7201,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 			mask = 1;
 			ao_bitmap = 0;
 			counter = 0;
-			gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff);
+			amdgpu_gfx_select_se_sh(adev, i, j, 0xffffffff);
 			gfx_v9_0_set_user_cu_inactive_bitmap(
 				adev, disable_masks[i * adev->gfx.config.max_sh_per_se + j]);
 			bitmap = gfx_v9_0_get_cu_active_bitmap(adev);
@@ -6996,7 +7234,7 @@ static int gfx_v9_0_get_cu_info(struct amdgpu_device *adev,
 			cu_info->ao_cu_bitmap[i % 4][j + i / 4] = ao_bitmap;
 		}
 	}
-	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	cu_info->number = active_cu_number;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
index 8cf53e039c11..3f8676d23a5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v2_1.c
@@ -397,6 +397,9 @@ static void gfxhub_v2_1_gart_disable(struct amdgpu_device *adev)
 			    ENABLE_ADVANCED_DRIVER_MODEL, 0);
 	WREG32_SOC15(GC, 0, mmGCMC_VM_MX_L1_TLB_CNTL, tmp);
 
+	if (amdgpu_sriov_vf(adev))
+		return;
+
 	/* Setup L2 cache */
 	WREG32_FIELD15(GC, 0, GCVM_L2_CNTL, ENABLE_L2_CACHE, 0);
 	WREG32_SOC15(GC, 0, mmGCVM_L2_CNTL3, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c
index 5d3fffd4929f..080ff11ca305 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v3_0_3.c
@@ -154,6 +154,9 @@ static void gfxhub_v3_0_3_init_system_aperture_regs(struct amdgpu_device *adev)
 {
 	uint64_t value;
 
+	if (amdgpu_sriov_vf(adev))
+		return;
+
 	/* Disable AGP. */
 	WREG32_SOC15(GC, 0, regGCMC_VM_AGP_BASE, 0);
 	WREG32_SOC15(GC, 0, regGCMC_VM_AGP_TOP, 0);
@@ -354,18 +357,6 @@ static void gfxhub_v3_0_3_program_invalidation(struct amdgpu_device *adev)
 
 static int gfxhub_v3_0_3_gart_enable(struct amdgpu_device *adev)
 {
-	if (amdgpu_sriov_vf(adev)) {
-		/*
-		 * GCMC_VM_FB_LOCATION_BASE/TOP is NULL for VF, becuase they are
-		 * VF copy registers so vbios post doesn't program them, for
-		 * SRIOV driver need to program them
-		 */
-		WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_BASE,
-			     adev->gmc.vram_start >> 24);
-		WREG32_SOC15(GC, 0, regGCMC_VM_FB_LOCATION_TOP,
-			     adev->gmc.vram_end >> 24);
-	}
-
 	/* GART Enable. */
 	gfxhub_v3_0_3_init_gart_aperture_regs(adev);
 	gfxhub_v3_0_3_init_system_aperture_regs(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
index f513e2c2e964..21e46817d82d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
@@ -371,7 +371,9 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	 * translation. Avoid this by doing the invalidation from the SDMA
 	 * itself.
 	 */
-	r = amdgpu_job_alloc_with_ib(adev, 16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
+	r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.entity,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
 				     &job);
 	if (r)
 		goto error_alloc;
@@ -380,10 +382,7 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 	job->vm_needs_flush = true;
 	job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
 	amdgpu_ring_pad_ib(ring, &job->ibs[0]);
-	r = amdgpu_job_submit(job, &adev->mman.entity,
-			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
-	if (r)
-		goto error_submit;
+	fence = amdgpu_job_submit(job);
 
 	mutex_unlock(&adev->mman.gtt_window_lock);
 
@@ -392,9 +391,6 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
 
 	return;
 
-error_submit:
-	amdgpu_job_free(job);
-
 error_alloc:
 	mutex_unlock(&adev->mman.gtt_window_lock);
 	DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
@@ -612,6 +608,8 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
 				 struct amdgpu_bo_va_mapping *mapping,
 				 uint64_t *flags)
 {
+	struct amdgpu_bo *bo = mapping->bo_va->base.bo;
+
 	*flags &= ~AMDGPU_PTE_EXECUTABLE;
 	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
 
@@ -628,6 +626,11 @@ static void gmc_v10_0_get_vm_pte(struct amdgpu_device *adev,
 		*flags |= AMDGPU_PTE_SYSTEM;
 		*flags &= ~AMDGPU_PTE_VALID;
 	}
+
+	if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+			       AMDGPU_GEM_CREATE_UNCACHED))
+		*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
+			 AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
 }
 
 static unsigned gmc_v10_0_get_vbios_fb_size(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index 66dfb574cc7d..4326078689cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -503,6 +503,8 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
 				 struct amdgpu_bo_va_mapping *mapping,
 				 uint64_t *flags)
 {
+	struct amdgpu_bo *bo = mapping->bo_va->base.bo;
+
 	*flags &= ~AMDGPU_PTE_EXECUTABLE;
 	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
 
@@ -519,6 +521,11 @@ static void gmc_v11_0_get_vm_pte(struct amdgpu_device *adev,
 		*flags |= AMDGPU_PTE_SYSTEM;
 		*flags &= ~AMDGPU_PTE_VALID;
 	}
+
+	if (bo && bo->flags & (AMDGPU_GEM_CREATE_COHERENT |
+			       AMDGPU_GEM_CREATE_UNCACHED))
+		*flags = (*flags & ~AMDGPU_PTE_MTYPE_NV10_MASK) |
+			 AMDGPU_PTE_MTYPE_NV10(MTYPE_UC);
 }
 
 static unsigned gmc_v11_0_get_vbios_fb_size(struct amdgpu_device *adev)
@@ -551,7 +558,10 @@ static void gmc_v11_0_set_umc_funcs(struct amdgpu_device *adev)
 		adev->umc.node_inst_num = adev->gmc.num_umc;
 		adev->umc.max_ras_err_cnt_per_query = UMC_V8_10_TOTAL_CHANNEL_NUM(adev);
 		adev->umc.channel_offs = UMC_V8_10_PER_CHANNEL_OFFSET;
-		adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0];
+		if (adev->umc.node_inst_num == 4)
+			adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl_ext0[0][0][0];
+		else
+			adev->umc.channel_idx_tbl = &umc_v8_10_channel_idx_tbl[0][0][0];
 		adev->umc.ras = &umc_v8_10_ras;
 		break;
 	case IP_VERSION(8, 11, 0):
@@ -749,6 +759,7 @@ static int gmc_v11_0_sw_init(void *handle)
 	case IP_VERSION(11, 0, 1):
 	case IP_VERSION(11, 0, 2):
 	case IP_VERSION(11, 0, 3):
+	case IP_VERSION(11, 0, 4):
 		adev->num_vmhubs = 2;
 		/*
 		 * To fulfill 4-level page support,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 67ca16a8027c..08d6cf79fb15 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -1113,10 +1113,80 @@ static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level,
 	}
 }
 
+static void gmc_v9_0_get_coherence_flags(struct amdgpu_device *adev,
+					 struct amdgpu_bo *bo,
+					 struct amdgpu_bo_va_mapping *mapping,
+					 uint64_t *flags)
+{
+	struct amdgpu_device *bo_adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	bool is_vram = bo->tbo.resource->mem_type == TTM_PL_VRAM;
+	bool coherent = bo->flags & AMDGPU_GEM_CREATE_COHERENT;
+	bool uncached = bo->flags & AMDGPU_GEM_CREATE_UNCACHED;
+	unsigned int mtype;
+	bool snoop = false;
+
+	switch (adev->ip_versions[GC_HWIP][0]) {
+	case IP_VERSION(9, 4, 1):
+	case IP_VERSION(9, 4, 2):
+		if (is_vram) {
+			if (bo_adev == adev) {
+				if (uncached)
+					mtype = MTYPE_UC;
+				else if (coherent)
+					mtype = MTYPE_CC;
+				else
+					mtype = MTYPE_RW;
+				/* FIXME: is this still needed? Or does
+				 * amdgpu_ttm_tt_pde_flags already handle this?
+				 */
+				if (adev->ip_versions[GC_HWIP][0] ==
+					IP_VERSION(9, 4, 2) &&
+				    adev->gmc.xgmi.connected_to_cpu)
+					snoop = true;
+			} else {
+				if (uncached || coherent)
+					mtype = MTYPE_UC;
+				else
+					mtype = MTYPE_NC;
+				if (mapping->bo_va->is_xgmi)
+					snoop = true;
+			}
+		} else {
+			if (uncached || coherent)
+				mtype = MTYPE_UC;
+			else
+				mtype = MTYPE_NC;
+			/* FIXME: is this still needed? Or does
+			 * amdgpu_ttm_tt_pde_flags already handle this?
+			 */
+			snoop = true;
+		}
+		break;
+	default:
+		if (uncached || coherent)
+			mtype = MTYPE_UC;
+		else
+			mtype = MTYPE_NC;
+
+		/* FIXME: is this still needed? Or does
+		 * amdgpu_ttm_tt_pde_flags already handle this?
+		 */
+		if (!is_vram)
+			snoop = true;
+	}
+
+	if (mtype != MTYPE_NC)
+		*flags = (*flags & ~AMDGPU_PTE_MTYPE_VG10_MASK) |
+			 AMDGPU_PTE_MTYPE_VG10(mtype);
+	*flags |= snoop ? AMDGPU_PTE_SNOOPED : 0;
+}
+
 static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
 				struct amdgpu_bo_va_mapping *mapping,
 				uint64_t *flags)
 {
+	struct amdgpu_bo *bo = mapping->bo_va->base.bo;
+
 	*flags &= ~AMDGPU_PTE_EXECUTABLE;
 	*flags |= mapping->flags & AMDGPU_PTE_EXECUTABLE;
 
@@ -1128,14 +1198,9 @@ static void gmc_v9_0_get_vm_pte(struct amdgpu_device *adev,
 		*flags &= ~AMDGPU_PTE_VALID;
 	}
 
-	if ((adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 1) ||
-	     adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2)) &&
-	    !(*flags & AMDGPU_PTE_SYSTEM) &&
-	    mapping->bo_va->is_xgmi)
-		*flags |= AMDGPU_PTE_SNOOPED;
-
-	if (adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 2))
-		*flags |= mapping->flags & AMDGPU_PTE_SNOOPED;
+	if (bo && bo->tbo.resource)
+		gmc_v9_0_get_coherence_flags(adev, mapping->bo_va->base.bo,
+					     mapping, flags);
 }
 
 static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
index f87d0f6ffc93..f2b743a93915 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_5.c
@@ -807,16 +807,5 @@ static void jpeg_v2_5_set_ras_funcs(struct amdgpu_device *adev)
 		break;
 	}
 
-	if (adev->jpeg.ras) {
-		amdgpu_ras_register_ras_block(adev, &adev->jpeg.ras->ras_block);
-
-		strcpy(adev->jpeg.ras->ras_block.ras_comm.name, "jpeg");
-		adev->jpeg.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__JPEG;
-		adev->jpeg.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON;
-		adev->jpeg.ras_if = &adev->jpeg.ras->ras_block.ras_comm;
-
-		/* If don't define special ras_late_init function, use default ras_late_init */
-		if (!adev->jpeg.ras->ras_block.ras_late_init)
-			adev->jpeg.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init;
-	}
+	jpeg_set_ras_funcs(adev);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
index 63b0d0b810ec..3beb731b2ce5 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.c
@@ -27,6 +27,7 @@
 #include "soc15.h"
 #include "soc15d.h"
 #include "jpeg_v2_0.h"
+#include "jpeg_v4_0.h"
 
 #include "vcn/vcn_4_0_0_offset.h"
 #include "vcn/vcn_4_0_0_sh_mask.h"
@@ -38,6 +39,7 @@ static void jpeg_v4_0_set_dec_ring_funcs(struct amdgpu_device *adev);
 static void jpeg_v4_0_set_irq_funcs(struct amdgpu_device *adev);
 static int jpeg_v4_0_set_powergating_state(void *handle,
 				enum amd_powergating_state state);
+static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev);
 
 /**
  * jpeg_v4_0_early_init - set function pointers
@@ -55,6 +57,7 @@ static int jpeg_v4_0_early_init(void *handle)
 
 	jpeg_v4_0_set_dec_ring_funcs(adev);
 	jpeg_v4_0_set_irq_funcs(adev);
+	jpeg_v4_0_set_ras_funcs(adev);
 
 	return 0;
 }
@@ -78,6 +81,18 @@ static int jpeg_v4_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	/* JPEG DJPEG POISON EVENT */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+			VCN_4_0__SRCID_DJPEG0_POISON, &adev->jpeg.inst->irq);
+	if (r)
+		return r;
+
+	/* JPEG EJPEG POISON EVENT */
+	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VCN,
+			VCN_4_0__SRCID_EJPEG0_POISON, &adev->jpeg.inst->irq);
+	if (r)
+		return r;
+
 	r = amdgpu_jpeg_sw_init(adev);
 	if (r)
 		return r;
@@ -167,6 +182,8 @@ static int jpeg_v4_0_hw_fini(void *handle)
 	      RREG32_SOC15(JPEG, 0, regUVD_JRBC_STATUS))
 		jpeg_v4_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
 
+	amdgpu_irq_put(adev, &adev->jpeg.inst->irq, 0);
+
 	return 0;
 }
 
@@ -524,6 +541,10 @@ static int jpeg_v4_0_process_interrupt(struct amdgpu_device *adev,
 	case VCN_4_0__SRCID__JPEG_DECODE:
 		amdgpu_fence_process(&adev->jpeg.inst->ring_dec);
 		break;
+	case VCN_4_0__SRCID_DJPEG0_POISON:
+	case VCN_4_0__SRCID_EJPEG0_POISON:
+		amdgpu_jpeg_process_poison_irq(adev, source, entry);
+		break;
 	default:
 		DRM_DEV_ERROR(adev->dev, "Unhandled interrupt: %d %d\n",
 			  entry->src_id, entry->src_data[0]);
@@ -607,3 +628,63 @@ const struct amdgpu_ip_block_version jpeg_v4_0_ip_block = {
 	.rev = 0,
 	.funcs = &jpeg_v4_0_ip_funcs,
 };
+
+static uint32_t jpeg_v4_0_query_poison_by_instance(struct amdgpu_device *adev,
+		uint32_t instance, uint32_t sub_block)
+{
+	uint32_t poison_stat = 0, reg_value = 0;
+
+	switch (sub_block) {
+	case AMDGPU_JPEG_V4_0_JPEG0:
+		reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG0_STATUS);
+		poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG0_STATUS, POISONED_PF);
+		break;
+	case AMDGPU_JPEG_V4_0_JPEG1:
+		reg_value = RREG32_SOC15(JPEG, instance, regUVD_RAS_JPEG1_STATUS);
+		poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_JPEG1_STATUS, POISONED_PF);
+		break;
+	default:
+		break;
+	}
+
+	if (poison_stat)
+		dev_info(adev->dev, "Poison detected in JPEG%d sub_block%d\n",
+			instance, sub_block);
+
+	return poison_stat;
+}
+
+static bool jpeg_v4_0_query_ras_poison_status(struct amdgpu_device *adev)
+{
+	uint32_t inst = 0, sub = 0, poison_stat = 0;
+
+	for (inst = 0; inst < adev->jpeg.num_jpeg_inst; inst++)
+		for (sub = 0; sub < AMDGPU_JPEG_V4_0_MAX_SUB_BLOCK; sub++)
+			poison_stat +=
+				jpeg_v4_0_query_poison_by_instance(adev, inst, sub);
+
+	return !!poison_stat;
+}
+
+const struct amdgpu_ras_block_hw_ops jpeg_v4_0_ras_hw_ops = {
+	.query_poison_status = jpeg_v4_0_query_ras_poison_status,
+};
+
+static struct amdgpu_jpeg_ras jpeg_v4_0_ras = {
+	.ras_block = {
+		.hw_ops = &jpeg_v4_0_ras_hw_ops,
+	},
+};
+
+static void jpeg_v4_0_set_ras_funcs(struct amdgpu_device *adev)
+{
+	switch (adev->ip_versions[JPEG_HWIP][0]) {
+	case IP_VERSION(4, 0, 0):
+		adev->jpeg.ras = &jpeg_v4_0_ras;
+		break;
+	default:
+		break;
+	}
+
+	jpeg_set_ras_funcs(adev);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h
index f1ed6ccfedca..07d36c2abd6b 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0.h
@@ -24,6 +24,13 @@
 #ifndef __JPEG_V4_0_H__
 #define __JPEG_V4_0_H__
 
+enum amdgpu_jpeg_v4_0_sub_block {
+	AMDGPU_JPEG_V4_0_JPEG0 = 0,
+	AMDGPU_JPEG_V4_0_JPEG1,
+
+	AMDGPU_JPEG_V4_0_MAX_SUB_BLOCK,
+};
+
 extern const struct amdgpu_ip_block_version jpeg_v4_0_ip_block;
 
 #endif /* __JPEG_V4_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
index 067d10073a56..614394118a53 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v10_1.c
@@ -121,6 +121,10 @@ static int mes_v10_1_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 	if (r < 1) {
 		DRM_ERROR("MES failed to response msg=%d\n",
 			  x_pkt->header.opcode);
+
+		while (halt_if_hws_hang)
+			schedule();
+
 		return -ETIMEDOUT;
 	}
 
@@ -415,10 +419,6 @@ static int mes_v10_1_init_microcode(struct amdgpu_device *adev,
 
 	mes_hdr = (const struct mes_firmware_header_v1_0 *)
 		adev->mes.fw[pipe]->data;
-	adev->mes.ucode_fw_version[pipe] =
-		le32_to_cpu(mes_hdr->mes_ucode_version);
-	adev->mes.ucode_fw_version[pipe] =
-		le32_to_cpu(mes_hdr->mes_ucode_data_version);
 	adev->mes.uc_start_addr[pipe] =
 		le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
 		((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index fef7d020bc5f..970b066b37bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -98,7 +98,14 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 	struct amdgpu_device *adev = mes->adev;
 	struct amdgpu_ring *ring = &mes->ring;
 	unsigned long flags;
+	signed long timeout = adev->usec_timeout;
 
+	if (amdgpu_emu_mode) {
+		timeout *= 100;
+	} else if (amdgpu_sriov_vf(adev)) {
+		/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
+		timeout = 15 * 600 * 1000;
+	}
 	BUG_ON(size % 4 != 0);
 
 	spin_lock_irqsave(&mes->ring_lock, flags);
@@ -118,10 +125,14 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 	DRM_DEBUG("MES msg=%d was emitted\n", x_pkt->header.opcode);
 
 	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
-		      adev->usec_timeout * (amdgpu_emu_mode ? 100 : 1));
+		      timeout);
 	if (r < 1) {
 		DRM_ERROR("MES failed to response msg=%d\n",
 			  x_pkt->header.opcode);
+
+		while (halt_if_hws_hang)
+			schedule();
+
 		return -ETIMEDOUT;
 	}
 
@@ -377,6 +388,7 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes)
 	mes_set_hw_res_pkt.disable_reset = 1;
 	mes_set_hw_res_pkt.disable_mes_log = 1;
 	mes_set_hw_res_pkt.use_different_vmid_compute = 1;
+	mes_set_hw_res_pkt.enable_reg_active_poll = 1;
 	mes_set_hw_res_pkt.oversubscription_timer = 50;
 
 	return mes_v11_0_submit_pkt_and_poll_completion(mes,
@@ -478,10 +490,6 @@ static int mes_v11_0_init_microcode(struct amdgpu_device *adev,
 
 	mes_hdr = (const struct mes_firmware_header_v1_0 *)
 		adev->mes.fw[pipe]->data;
-	adev->mes.ucode_fw_version[pipe] =
-		le32_to_cpu(mes_hdr->mes_ucode_version);
-	adev->mes.ucode_fw_version[pipe] =
-		le32_to_cpu(mes_hdr->mes_ucode_data_version);
 	adev->mes.uc_start_addr[pipe] =
 		le32_to_cpu(mes_hdr->mes_uc_start_addr_lo) |
 		((uint64_t)(le32_to_cpu(mes_hdr->mes_uc_start_addr_hi)) << 32);
@@ -1246,7 +1254,9 @@ static int mes_v11_0_kiq_hw_fini(struct amdgpu_device *adev)
 	if (adev->mes.ring.sched.ready)
 		mes_v11_0_kiq_dequeue_sched(adev);
 
-	mes_v11_0_enable(adev, false);
+	if (!amdgpu_sriov_vf(adev))
+		mes_v11_0_enable(adev, false);
+
 	return 0;
 }
 
@@ -1332,7 +1342,8 @@ static int mes_v11_0_late_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (!amdgpu_in_reset(adev) &&
+	/* it's only intended for use in mes_self_test case, not for s0ix and reset */
+	if (!amdgpu_in_reset(adev) && !adev->in_s0ix &&
 	    (adev->ip_versions[GC_HWIP][0] != IP_VERSION(11, 0, 3)))
 		amdgpu_mes_self_test(adev);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
index 4d304f22889e..0e664d0cc8d5 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_0.c
@@ -32,8 +32,6 @@
 #include "gc/gc_10_1_0_offset.h"
 #include "soc15_common.h"
 
-#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid                      0x064d
-#define mmMM_ATC_L2_MISC_CG_Sienna_Cichlid_BASE_IDX             0
 #define mmDAGB0_CNTL_MISC2_Sienna_Cichlid                       0x0070
 #define mmDAGB0_CNTL_MISC2_Sienna_Cichlid_BASE_IDX              0
 
@@ -321,7 +319,7 @@ static void mmhub_v2_0_init_cache_regs(struct amdgpu_device *adev)
 
 	tmp = mmMMVM_L2_CNTL5_DEFAULT;
 	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
-	WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp);
+	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp);
 }
 
 static void mmhub_v2_0_enable_system_domain(struct amdgpu_device *adev)
@@ -574,7 +572,6 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
 	case IP_VERSION(2, 1, 0):
 	case IP_VERSION(2, 1, 1):
 	case IP_VERSION(2, 1, 2):
-		def  = data  = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
 		def1 = data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
 		break;
 	default:
@@ -608,8 +605,6 @@ static void mmhub_v2_0_update_medium_grain_clock_gating(struct amdgpu_device *ad
 	case IP_VERSION(2, 1, 0):
 	case IP_VERSION(2, 1, 1):
 	case IP_VERSION(2, 1, 2):
-		if (def != data)
-			WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);
 		if (def1 != data1)
 			WREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid, data1);
 		break;
@@ -634,8 +629,8 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
 	case IP_VERSION(2, 1, 0):
 	case IP_VERSION(2, 1, 1):
 	case IP_VERSION(2, 1, 2):
-		def  = data  = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
-		break;
+		/* There is no ATCL2 in MMHUB for 2.1.x */
+		return;
 	default:
 		def  = data  = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG);
 		break;
@@ -646,18 +641,8 @@ static void mmhub_v2_0_update_medium_grain_light_sleep(struct amdgpu_device *ade
 	else
 		data &= ~MM_ATC_L2_MISC_CG__MEM_LS_ENABLE_MASK;
 
-	if (def != data) {
-		switch (adev->ip_versions[MMHUB_HWIP][0]) {
-		case IP_VERSION(2, 1, 0):
-		case IP_VERSION(2, 1, 1):
-		case IP_VERSION(2, 1, 2):
-			WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid, data);
-			break;
-		default:
-			WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
-			break;
-		}
-	}
+	if (def != data)
+		WREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG, data);
 }
 
 static int mmhub_v2_0_set_clockgating(struct amdgpu_device *adev,
@@ -695,7 +680,10 @@ static void mmhub_v2_0_get_clockgating(struct amdgpu_device *adev, u64 *flags)
 	case IP_VERSION(2, 1, 0):
 	case IP_VERSION(2, 1, 1):
 	case IP_VERSION(2, 1, 2):
-		data  = RREG32_SOC15(MMHUB, 0, mmMM_ATC_L2_MISC_CG_Sienna_Cichlid);
+		/* There is no ATCL2 in MMHUB for 2.1.x. Keep the status
+		 * based on DAGB
+		 */
+		data = MM_ATC_L2_MISC_CG__ENABLE_MASK;
 		data1 = RREG32_SOC15(MMHUB, 0, mmDAGB0_CNTL_MISC2_Sienna_Cichlid);
 		break;
 	default:
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
index 1b027d069ab4..4638ea7c2eec 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v2_3.c
@@ -243,7 +243,7 @@ static void mmhub_v2_3_init_cache_regs(struct amdgpu_device *adev)
 
 	tmp = mmMMVM_L2_CNTL5_DEFAULT;
 	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
-	WREG32_SOC15(GC, 0, mmMMVM_L2_CNTL5, tmp);
+	WREG32_SOC15(MMHUB, 0, mmMMVM_L2_CNTL5, tmp);
 }
 
 static void mmhub_v2_3_enable_system_domain(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
index a1d26c4d80b8..16cc82215e2e 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0.c
@@ -275,7 +275,7 @@ static void mmhub_v3_0_init_cache_regs(struct amdgpu_device *adev)
 
 	tmp = regMMVM_L2_CNTL5_DEFAULT;
 	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
-	WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp);
+	WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
 }
 
 static void mmhub_v3_0_enable_system_domain(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
index e8058edc1d10..6bdf2ef0298d 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_1.c
@@ -269,7 +269,7 @@ static void mmhub_v3_0_1_init_cache_regs(struct amdgpu_device *adev)
 
 	tmp = regMMVM_L2_CNTL5_DEFAULT;
 	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
-	WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp);
+	WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
 }
 
 static void mmhub_v3_0_1_enable_system_domain(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
index 770be0a8f7ce..45465acaa943 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v3_0_2.c
@@ -268,7 +268,7 @@ static void mmhub_v3_0_2_init_cache_regs(struct amdgpu_device *adev)
 
 	tmp = regMMVM_L2_CNTL5_DEFAULT;
 	tmp = REG_SET_FIELD(tmp, MMVM_L2_CNTL5, L2_CACHE_SMALLK_FRAGMENT_SIZE, 0);
-	WREG32_SOC15(GC, 0, regMMVM_L2_CNTL5, tmp);
+	WREG32_SOC15(MMHUB, 0, regMMVM_L2_CNTL5, tmp);
 }
 
 static void mmhub_v3_0_2_enable_system_domain(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
index f772bb499f3e..0312c71c3af9 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/mmsch_v4_0.h
@@ -32,7 +32,6 @@
 
 #define RB_ENABLED (1 << 0)
 #define RB4_ENABLED (1 << 1)
-#define MMSCH_DOORBELL_OFFSET 0x8
 
 #define MMSCH_VF_ENGINE_STATUS__PASS 0x1
 
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index b3fba8dea63c..6853b93ac82e 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -82,10 +82,10 @@ static const struct amdgpu_video_codecs nv_video_codecs_encode =
 /* Navi1x */
 static const struct amdgpu_video_codec_info nv_video_codecs_decode_array[] =
 {
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
@@ -100,10 +100,10 @@ static const struct amdgpu_video_codecs nv_video_codecs_decode =
 /* Sienna Cichlid */
 static const struct amdgpu_video_codec_info sc_video_codecs_decode_array[] =
 {
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
@@ -125,10 +125,10 @@ static struct amdgpu_video_codec_info sriov_sc_video_codecs_encode_array[] =
 
 static struct amdgpu_video_codec_info sriov_sc_video_codecs_decode_array[] =
 {
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
@@ -149,7 +149,7 @@ static struct amdgpu_video_codecs sriov_sc_video_codecs_decode =
 
 /* Beige Goby*/
 static const struct amdgpu_video_codec_info bg_video_codecs_decode_array[] = {
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
 };
@@ -166,7 +166,7 @@ static const struct amdgpu_video_codecs bg_video_codecs_encode = {
 
 /* Yellow Carp*/
 static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = {
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index ed2293686f0d..9de46fa8f46c 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -126,32 +126,6 @@ out:
 	return err;
 }
 
-static int psp_v10_0_ring_init(struct psp_context *psp,
-			       enum psp_ring_type ring_type)
-{
-	int ret = 0;
-	struct psp_ring *ring;
-	struct amdgpu_device *adev = psp->adev;
-
-	ring = &psp->km_ring;
-
-	ring->ring_type = ring_type;
-
-	/* allocate 4k Page of Local Frame Buffer memory for ring */
-	ring->ring_size = 0x1000;
-	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &adev->firmware.rbuf,
-				      &ring->ring_mem_mc_addr,
-				      (void **)&ring->ring_mem);
-	if (ret) {
-		ring->ring_size = 0;
-		return ret;
-	}
-
-	return 0;
-}
-
 static int psp_v10_0_ring_create(struct psp_context *psp,
 				 enum psp_ring_type ring_type)
 {
@@ -245,7 +219,6 @@ static void psp_v10_0_ring_set_wptr(struct psp_context *psp, uint32_t value)
 
 static const struct psp_funcs psp_v10_0_funcs = {
 	.init_microcode = psp_v10_0_init_microcode,
-	.ring_init = psp_v10_0_ring_init,
 	.ring_create = psp_v10_0_ring_create,
 	.ring_stop = psp_v10_0_ring_stop,
 	.ring_destroy = psp_v10_0_ring_destroy,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index 9518b4394a6e..bd3e3e23a939 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -360,32 +360,6 @@ static int psp_v11_0_bootloader_load_sos(struct psp_context *psp)
 	return ret;
 }
 
-static int psp_v11_0_ring_init(struct psp_context *psp,
-			      enum psp_ring_type ring_type)
-{
-	int ret = 0;
-	struct psp_ring *ring;
-	struct amdgpu_device *adev = psp->adev;
-
-	ring = &psp->km_ring;
-
-	ring->ring_type = ring_type;
-
-	/* allocate 4k Page of Local Frame Buffer memory for ring */
-	ring->ring_size = 0x1000;
-	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &adev->firmware.rbuf,
-				      &ring->ring_mem_mc_addr,
-				      (void **)&ring->ring_mem);
-	if (ret) {
-		ring->ring_size = 0;
-		return ret;
-	}
-
-	return 0;
-}
-
 static int psp_v11_0_ring_stop(struct psp_context *psp,
 			      enum psp_ring_type ring_type)
 {
@@ -779,7 +753,6 @@ static const struct psp_funcs psp_v11_0_funcs = {
 	.bootloader_load_spl = psp_v11_0_bootloader_load_spl,
 	.bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv,
 	.bootloader_load_sos = psp_v11_0_bootloader_load_sos,
-	.ring_init = psp_v11_0_ring_init,
 	.ring_create = psp_v11_0_ring_create,
 	.ring_stop = psp_v11_0_ring_stop,
 	.ring_destroy = psp_v11_0_ring_destroy,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c
index ff13e1beb49b..5697760a819b 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0_8.c
@@ -28,32 +28,6 @@
 
 #include "mp/mp_11_0_8_offset.h"
 
-static int psp_v11_0_8_ring_init(struct psp_context *psp,
-			      enum psp_ring_type ring_type)
-{
-	int ret = 0;
-	struct psp_ring *ring;
-	struct amdgpu_device *adev = psp->adev;
-
-	ring = &psp->km_ring;
-
-	ring->ring_type = ring_type;
-
-	/* allocate 4k Page of Local Frame Buffer memory for ring */
-	ring->ring_size = 0x1000;
-	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &adev->firmware.rbuf,
-				      &ring->ring_mem_mc_addr,
-				      (void **)&ring->ring_mem);
-	if (ret) {
-		ring->ring_size = 0;
-		return ret;
-	}
-
-	return 0;
-}
-
 static int psp_v11_0_8_ring_stop(struct psp_context *psp,
 			       enum psp_ring_type ring_type)
 {
@@ -194,7 +168,6 @@ static void psp_v11_0_8_ring_set_wptr(struct psp_context *psp, uint32_t value)
 }
 
 static const struct psp_funcs psp_v11_0_8_funcs = {
-	.ring_init = psp_v11_0_8_ring_init,
 	.ring_create = psp_v11_0_8_ring_create,
 	.ring_stop = psp_v11_0_8_ring_stop,
 	.ring_destroy = psp_v11_0_8_ring_destroy,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
index 0b2ac418e4ac..8ed2281b6557 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v12_0.c
@@ -236,34 +236,6 @@ static void psp_v12_0_reroute_ih(struct psp_context *psp)
 		     0x80000000, 0x8000FFFF, false);
 }
 
-static int psp_v12_0_ring_init(struct psp_context *psp,
-			      enum psp_ring_type ring_type)
-{
-	int ret = 0;
-	struct psp_ring *ring;
-	struct amdgpu_device *adev = psp->adev;
-
-	psp_v12_0_reroute_ih(psp);
-
-	ring = &psp->km_ring;
-
-	ring->ring_type = ring_type;
-
-	/* allocate 4k Page of Local Frame Buffer memory for ring */
-	ring->ring_size = 0x1000;
-	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &adev->firmware.rbuf,
-				      &ring->ring_mem_mc_addr,
-				      (void **)&ring->ring_mem);
-	if (ret) {
-		ring->ring_size = 0;
-		return ret;
-	}
-
-	return 0;
-}
-
 static int psp_v12_0_ring_create(struct psp_context *psp,
 				enum psp_ring_type ring_type)
 {
@@ -272,6 +244,8 @@ static int psp_v12_0_ring_create(struct psp_context *psp,
 	struct psp_ring *ring = &psp->km_ring;
 	struct amdgpu_device *adev = psp->adev;
 
+	psp_v12_0_reroute_ih(psp);
+
 	if (amdgpu_sriov_vf(psp->adev)) {
 		/* Write low address of the ring to C2PMSG_102 */
 		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
@@ -425,7 +399,6 @@ static const struct psp_funcs psp_v12_0_funcs = {
 	.init_microcode = psp_v12_0_init_microcode,
 	.bootloader_load_sysdrv = psp_v12_0_bootloader_load_sysdrv,
 	.bootloader_load_sos = psp_v12_0_bootloader_load_sos,
-	.ring_init = psp_v12_0_ring_init,
 	.ring_create = psp_v12_0_ring_create,
 	.ring_stop = psp_v12_0_ring_stop,
 	.ring_destroy = psp_v12_0_ring_destroy,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 21d822b1d589..e6a26a7e5e5e 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -45,6 +45,9 @@ MODULE_FIRMWARE("amdgpu/psp_13_0_0_ta.bin");
 MODULE_FIRMWARE("amdgpu/psp_13_0_7_sos.bin");
 MODULE_FIRMWARE("amdgpu/psp_13_0_7_ta.bin");
 MODULE_FIRMWARE("amdgpu/psp_13_0_10_sos.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_10_ta.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_11_toc.bin");
+MODULE_FIRMWARE("amdgpu/psp_13_0_11_ta.bin");
 
 /* For large FW files the time to complete can be very long */
 #define USBC_PD_POLLING_LIMIT_S 240
@@ -101,6 +104,7 @@ static int psp_v13_0_init_microcode(struct psp_context *psp)
 	case IP_VERSION(13, 0, 3):
 	case IP_VERSION(13, 0, 5):
 	case IP_VERSION(13, 0, 8):
+	case IP_VERSION(13, 0, 11):
 		err = psp_init_toc_microcode(psp, chip_name);
 		if (err)
 			return err;
@@ -267,32 +271,6 @@ static int psp_v13_0_bootloader_load_sos(struct psp_context *psp)
 	return ret;
 }
 
-static int psp_v13_0_ring_init(struct psp_context *psp,
-			      enum psp_ring_type ring_type)
-{
-	int ret = 0;
-	struct psp_ring *ring;
-	struct amdgpu_device *adev = psp->adev;
-
-	ring = &psp->km_ring;
-
-	ring->ring_type = ring_type;
-
-	/* allocate 4k Page of Local Frame Buffer memory for ring */
-	ring->ring_size = 0x1000;
-	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &adev->firmware.rbuf,
-				      &ring->ring_mem_mc_addr,
-				      (void **)&ring->ring_mem);
-	if (ret) {
-		ring->ring_size = 0;
-		return ret;
-	}
-
-	return 0;
-}
-
 static int psp_v13_0_ring_stop(struct psp_context *psp,
 			       enum psp_ring_type ring_type)
 {
@@ -728,7 +706,6 @@ static const struct psp_funcs psp_v13_0_funcs = {
 	.bootloader_load_dbg_drv = psp_v13_0_bootloader_load_dbg_drv,
 	.bootloader_load_ras_drv = psp_v13_0_bootloader_load_ras_drv,
 	.bootloader_load_sos = psp_v13_0_bootloader_load_sos,
-	.ring_init = psp_v13_0_ring_init,
 	.ring_create = psp_v13_0_ring_create,
 	.ring_stop = psp_v13_0_ring_stop,
 	.ring_destroy = psp_v13_0_ring_destroy,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c
index 321089dfa7db..9d4e24e518e8 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0_4.c
@@ -199,32 +199,6 @@ static int psp_v13_0_4_bootloader_load_sos(struct psp_context *psp)
 	return ret;
 }
 
-static int psp_v13_0_4_ring_init(struct psp_context *psp,
-			      enum psp_ring_type ring_type)
-{
-	int ret = 0;
-	struct psp_ring *ring;
-	struct amdgpu_device *adev = psp->adev;
-
-	ring = &psp->km_ring;
-
-	ring->ring_type = ring_type;
-
-	/* allocate 4k Page of Local Frame Buffer memory for ring */
-	ring->ring_size = 0x1000;
-	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &adev->firmware.rbuf,
-				      &ring->ring_mem_mc_addr,
-				      (void **)&ring->ring_mem);
-	if (ret) {
-		ring->ring_size = 0;
-		return ret;
-	}
-
-	return 0;
-}
-
 static int psp_v13_0_4_ring_stop(struct psp_context *psp,
 			       enum psp_ring_type ring_type)
 {
@@ -373,7 +347,6 @@ static const struct psp_funcs psp_v13_0_4_funcs = {
 	.bootloader_load_intf_drv = psp_v13_0_4_bootloader_load_intf_drv,
 	.bootloader_load_dbg_drv = psp_v13_0_4_bootloader_load_dbg_drv,
 	.bootloader_load_sos = psp_v13_0_4_bootloader_load_sos,
-	.ring_init = psp_v13_0_4_ring_init,
 	.ring_create = psp_v13_0_4_ring_create,
 	.ring_stop = psp_v13_0_4_ring_stop,
 	.ring_destroy = psp_v13_0_4_ring_destroy,
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 01f3bcc62a6c..157147c6c94e 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -160,32 +160,6 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
 	return ret;
 }
 
-static int psp_v3_1_ring_init(struct psp_context *psp,
-			      enum psp_ring_type ring_type)
-{
-	int ret = 0;
-	struct psp_ring *ring;
-	struct amdgpu_device *adev = psp->adev;
-
-	ring = &psp->km_ring;
-
-	ring->ring_type = ring_type;
-
-	/* allocate 4k Page of Local Frame Buffer memory for ring */
-	ring->ring_size = 0x1000;
-	ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
-				      AMDGPU_GEM_DOMAIN_VRAM,
-				      &adev->firmware.rbuf,
-				      &ring->ring_mem_mc_addr,
-				      (void **)&ring->ring_mem);
-	if (ret) {
-		ring->ring_size = 0;
-		return ret;
-	}
-
-	return 0;
-}
-
 static void psp_v3_1_reroute_ih(struct psp_context *psp)
 {
 	struct amdgpu_device *adev = psp->adev;
@@ -401,7 +375,6 @@ static const struct psp_funcs psp_v3_1_funcs = {
 	.init_microcode = psp_v3_1_init_microcode,
 	.bootloader_load_sysdrv = psp_v3_1_bootloader_load_sysdrv,
 	.bootloader_load_sos = psp_v3_1_bootloader_load_sos,
-	.ring_init = psp_v3_1_ring_init,
 	.ring_create = psp_v3_1_ring_create,
 	.ring_stop = psp_v3_1_ring_stop,
 	.ring_destroy = psp_v3_1_ring_destroy,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 1122bd4eae98..4d780e4430e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -907,13 +907,13 @@ static void sdma_v4_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
 
 
 /**
- * sdma_v4_0_gfx_stop - stop the gfx async dma engines
+ * sdma_v4_0_gfx_enable - enable the gfx async dma engines
  *
  * @adev: amdgpu_device pointer
- *
- * Stop the gfx async dma ring buffers (VEGA10).
+ * @enable: enable SDMA RB/IB
+ * control the gfx async dma ring buffers (VEGA10).
  */
-static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
+static void sdma_v4_0_gfx_enable(struct amdgpu_device *adev, bool enable)
 {
 	u32 rb_cntl, ib_cntl;
 	int i;
@@ -922,10 +922,10 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev)
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL);
-		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0);
+		rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, enable ? 1 : 0);
 		WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl);
 		ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL);
-		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0);
+		ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, enable ? 1 : 0);
 		WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl);
 	}
 }
@@ -1044,7 +1044,7 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable)
 	int i;
 
 	if (!enable) {
-		sdma_v4_0_gfx_stop(adev);
+		sdma_v4_0_gfx_enable(adev, enable);
 		sdma_v4_0_rlc_stop(adev);
 		if (adev->sdma.has_page_queue)
 			sdma_v4_0_page_stop(adev);
@@ -1960,8 +1960,10 @@ static int sdma_v4_0_suspend(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* SMU saves SDMA state for us */
-	if (adev->in_s0ix)
+	if (adev->in_s0ix) {
+		sdma_v4_0_gfx_enable(adev, false);
 		return 0;
+	}
 
 	return sdma_v4_0_hw_fini(adev);
 }
@@ -1971,8 +1973,12 @@ static int sdma_v4_0_resume(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	/* SMU restores SDMA state for us */
-	if (adev->in_s0ix)
+	if (adev->in_s0ix) {
+		sdma_v4_0_enable(adev, true);
+		sdma_v4_0_gfx_enable(adev, true);
+		amdgpu_ttm_set_buffer_funcs_status(adev, true);
 		return 0;
+	}
 
 	return sdma_v4_0_hw_init(adev);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
index da3beb0bf2fa..049c26a45d85 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
@@ -455,6 +455,9 @@ static void sdma_v6_0_enable(struct amdgpu_device *adev, bool enable)
 		sdma_v6_0_rlc_stop(adev);
 	}
 
+	if (amdgpu_sriov_vf(adev))
+		return;
+
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		f32_cntl = RREG32_SOC15_IP(GC, sdma_v6_0_get_reg_offset(adev, i, regSDMA0_F32_CNTL));
 		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
@@ -1523,6 +1526,7 @@ static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = {
 	.align_mask = 0xf,
 	.nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP),
 	.support_64bit_ptrs = true,
+	.secure_submission_supported = true,
 	.vmhub = AMDGPU_GFXHUB_0,
 	.get_rptr = sdma_v6_0_ring_get_rptr,
 	.get_wptr = sdma_v6_0_ring_get_wptr,
diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c
index 4d5e718540aa..abca8b529721 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_dma.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c
@@ -112,14 +112,12 @@ static void si_dma_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
 
 static void si_dma_stop(struct amdgpu_device *adev)
 {
-	struct amdgpu_ring *ring;
 	u32 rb_cntl;
 	unsigned i;
 
 	amdgpu_sdma_unset_buffer_funcs_helper(adev);
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
-		ring = &adev->sdma.instance[i].ring;
 		/* dma0 */
 		rb_cntl = RREG32(DMA_RB_CNTL + sdma_offsets[i]);
 		rb_cntl &= ~DMA_RB_ENABLE;
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index e3b2b6b4f1a6..7cd17dda32ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -103,10 +103,10 @@ static const struct amdgpu_video_codecs vega_video_codecs_encode =
 /* Vega */
 static const struct amdgpu_video_codec_info vega_video_codecs_decode_array[] =
 {
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
 };
@@ -120,10 +120,10 @@ static const struct amdgpu_video_codecs vega_video_codecs_decode =
 /* Raven */
 static const struct amdgpu_video_codec_info rv_video_codecs_decode_array[] =
 {
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 4096, 4096, 186)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 4096, 4096, 0)},
@@ -138,10 +138,10 @@ static const struct amdgpu_video_codecs rv_video_codecs_decode =
 /* Renoir, Arcturus */
 static const struct amdgpu_video_codec_info rn_video_codecs_decode_array[] =
 {
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4906, 3)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4906, 5)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4906, 4)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG2, 4096, 4096, 3)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4, 4096, 4096, 5)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VC1, 4096, 4096, 4)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15d.h b/drivers/gpu/drm/amd/amdgpu/soc15d.h
index 799925d22fc8..2357ff39323f 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15d.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15d.h
@@ -162,6 +162,7 @@
 		 * 2 - Bypass
 		 */
 #define     INDIRECT_BUFFER_PRE_ENB(x)		 ((x) << 21)
+#define     INDIRECT_BUFFER_PRE_RESUME(x)               ((x) << 30)
 #define	PACKET3_COPY_DATA				0x40
 #define	PACKET3_PFP_SYNC_ME				0x42
 #define	PACKET3_COND_WRITE				0x45
@@ -184,6 +185,7 @@
 #define		EOP_TC_ACTION_EN                        (1 << 17) /* L2 */
 #define		EOP_TC_NC_ACTION_EN			(1 << 19)
 #define		EOP_TC_MD_ACTION_EN			(1 << 21) /* L2 metadata */
+#define		EOP_EXEC				(1 << 28) /* For Trailing Fence */
 
 #define		DATA_SEL(x)                             ((x) << 29)
 		/* 0 - discard
diff --git a/drivers/gpu/drm/amd/amdgpu/soc21.c b/drivers/gpu/drm/amd/amdgpu/soc21.c
index e08044008186..5562670b7b52 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc21.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc21.c
@@ -43,6 +43,7 @@
 #include "soc15.h"
 #include "soc15_common.h"
 #include "soc21.h"
+#include "mxgpu_nv.h"
 
 static const struct amd_ip_funcs soc21_common_ip_funcs;
 
@@ -61,7 +62,7 @@ static const struct amdgpu_video_codecs vcn_4_0_0_video_codecs_encode =
 
 static const struct amdgpu_video_codec_info vcn_4_0_0_video_codecs_decode_array[] =
 {
-	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4906, 52)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_MPEG4_AVC, 4096, 4096, 52)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
@@ -322,8 +323,10 @@ soc21_asic_reset_method(struct amdgpu_device *adev)
 	switch (adev->ip_versions[MP1_HWIP][0]) {
 	case IP_VERSION(13, 0, 0):
 	case IP_VERSION(13, 0, 7):
+	case IP_VERSION(13, 0, 10):
 		return AMD_RESET_METHOD_MODE1;
 	case IP_VERSION(13, 0, 4):
+	case IP_VERSION(13, 0, 11):
 		return AMD_RESET_METHOD_MODE2;
 	default:
 		if (amdgpu_dpm_is_baco_supported(adev))
@@ -584,10 +587,6 @@ static int soc21_common_early_init(void *handle)
 			AMD_PG_SUPPORT_JPEG |
 			AMD_PG_SUPPORT_ATHUB |
 			AMD_PG_SUPPORT_MMHUB;
-		if (amdgpu_sriov_vf(adev)) {
-			adev->cg_flags = 0;
-			adev->pg_flags = 0;
-		}
 		adev->external_rev_id = adev->rev_id + 0x1; // TODO: need update
 		break;
 	case IP_VERSION(11, 0, 2):
@@ -645,28 +644,64 @@ static int soc21_common_early_init(void *handle)
 		adev->pg_flags = AMD_PG_SUPPORT_VCN |
 			AMD_PG_SUPPORT_VCN_DPG |
 			AMD_PG_SUPPORT_JPEG;
-		if (amdgpu_sriov_vf(adev)) {
-			/* hypervisor control CG and PG enablement */
-			adev->cg_flags = 0;
-			adev->pg_flags = 0;
-		}
 		adev->external_rev_id = adev->rev_id + 0x20;
 		break;
+	case IP_VERSION(11, 0, 4):
+		adev->cg_flags =
+			AMD_CG_SUPPORT_GFX_CGCG |
+			AMD_CG_SUPPORT_GFX_CGLS |
+			AMD_CG_SUPPORT_GFX_MGCG |
+			AMD_CG_SUPPORT_GFX_FGCG |
+			AMD_CG_SUPPORT_REPEATER_FGCG |
+			AMD_CG_SUPPORT_GFX_PERF_CLK |
+			AMD_CG_SUPPORT_MC_MGCG |
+			AMD_CG_SUPPORT_MC_LS |
+			AMD_CG_SUPPORT_HDP_MGCG |
+			AMD_CG_SUPPORT_HDP_LS |
+			AMD_CG_SUPPORT_ATHUB_MGCG |
+			AMD_CG_SUPPORT_ATHUB_LS |
+			AMD_CG_SUPPORT_IH_CG |
+			AMD_CG_SUPPORT_BIF_MGCG |
+			AMD_CG_SUPPORT_BIF_LS |
+			AMD_CG_SUPPORT_VCN_MGCG |
+			AMD_CG_SUPPORT_JPEG_MGCG;
+		adev->pg_flags = AMD_PG_SUPPORT_VCN |
+			AMD_PG_SUPPORT_VCN_DPG |
+			AMD_PG_SUPPORT_GFX_PG |
+			AMD_PG_SUPPORT_JPEG;
+		adev->external_rev_id = adev->rev_id + 0x1;
+		break;
+
 	default:
 		/* FIXME: not supported yet */
 		return -EINVAL;
 	}
 
+	if (amdgpu_sriov_vf(adev)) {
+		amdgpu_virt_init_setting(adev);
+		xgpu_nv_mailbox_set_irq_funcs(adev);
+	}
+
 	return 0;
 }
 
 static int soc21_common_late_init(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (amdgpu_sriov_vf(adev))
+		xgpu_nv_mailbox_get_irq(adev);
+
 	return 0;
 }
 
 static int soc21_common_sw_init(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (amdgpu_sriov_vf(adev))
+		xgpu_nv_mailbox_add_irq_id(adev);
+
 	return 0;
 }
 
@@ -704,6 +739,9 @@ static int soc21_common_hw_fini(void *handle)
 	/* disable the doorbell aperture */
 	soc21_enable_doorbell_aperture(adev, false);
 
+	if (amdgpu_sriov_vf(adev))
+		xgpu_nv_mailbox_put_irq(adev);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
index 5d5d031c9e7d..72fd963f178b 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.c
@@ -187,9 +187,9 @@ static void umc_v6_7_ecc_info_query_ras_error_count(struct amdgpu_device *adev,
 	}
 }
 
-static void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
-					struct ras_err_data *err_data, uint64_t err_addr,
-					uint32_t ch_inst, uint32_t umc_inst)
+void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
+				    struct ras_err_data *err_data, uint64_t err_addr,
+				    uint32_t ch_inst, uint32_t umc_inst)
 {
 	uint32_t channel_index;
 	uint64_t soc_pa, retired_page, column;
@@ -553,5 +553,4 @@ struct amdgpu_umc_ras umc_v6_7_ras = {
 	.query_ras_poison_mode = umc_v6_7_query_ras_poison_mode,
 	.ecc_info_query_ras_error_count = umc_v6_7_ecc_info_query_ras_error_count,
 	.ecc_info_query_ras_error_address = umc_v6_7_ecc_info_query_ras_error_address,
-	.convert_ras_error_address = umc_v6_7_convert_error_address,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
index fe41ed2f5945..105245d5b6e5 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v6_7.h
@@ -71,5 +71,7 @@ extern const uint32_t
 	umc_v6_7_channel_idx_tbl_second[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];
 extern const uint32_t
 	umc_v6_7_channel_idx_tbl_first[UMC_V6_7_UMC_INSTANCE_NUM][UMC_V6_7_CHANNEL_INSTANCE_NUM];
-
+void umc_v6_7_convert_error_address(struct amdgpu_device *adev,
+                                    struct ras_err_data *err_data, uint64_t err_addr,
+                                    uint32_t ch_inst, uint32_t umc_inst);
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
index 91235df54e22..b7da4528cf0a 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.c
@@ -46,6 +46,16 @@ const struct channelnum_map_colbit umc_v8_10_channelnum_map_colbit_table[] = {
 };
 
 const uint32_t
+	umc_v8_10_channel_idx_tbl_ext0[]
+				[UMC_V8_10_UMC_INSTANCE_NUM]
+				[UMC_V8_10_CHANNEL_INSTANCE_NUM] = {
+	   {{1,   5}, {7,  3}},
+	   {{14, 15}, {13, 12}},
+	   {{10, 11}, {9,  8}},
+	   {{6,   2}, {0,  4}}
+	};
+
+const uint32_t
 	umc_v8_10_channel_idx_tbl[]
 				[UMC_V8_10_UMC_INSTANCE_NUM]
 				[UMC_V8_10_CHANNEL_INSTANCE_NUM] = {
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
index 849ede88e111..25eaf4af5fcf 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v8_10.h
@@ -66,5 +66,9 @@ extern const uint32_t
 				[UMC_V8_10_UMC_INSTANCE_NUM]
 				[UMC_V8_10_CHANNEL_INSTANCE_NUM];
 
+extern const uint32_t
+	umc_v8_10_channel_idx_tbl_ext0[]
+				[UMC_V8_10_UMC_INSTANCE_NUM]
+				[UMC_V8_10_CHANNEL_INSTANCE_NUM];
 #endif
 
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 375c440957dc..5fe872f4bea7 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -216,8 +216,8 @@ static int uvd_v6_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
 	uint64_t addr;
 	int i, r;
 
-	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
-					AMDGPU_IB_POOL_DIRECT, &job);
+	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+				     AMDGPU_IB_POOL_DIRECT, &job);
 	if (r)
 		return r;
 
@@ -280,8 +280,8 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring,
 	uint64_t addr;
 	int i, r;
 
-	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
-					AMDGPU_IB_POOL_DIRECT, &job);
+	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+				     AMDGPU_IB_POOL_DIRECT, &job);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index e668b3baa8c6..e407be6cb63c 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -213,7 +213,7 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring)
  *
  * Open up a stream for HW test
  */
-static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
+static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, u32 handle,
 				       struct amdgpu_bo *bo,
 				       struct dma_fence **fence)
 {
@@ -224,8 +224,8 @@ static int uvd_v7_0_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle
 	uint64_t addr;
 	int i, r;
 
-	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
-					AMDGPU_IB_POOL_DIRECT, &job);
+	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+				     AMDGPU_IB_POOL_DIRECT, &job);
 	if (r)
 		return r;
 
@@ -276,7 +276,7 @@ err:
  *
  * Close up a stream for HW test or if userspace failed to do so
  */
-static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
+static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, u32 handle,
 					struct amdgpu_bo *bo,
 					struct dma_fence **fence)
 {
@@ -287,8 +287,8 @@ static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handl
 	uint64_t addr;
 	int i, r;
 
-	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
-					AMDGPU_IB_POOL_DIRECT, &job);
+	r = amdgpu_job_alloc_with_ib(ring->adev, NULL, NULL, ib_size_dw * 4,
+				     AMDGPU_IB_POOL_DIRECT, &job);
 	if (r)
 		return r;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
index 8a7006d62a87..ec87b00f2e05 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v2_5.c
@@ -770,6 +770,33 @@ static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
 	}
 }
 
+static void vcn_v2_6_enable_ras(struct amdgpu_device *adev, int inst_idx,
+				bool indirect)
+{
+	uint32_t tmp;
+
+	if (adev->ip_versions[UVD_HWIP][0] != IP_VERSION(2, 6, 0))
+		return;
+
+	tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
+	      VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
+	      VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
+	      VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
+	WREG32_SOC15_DPG_MODE(inst_idx,
+			      SOC15_DPG_MODE_OFFSET(VCN, 0, mmVCN_RAS_CNTL),
+			      tmp, 0, indirect);
+
+	tmp = UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
+	WREG32_SOC15_DPG_MODE(inst_idx,
+			      SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_VCPU_INT_EN),
+			      tmp, 0, indirect);
+
+	tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
+	WREG32_SOC15_DPG_MODE(inst_idx,
+			      SOC15_DPG_MODE_OFFSET(VCN, 0, mmUVD_SYS_INT_EN),
+			      tmp, 0, indirect);
+}
+
 static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
 {
 	volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared.cpu_addr;
@@ -849,6 +876,8 @@ static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
 		VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);
 
+	vcn_v2_6_enable_ras(adev, inst_idx, indirect);
+
 	/* unblock VCPU register access */
 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
 		VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect);
@@ -2002,16 +2031,5 @@ static void vcn_v2_5_set_ras_funcs(struct amdgpu_device *adev)
 		break;
 	}
 
-	if (adev->vcn.ras) {
-		amdgpu_ras_register_ras_block(adev, &adev->vcn.ras->ras_block);
-
-		strcpy(adev->vcn.ras->ras_block.ras_comm.name, "vcn");
-		adev->vcn.ras->ras_block.ras_comm.block = AMDGPU_RAS_BLOCK__VCN;
-		adev->vcn.ras->ras_block.ras_comm.type = AMDGPU_RAS_ERROR__POISON;
-		adev->vcn.ras_if = &adev->vcn.ras->ras_block.ras_comm;
-
-		/* If don't define special ras_late_init function, use default ras_late_init */
-		if (!adev->vcn.ras->ras_block.ras_late_init)
-			adev->vcn.ras->ras_block.ras_late_init = amdgpu_ras_block_late_init;
-	}
+	amdgpu_vcn_set_ras_funcs(adev);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
index 897a5ce9c9da..1e2b22299975 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.c
@@ -31,6 +31,7 @@
 #include "soc15_hw_ip.h"
 #include "vcn_v2_0.h"
 #include "mmsch_v4_0.h"
+#include "vcn_v4_0.h"
 
 #include "vcn/vcn_4_0_0_offset.h"
 #include "vcn/vcn_4_0_0_sh_mask.h"
@@ -64,6 +65,7 @@ static int vcn_v4_0_set_powergating_state(void *handle,
 static int vcn_v4_0_pause_dpg_mode(struct amdgpu_device *adev,
         int inst_idx, struct dpg_pause_state *new_state);
 static void vcn_v4_0_unified_ring_set_wptr(struct amdgpu_ring *ring);
+static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev);
 
 /**
  * vcn_v4_0_early_init - set function pointers
@@ -84,6 +86,7 @@ static int vcn_v4_0_early_init(void *handle)
 
 	vcn_v4_0_set_unified_ring_funcs(adev);
 	vcn_v4_0_set_irq_funcs(adev);
+	vcn_v4_0_set_ras_funcs(adev);
 
 	return 0;
 }
@@ -100,7 +103,6 @@ static int vcn_v4_0_sw_init(void *handle)
 	struct amdgpu_ring *ring;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int i, r;
-	int vcn_doorbell_index = 0;
 
 	r = amdgpu_vcn_sw_init(adev);
 	if (r)
@@ -112,12 +114,6 @@ static int vcn_v4_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	if (amdgpu_sriov_vf(adev)) {
-		vcn_doorbell_index = adev->doorbell_index.vcn.vcn_ring0_1 - MMSCH_DOORBELL_OFFSET;
-		/* get DWORD offset */
-		vcn_doorbell_index = vcn_doorbell_index << 1;
-	}
-
 	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
 		volatile struct amdgpu_vcn4_fw_shared *fw_shared;
 
@@ -132,10 +128,16 @@ static int vcn_v4_0_sw_init(void *handle)
 		if (r)
 			return r;
 
+		/* VCN POISON TRAP */
+		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[i],
+				VCN_4_0__SRCID_UVD_POISON, &adev->vcn.inst[i].irq);
+		if (r)
+			return r;
+
 		ring = &adev->vcn.inst[i].ring_enc[0];
 		ring->use_doorbell = true;
 		if (amdgpu_sriov_vf(adev))
-			ring->doorbell_index = vcn_doorbell_index + i * (adev->vcn.num_enc_rings + 1) + 1;
+			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + i * (adev->vcn.num_enc_rings + 1) + 1;
 		else
 			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) + 2 + 8 * i;
 
@@ -296,6 +298,7 @@ static int vcn_v4_0_hw_fini(void *handle)
 			}
 		}
 
+		amdgpu_irq_put(adev, &adev->vcn.inst[i].irq, 0);
 	}
 
 	return 0;
@@ -859,6 +862,28 @@ static void vcn_v4_0_enable_clock_gating(struct amdgpu_device *adev, int inst)
 	return;
 }
 
+static void vcn_v4_0_enable_ras(struct amdgpu_device *adev, int inst_idx,
+				bool indirect)
+{
+	uint32_t tmp;
+
+	if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__VCN))
+		return;
+
+	tmp = VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK |
+	      VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK |
+	      VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK |
+	      VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK;
+	WREG32_SOC15_DPG_MODE(inst_idx,
+			      SOC15_DPG_MODE_OFFSET(VCN, 0, regVCN_RAS_CNTL),
+			      tmp, 0, indirect);
+
+	tmp = UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK;
+	WREG32_SOC15_DPG_MODE(inst_idx,
+			      SOC15_DPG_MODE_OFFSET(VCN, 0, regUVD_SYS_INT_EN),
+			      tmp, 0, indirect);
+}
+
 /**
  * vcn_v4_0_start_dpg_mode - VCN start with dpg mode
  *
@@ -947,6 +972,8 @@ static int vcn_v4_0_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, boo
 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
 		VCN, inst_idx, regUVD_LMI_CTRL2), tmp, 0, indirect);
 
+	vcn_v4_0_enable_ras(adev, inst_idx, indirect);
+
 	/* enable master interrupt */
 	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
 		VCN, inst_idx, regUVD_MASTINT_EN),
@@ -1939,6 +1966,9 @@ static int vcn_v4_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_
 	case VCN_4_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
 		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
 		break;
+	case VCN_4_0__SRCID_UVD_POISON:
+		amdgpu_vcn_process_poison_irq(adev, source, entry);
+		break;
 	default:
 		DRM_ERROR("Unhandled interrupt: %d %d\n",
 			  entry->src_id, entry->src_data[0]);
@@ -2001,3 +2031,60 @@ const struct amdgpu_ip_block_version vcn_v4_0_ip_block =
 	.rev = 0,
 	.funcs = &vcn_v4_0_ip_funcs,
 };
+
+static uint32_t vcn_v4_0_query_poison_by_instance(struct amdgpu_device *adev,
+			uint32_t instance, uint32_t sub_block)
+{
+	uint32_t poison_stat = 0, reg_value = 0;
+
+	switch (sub_block) {
+	case AMDGPU_VCN_V4_0_VCPU_VCODEC:
+		reg_value = RREG32_SOC15(VCN, instance, regUVD_RAS_VCPU_VCODEC_STATUS);
+		poison_stat = REG_GET_FIELD(reg_value, UVD_RAS_VCPU_VCODEC_STATUS, POISONED_PF);
+		break;
+	default:
+		break;
+	}
+
+	if (poison_stat)
+		dev_info(adev->dev, "Poison detected in VCN%d, sub_block%d\n",
+			instance, sub_block);
+
+	return poison_stat;
+}
+
+static bool vcn_v4_0_query_ras_poison_status(struct amdgpu_device *adev)
+{
+	uint32_t inst, sub;
+	uint32_t poison_stat = 0;
+
+	for (inst = 0; inst < adev->vcn.num_vcn_inst; inst++)
+		for (sub = 0; sub < AMDGPU_VCN_V4_0_MAX_SUB_BLOCK; sub++)
+			poison_stat +=
+				vcn_v4_0_query_poison_by_instance(adev, inst, sub);
+
+	return !!poison_stat;
+}
+
+const struct amdgpu_ras_block_hw_ops vcn_v4_0_ras_hw_ops = {
+	.query_poison_status = vcn_v4_0_query_ras_poison_status,
+};
+
+static struct amdgpu_vcn_ras vcn_v4_0_ras = {
+	.ras_block = {
+		.hw_ops = &vcn_v4_0_ras_hw_ops,
+	},
+};
+
+static void vcn_v4_0_set_ras_funcs(struct amdgpu_device *adev)
+{
+	switch (adev->ip_versions[VCN_HWIP][0]) {
+	case IP_VERSION(4, 0, 0):
+		adev->vcn.ras = &vcn_v4_0_ras;
+		break;
+	default:
+		break;
+	}
+
+	amdgpu_vcn_set_ras_funcs(adev);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h
index 7c5c9d91bb52..7d3d11f40f27 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0.h
@@ -24,6 +24,12 @@
 #ifndef __VCN_V4_0_H__
 #define __VCN_V4_0_H__
 
+enum amdgpu_vcn_v4_0_sub_block {
+	AMDGPU_VCN_V4_0_VCPU_VCODEC = 0,
+
+	AMDGPU_VCN_V4_0_MAX_SUB_BLOCK,
+};
+
 extern const struct amdgpu_ip_block_version vcn_v4_0_ip_block;
 
 #endif /* __VCN_V4_0_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
index 59dfca093155..1706081d054d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
@@ -267,7 +267,7 @@ static void vega20_ih_reroute_ih(struct amdgpu_device *adev)
 	/* vega20 ih reroute will go through psp this
 	 * function is used for newer asics starting arcturus
 	 */
-	if (adev->asic_type >= CHIP_ARCTURUS) {
+	if (adev->ip_versions[OSSSYS_HWIP][0] >= IP_VERSION(4, 2, 1)) {
 		/* Reroute to IH ring 1 for VMC */
 		WREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_INDEX, 0x12);
 		tmp = RREG32_SOC15(OSSSYS, 0, mmIH_CLIENT_CFG_DATA);
@@ -308,7 +308,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
 
 	adev->nbio.funcs->ih_control(adev);
 
-	if (adev->asic_type == CHIP_ARCTURUS &&
+	if ((adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 2, 1)) &&
 	    adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
 		ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN);
 		if (adev->irq.ih.use_bus_addr) {
@@ -321,7 +321,7 @@ static int vega20_ih_irq_init(struct amdgpu_device *adev)
 	/* psp firmware won't program IH_CHICKEN for aldebaran
 	 * driver needs to program it properly according to
 	 * MC_SPACE type in IH_RB_CNTL */
-	if (adev->asic_type == CHIP_ALDEBARAN) {
+	if (adev->ip_versions[OSSSYS_HWIP][0] == IP_VERSION(4, 4, 0)) {
 		ih_chicken = RREG32_SOC15(OSSSYS, 0, mmIH_CHICKEN_ALDEBARAN);
 		if (adev->irq.ih.use_bus_addr) {
 			ih_chicken = REG_SET_FIELD(ih_chicken, IH_CHICKEN,
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index f6ffd7c96ff9..12ef782eb478 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -2111,6 +2111,8 @@ void vi_set_virt_ops(struct amdgpu_device *adev)
 
 int vi_set_ip_blocks(struct amdgpu_device *adev)
 {
+	amdgpu_device_set_sriov_virtual_display(adev);
+
 	switch (adev->asic_type) {
 	case CHIP_TOPAZ:
 		/* topaz has no DCE, UVD, VCE */
@@ -2130,7 +2132,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
-		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
+		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
 		else if (amdgpu_device_has_dc_support(adev))
@@ -2150,7 +2152,7 @@ int vi_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block);
 		amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
-		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
+		if (adev->enable_virtual_display)
 			amdgpu_device_ip_block_add(adev, &amdgpu_vkms_ip_block);
 #if defined(CONFIG_DRM_AMD_DC)
 		else if (amdgpu_device_has_dc_support(adev))
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index c7118843db05..0c4c5499bb5c 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -2495,442 +2495,444 @@ static const uint32_t cwsr_trap_gfx10_hex[] = {
 	0xbf9f0000, 0x00000000,
 };
 static const uint32_t cwsr_trap_gfx11_hex[] = {
-	0xbfa00001, 0xbfa0021e,
+	0xbfa00001, 0xbfa00221,
 	0xb0804006, 0xb8f8f802,
 	0x9178ff78, 0x00020006,
-	0xb8fbf803, 0xbf0d9f6d,
-	0xbfa20006, 0x8b6eff78,
-	0x00002000, 0xbfa10009,
-	0x8b6eff6d, 0x00ff0000,
-	0xbfa2001e, 0x8b6eff7b,
-	0x00000400, 0xbfa20041,
-	0xbf830010, 0xb8fbf803,
-	0xbfa0fffa, 0x8b6eff7b,
-	0x00000900, 0xbfa20015,
-	0x8b6eff7b, 0x000071ff,
-	0xbfa10008, 0x8b6fff7b,
-	0x00007080, 0xbfa10001,
-	0xbeee1287, 0xb8eff801,
-	0x846e8c6e, 0x8b6e6f6e,
-	0xbfa2000a, 0x8b6eff6d,
-	0x00ff0000, 0xbfa20007,
-	0xb8eef801, 0x8b6eff6e,
-	0x00000800, 0xbfa20003,
+	0xb8fbf803, 0xbf0d9e6d,
+	0xbfa10001, 0xbfbd0000,
+	0xbf0d9f6d, 0xbfa20006,
+	0x8b6eff78, 0x00002000,
+	0xbfa10009, 0x8b6eff6d,
+	0x00ff0000, 0xbfa2001e,
 	0x8b6eff7b, 0x00000400,
-	0xbfa20026, 0xbefa4d82,
-	0xbf89fc07, 0x84fa887a,
-	0xf4005bbd, 0xf8000010,
-	0xbf89fc07, 0x846e976e,
-	0x9177ff77, 0x00800000,
-	0x8c776e77, 0xf4045bbd,
-	0xf8000000, 0xbf89fc07,
-	0xf4045ebd, 0xf8000008,
-	0xbf89fc07, 0x8bee6e6e,
-	0xbfa10001, 0xbe80486e,
-	0x8b6eff6d, 0x01ff0000,
-	0xbfa20005, 0x8c78ff78,
-	0x00002000, 0x80ec886c,
-	0x82ed806d, 0xbfa00005,
-	0x8b6eff6d, 0x01000000,
-	0xbfa20002, 0x806c846c,
-	0x826d806d, 0x8b6dff6d,
-	0x0000ffff, 0x8bfe7e7e,
-	0x8bea6a6a, 0xb978f802,
-	0xbe804a6c, 0x8b6dff6d,
-	0x0000ffff, 0xbefa0080,
-	0xb97a0283, 0xbeee007e,
-	0xbeef007f, 0xbefe0180,
-	0xbefe4d84, 0xbf89fc07,
-	0x8b7aff7f, 0x04000000,
-	0x847a857a, 0x8c6d7a6d,
-	0xbefa007e, 0x8b7bff7f,
-	0x0000ffff, 0xbefe00c1,
-	0xbeff00c1, 0xdca6c000,
-	0x007a0000, 0x7e000280,
-	0xbefe007a, 0xbeff007b,
-	0xb8fb02dc, 0x847b997b,
-	0xb8fa3b05, 0x807a817a,
-	0xbf0d997b, 0xbfa20002,
-	0x847a897a, 0xbfa00001,
-	0x847a8a7a, 0xb8fb1e06,
-	0x847b8a7b, 0x807a7b7a,
+	0xbfa20041, 0xbf830010,
+	0xb8fbf803, 0xbfa0fffa,
+	0x8b6eff7b, 0x00000900,
+	0xbfa20015, 0x8b6eff7b,
+	0x000071ff, 0xbfa10008,
+	0x8b6fff7b, 0x00007080,
+	0xbfa10001, 0xbeee1287,
+	0xb8eff801, 0x846e8c6e,
+	0x8b6e6f6e, 0xbfa2000a,
+	0x8b6eff6d, 0x00ff0000,
+	0xbfa20007, 0xb8eef801,
+	0x8b6eff6e, 0x00000800,
+	0xbfa20003, 0x8b6eff7b,
+	0x00000400, 0xbfa20026,
+	0xbefa4d82, 0xbf89fc07,
+	0x84fa887a, 0xf4005bbd,
+	0xf8000010, 0xbf89fc07,
+	0x846e976e, 0x9177ff77,
+	0x00800000, 0x8c776e77,
+	0xf4045bbd, 0xf8000000,
+	0xbf89fc07, 0xf4045ebd,
+	0xf8000008, 0xbf89fc07,
+	0x8bee6e6e, 0xbfa10001,
+	0xbe80486e, 0x8b6eff6d,
+	0x01ff0000, 0xbfa20005,
+	0x8c78ff78, 0x00002000,
+	0x80ec886c, 0x82ed806d,
+	0xbfa00005, 0x8b6eff6d,
+	0x01000000, 0xbfa20002,
+	0x806c846c, 0x826d806d,
+	0x8b6dff6d, 0x0000ffff,
+	0x8bfe7e7e, 0x8bea6a6a,
+	0xb978f802, 0xbe804a6c,
+	0x8b6dff6d, 0x0000ffff,
+	0xbefa0080, 0xb97a0283,
+	0xbeee007e, 0xbeef007f,
+	0xbefe0180, 0xbefe4d84,
+	0xbf89fc07, 0x8b7aff7f,
+	0x04000000, 0x847a857a,
+	0x8c6d7a6d, 0xbefa007e,
 	0x8b7bff7f, 0x0000ffff,
-	0x807aff7a, 0x00000200,
-	0x807a7e7a, 0x827b807b,
-	0xd7610000, 0x00010870,
-	0xd7610000, 0x00010a71,
-	0xd7610000, 0x00010c72,
-	0xd7610000, 0x00010e73,
-	0xd7610000, 0x00011074,
-	0xd7610000, 0x00011275,
-	0xd7610000, 0x00011476,
-	0xd7610000, 0x00011677,
-	0xd7610000, 0x00011a79,
-	0xd7610000, 0x00011c7e,
-	0xd7610000, 0x00011e7f,
-	0xbefe00ff, 0x00003fff,
-	0xbeff0080, 0xdca6c040,
-	0x007a0000, 0xd760007a,
-	0x00011d00, 0xd760007b,
-	0x00011f00, 0xbefe007a,
-	0xbeff007b, 0xbef4007e,
-	0x8b75ff7f, 0x0000ffff,
-	0x8c75ff75, 0x00040000,
-	0xbef60080, 0xbef700ff,
-	0x10807fac, 0xbef1007d,
-	0xbef00080, 0xb8f302dc,
-	0x84739973, 0xbefe00c1,
-	0x857d9973, 0x8b7d817d,
-	0xbf06817d, 0xbfa20002,
-	0xbeff0080, 0xbfa00002,
-	0xbeff00c1, 0xbfa00009,
+	0xbefe00c1, 0xbeff00c1,
+	0xdca6c000, 0x007a0000,
+	0x7e000280, 0xbefe007a,
+	0xbeff007b, 0xb8fb02dc,
+	0x847b997b, 0xb8fa3b05,
+	0x807a817a, 0xbf0d997b,
+	0xbfa20002, 0x847a897a,
+	0xbfa00001, 0x847a8a7a,
+	0xb8fb1e06, 0x847b8a7b,
+	0x807a7b7a, 0x8b7bff7f,
+	0x0000ffff, 0x807aff7a,
+	0x00000200, 0x807a7e7a,
+	0x827b807b, 0xd7610000,
+	0x00010870, 0xd7610000,
+	0x00010a71, 0xd7610000,
+	0x00010c72, 0xd7610000,
+	0x00010e73, 0xd7610000,
+	0x00011074, 0xd7610000,
+	0x00011275, 0xd7610000,
+	0x00011476, 0xd7610000,
+	0x00011677, 0xd7610000,
+	0x00011a79, 0xd7610000,
+	0x00011c7e, 0xd7610000,
+	0x00011e7f, 0xbefe00ff,
+	0x00003fff, 0xbeff0080,
+	0xdca6c040, 0x007a0000,
+	0xd760007a, 0x00011d00,
+	0xd760007b, 0x00011f00,
+	0xbefe007a, 0xbeff007b,
+	0xbef4007e, 0x8b75ff7f,
+	0x0000ffff, 0x8c75ff75,
+	0x00040000, 0xbef60080,
+	0xbef700ff, 0x10807fac,
+	0xbef1007d, 0xbef00080,
+	0xb8f302dc, 0x84739973,
+	0xbefe00c1, 0x857d9973,
+	0x8b7d817d, 0xbf06817d,
+	0xbfa20002, 0xbeff0080,
+	0xbfa00002, 0xbeff00c1,
+	0xbfa00009, 0xbef600ff,
+	0x01000000, 0xe0685080,
+	0x701d0100, 0xe0685100,
+	0x701d0200, 0xe0685180,
+	0x701d0300, 0xbfa00008,
 	0xbef600ff, 0x01000000,
-	0xe0685080, 0x701d0100,
-	0xe0685100, 0x701d0200,
-	0xe0685180, 0x701d0300,
-	0xbfa00008, 0xbef600ff,
-	0x01000000, 0xe0685100,
-	0x701d0100, 0xe0685200,
-	0x701d0200, 0xe0685300,
-	0x701d0300, 0xb8f03b05,
-	0x80708170, 0xbf0d9973,
-	0xbfa20002, 0x84708970,
-	0xbfa00001, 0x84708a70,
-	0xb8fa1e06, 0x847a8a7a,
-	0x80707a70, 0x8070ff70,
-	0x00000200, 0xbef600ff,
-	0x01000000, 0x7e000280,
-	0x7e020280, 0x7e040280,
-	0xbefd0080, 0xd7610002,
-	0x0000fa71, 0x807d817d,
-	0xd7610002, 0x0000fa6c,
-	0x807d817d, 0x917aff6d,
-	0x80000000, 0xd7610002,
-	0x0000fa7a, 0x807d817d,
-	0xd7610002, 0x0000fa6e,
-	0x807d817d, 0xd7610002,
-	0x0000fa6f, 0x807d817d,
-	0xd7610002, 0x0000fa78,
-	0x807d817d, 0xb8faf803,
-	0xd7610002, 0x0000fa7a,
-	0x807d817d, 0xd7610002,
-	0x0000fa7b, 0x807d817d,
-	0xb8f1f801, 0xd7610002,
-	0x0000fa71, 0x807d817d,
-	0xb8f1f814, 0xd7610002,
-	0x0000fa71, 0x807d817d,
-	0xb8f1f815, 0xd7610002,
-	0x0000fa71, 0x807d817d,
-	0xbefe00ff, 0x0000ffff,
-	0xbeff0080, 0xe0685000,
-	0x701d0200, 0xbefe00c1,
+	0xe0685100, 0x701d0100,
+	0xe0685200, 0x701d0200,
+	0xe0685300, 0x701d0300,
 	0xb8f03b05, 0x80708170,
 	0xbf0d9973, 0xbfa20002,
 	0x84708970, 0xbfa00001,
 	0x84708a70, 0xb8fa1e06,
 	0x847a8a7a, 0x80707a70,
+	0x8070ff70, 0x00000200,
 	0xbef600ff, 0x01000000,
-	0xbef90080, 0xbefd0080,
-	0xbf800000, 0xbe804100,
-	0xbe824102, 0xbe844104,
-	0xbe864106, 0xbe884108,
-	0xbe8a410a, 0xbe8c410c,
-	0xbe8e410e, 0xd7610002,
-	0x0000f200, 0x80798179,
-	0xd7610002, 0x0000f201,
+	0x7e000280, 0x7e020280,
+	0x7e040280, 0xbefd0080,
+	0xd7610002, 0x0000fa71,
+	0x807d817d, 0xd7610002,
+	0x0000fa6c, 0x807d817d,
+	0x917aff6d, 0x80000000,
+	0xd7610002, 0x0000fa7a,
+	0x807d817d, 0xd7610002,
+	0x0000fa6e, 0x807d817d,
+	0xd7610002, 0x0000fa6f,
+	0x807d817d, 0xd7610002,
+	0x0000fa78, 0x807d817d,
+	0xb8faf803, 0xd7610002,
+	0x0000fa7a, 0x807d817d,
+	0xd7610002, 0x0000fa7b,
+	0x807d817d, 0xb8f1f801,
+	0xd7610002, 0x0000fa71,
+	0x807d817d, 0xb8f1f814,
+	0xd7610002, 0x0000fa71,
+	0x807d817d, 0xb8f1f815,
+	0xd7610002, 0x0000fa71,
+	0x807d817d, 0xbefe00ff,
+	0x0000ffff, 0xbeff0080,
+	0xe0685000, 0x701d0200,
+	0xbefe00c1, 0xb8f03b05,
+	0x80708170, 0xbf0d9973,
+	0xbfa20002, 0x84708970,
+	0xbfa00001, 0x84708a70,
+	0xb8fa1e06, 0x847a8a7a,
+	0x80707a70, 0xbef600ff,
+	0x01000000, 0xbef90080,
+	0xbefd0080, 0xbf800000,
+	0xbe804100, 0xbe824102,
+	0xbe844104, 0xbe864106,
+	0xbe884108, 0xbe8a410a,
+	0xbe8c410c, 0xbe8e410e,
+	0xd7610002, 0x0000f200,
 	0x80798179, 0xd7610002,
-	0x0000f202, 0x80798179,
-	0xd7610002, 0x0000f203,
+	0x0000f201, 0x80798179,
+	0xd7610002, 0x0000f202,
 	0x80798179, 0xd7610002,
-	0x0000f204, 0x80798179,
-	0xd7610002, 0x0000f205,
+	0x0000f203, 0x80798179,
+	0xd7610002, 0x0000f204,
 	0x80798179, 0xd7610002,
-	0x0000f206, 0x80798179,
-	0xd7610002, 0x0000f207,
+	0x0000f205, 0x80798179,
+	0xd7610002, 0x0000f206,
 	0x80798179, 0xd7610002,
-	0x0000f208, 0x80798179,
-	0xd7610002, 0x0000f209,
+	0x0000f207, 0x80798179,
+	0xd7610002, 0x0000f208,
 	0x80798179, 0xd7610002,
-	0x0000f20a, 0x80798179,
-	0xd7610002, 0x0000f20b,
+	0x0000f209, 0x80798179,
+	0xd7610002, 0x0000f20a,
 	0x80798179, 0xd7610002,
-	0x0000f20c, 0x80798179,
-	0xd7610002, 0x0000f20d,
+	0x0000f20b, 0x80798179,
+	0xd7610002, 0x0000f20c,
 	0x80798179, 0xd7610002,
-	0x0000f20e, 0x80798179,
-	0xd7610002, 0x0000f20f,
-	0x80798179, 0xbf06a079,
-	0xbfa10006, 0xe0685000,
-	0x701d0200, 0x8070ff70,
-	0x00000080, 0xbef90080,
-	0x7e040280, 0x807d907d,
-	0xbf0aff7d, 0x00000060,
-	0xbfa2ffbc, 0xbe804100,
-	0xbe824102, 0xbe844104,
-	0xbe864106, 0xbe884108,
-	0xbe8a410a, 0xd7610002,
-	0x0000f200, 0x80798179,
-	0xd7610002, 0x0000f201,
+	0x0000f20d, 0x80798179,
+	0xd7610002, 0x0000f20e,
 	0x80798179, 0xd7610002,
-	0x0000f202, 0x80798179,
-	0xd7610002, 0x0000f203,
+	0x0000f20f, 0x80798179,
+	0xbf06a079, 0xbfa10006,
+	0xe0685000, 0x701d0200,
+	0x8070ff70, 0x00000080,
+	0xbef90080, 0x7e040280,
+	0x807d907d, 0xbf0aff7d,
+	0x00000060, 0xbfa2ffbc,
+	0xbe804100, 0xbe824102,
+	0xbe844104, 0xbe864106,
+	0xbe884108, 0xbe8a410a,
+	0xd7610002, 0x0000f200,
 	0x80798179, 0xd7610002,
-	0x0000f204, 0x80798179,
-	0xd7610002, 0x0000f205,
+	0x0000f201, 0x80798179,
+	0xd7610002, 0x0000f202,
 	0x80798179, 0xd7610002,
-	0x0000f206, 0x80798179,
-	0xd7610002, 0x0000f207,
+	0x0000f203, 0x80798179,
+	0xd7610002, 0x0000f204,
 	0x80798179, 0xd7610002,
-	0x0000f208, 0x80798179,
-	0xd7610002, 0x0000f209,
+	0x0000f205, 0x80798179,
+	0xd7610002, 0x0000f206,
 	0x80798179, 0xd7610002,
-	0x0000f20a, 0x80798179,
-	0xd7610002, 0x0000f20b,
-	0x80798179, 0xe0685000,
-	0x701d0200, 0xbefe00c1,
-	0x857d9973, 0x8b7d817d,
-	0xbf06817d, 0xbfa20002,
-	0xbeff0080, 0xbfa00001,
-	0xbeff00c1, 0xb8fb4306,
-	0x8b7bc17b, 0xbfa10044,
-	0xbfbd0000, 0x8b7aff6d,
-	0x80000000, 0xbfa10040,
-	0x847b867b, 0x847b827b,
-	0xbef6007b, 0xb8f03b05,
-	0x80708170, 0xbf0d9973,
-	0xbfa20002, 0x84708970,
-	0xbfa00001, 0x84708a70,
-	0xb8fa1e06, 0x847a8a7a,
-	0x80707a70, 0x8070ff70,
-	0x00000200, 0x8070ff70,
-	0x00000080, 0xbef600ff,
-	0x01000000, 0xd71f0000,
-	0x000100c1, 0xd7200000,
-	0x000200c1, 0x16000084,
-	0x857d9973, 0x8b7d817d,
-	0xbf06817d, 0xbefd0080,
-	0xbfa20012, 0xbe8300ff,
-	0x00000080, 0xbf800000,
-	0xbf800000, 0xbf800000,
-	0xd8d80000, 0x01000000,
-	0xbf890000, 0xe0685000,
-	0x701d0100, 0x807d037d,
-	0x80700370, 0xd5250000,
-	0x0001ff00, 0x00000080,
-	0xbf0a7b7d, 0xbfa2fff4,
-	0xbfa00011, 0xbe8300ff,
-	0x00000100, 0xbf800000,
-	0xbf800000, 0xbf800000,
-	0xd8d80000, 0x01000000,
-	0xbf890000, 0xe0685000,
-	0x701d0100, 0x807d037d,
-	0x80700370, 0xd5250000,
-	0x0001ff00, 0x00000100,
-	0xbf0a7b7d, 0xbfa2fff4,
+	0x0000f207, 0x80798179,
+	0xd7610002, 0x0000f208,
+	0x80798179, 0xd7610002,
+	0x0000f209, 0x80798179,
+	0xd7610002, 0x0000f20a,
+	0x80798179, 0xd7610002,
+	0x0000f20b, 0x80798179,
+	0xe0685000, 0x701d0200,
 	0xbefe00c1, 0x857d9973,
 	0x8b7d817d, 0xbf06817d,
-	0xbfa20004, 0xbef000ff,
-	0x00000200, 0xbeff0080,
-	0xbfa00003, 0xbef000ff,
-	0x00000400, 0xbeff00c1,
-	0xb8fb3b05, 0x807b817b,
-	0x847b827b, 0x857d9973,
+	0xbfa20002, 0xbeff0080,
+	0xbfa00001, 0xbeff00c1,
+	0xb8fb4306, 0x8b7bc17b,
+	0xbfa10044, 0xbfbd0000,
+	0x8b7aff6d, 0x80000000,
+	0xbfa10040, 0x847b867b,
+	0x847b827b, 0xbef6007b,
+	0xb8f03b05, 0x80708170,
+	0xbf0d9973, 0xbfa20002,
+	0x84708970, 0xbfa00001,
+	0x84708a70, 0xb8fa1e06,
+	0x847a8a7a, 0x80707a70,
+	0x8070ff70, 0x00000200,
+	0x8070ff70, 0x00000080,
+	0xbef600ff, 0x01000000,
+	0xd71f0000, 0x000100c1,
+	0xd7200000, 0x000200c1,
+	0x16000084, 0x857d9973,
 	0x8b7d817d, 0xbf06817d,
-	0xbfa20017, 0xbef600ff,
-	0x01000000, 0xbefd0084,
-	0xbf0a7b7d, 0xbfa10037,
-	0x7e008700, 0x7e028701,
-	0x7e048702, 0x7e068703,
-	0xe0685000, 0x701d0000,
-	0xe0685080, 0x701d0100,
-	0xe0685100, 0x701d0200,
-	0xe0685180, 0x701d0300,
-	0x807d847d, 0x8070ff70,
-	0x00000200, 0xbf0a7b7d,
-	0xbfa2ffef, 0xbfa00025,
+	0xbefd0080, 0xbfa20012,
+	0xbe8300ff, 0x00000080,
+	0xbf800000, 0xbf800000,
+	0xbf800000, 0xd8d80000,
+	0x01000000, 0xbf890000,
+	0xe0685000, 0x701d0100,
+	0x807d037d, 0x80700370,
+	0xd5250000, 0x0001ff00,
+	0x00000080, 0xbf0a7b7d,
+	0xbfa2fff4, 0xbfa00011,
+	0xbe8300ff, 0x00000100,
+	0xbf800000, 0xbf800000,
+	0xbf800000, 0xd8d80000,
+	0x01000000, 0xbf890000,
+	0xe0685000, 0x701d0100,
+	0x807d037d, 0x80700370,
+	0xd5250000, 0x0001ff00,
+	0x00000100, 0xbf0a7b7d,
+	0xbfa2fff4, 0xbefe00c1,
+	0x857d9973, 0x8b7d817d,
+	0xbf06817d, 0xbfa20004,
+	0xbef000ff, 0x00000200,
+	0xbeff0080, 0xbfa00003,
+	0xbef000ff, 0x00000400,
+	0xbeff00c1, 0xb8fb3b05,
+	0x807b817b, 0x847b827b,
+	0x857d9973, 0x8b7d817d,
+	0xbf06817d, 0xbfa20017,
 	0xbef600ff, 0x01000000,
 	0xbefd0084, 0xbf0a7b7d,
-	0xbfa10011, 0x7e008700,
+	0xbfa10037, 0x7e008700,
 	0x7e028701, 0x7e048702,
 	0x7e068703, 0xe0685000,
-	0x701d0000, 0xe0685100,
-	0x701d0100, 0xe0685200,
-	0x701d0200, 0xe0685300,
+	0x701d0000, 0xe0685080,
+	0x701d0100, 0xe0685100,
+	0x701d0200, 0xe0685180,
 	0x701d0300, 0x807d847d,
-	0x8070ff70, 0x00000400,
+	0x8070ff70, 0x00000200,
 	0xbf0a7b7d, 0xbfa2ffef,
-	0xb8fb1e06, 0x8b7bc17b,
-	0xbfa1000c, 0x847b837b,
-	0x807b7d7b, 0xbefe00c1,
-	0xbeff0080, 0x7e008700,
+	0xbfa00025, 0xbef600ff,
+	0x01000000, 0xbefd0084,
+	0xbf0a7b7d, 0xbfa10011,
+	0x7e008700, 0x7e028701,
+	0x7e048702, 0x7e068703,
 	0xe0685000, 0x701d0000,
-	0x807d817d, 0x8070ff70,
-	0x00000080, 0xbf0a7b7d,
-	0xbfa2fff8, 0xbfa00146,
-	0xbef4007e, 0x8b75ff7f,
-	0x0000ffff, 0x8c75ff75,
-	0x00040000, 0xbef60080,
-	0xbef700ff, 0x10807fac,
-	0xb8f202dc, 0x84729972,
-	0x8b6eff7f, 0x04000000,
-	0xbfa1003a, 0xbefe00c1,
-	0x857d9972, 0x8b7d817d,
-	0xbf06817d, 0xbfa20002,
-	0xbeff0080, 0xbfa00001,
-	0xbeff00c1, 0xb8ef4306,
-	0x8b6fc16f, 0xbfa1002f,
-	0x846f866f, 0x846f826f,
-	0xbef6006f, 0xb8f83b05,
-	0x80788178, 0xbf0d9972,
-	0xbfa20002, 0x84788978,
-	0xbfa00001, 0x84788a78,
-	0xb8ee1e06, 0x846e8a6e,
-	0x80786e78, 0x8078ff78,
-	0x00000200, 0x8078ff78,
-	0x00000080, 0xbef600ff,
-	0x01000000, 0x857d9972,
-	0x8b7d817d, 0xbf06817d,
-	0xbefd0080, 0xbfa2000c,
-	0xe0500000, 0x781d0000,
-	0xbf8903f7, 0xdac00000,
-	0x00000000, 0x807dff7d,
-	0x00000080, 0x8078ff78,
-	0x00000080, 0xbf0a6f7d,
-	0xbfa2fff5, 0xbfa0000b,
-	0xe0500000, 0x781d0000,
-	0xbf8903f7, 0xdac00000,
-	0x00000000, 0x807dff7d,
-	0x00000100, 0x8078ff78,
-	0x00000100, 0xbf0a6f7d,
-	0xbfa2fff5, 0xbef80080,
+	0xe0685100, 0x701d0100,
+	0xe0685200, 0x701d0200,
+	0xe0685300, 0x701d0300,
+	0x807d847d, 0x8070ff70,
+	0x00000400, 0xbf0a7b7d,
+	0xbfa2ffef, 0xb8fb1e06,
+	0x8b7bc17b, 0xbfa1000c,
+	0x847b837b, 0x807b7d7b,
+	0xbefe00c1, 0xbeff0080,
+	0x7e008700, 0xe0685000,
+	0x701d0000, 0x807d817d,
+	0x8070ff70, 0x00000080,
+	0xbf0a7b7d, 0xbfa2fff8,
+	0xbfa00146, 0xbef4007e,
+	0x8b75ff7f, 0x0000ffff,
+	0x8c75ff75, 0x00040000,
+	0xbef60080, 0xbef700ff,
+	0x10807fac, 0xb8f202dc,
+	0x84729972, 0x8b6eff7f,
+	0x04000000, 0xbfa1003a,
 	0xbefe00c1, 0x857d9972,
 	0x8b7d817d, 0xbf06817d,
 	0xbfa20002, 0xbeff0080,
 	0xbfa00001, 0xbeff00c1,
-	0xb8ef3b05, 0x806f816f,
-	0x846f826f, 0x857d9972,
-	0x8b7d817d, 0xbf06817d,
-	0xbfa20024, 0xbef600ff,
-	0x01000000, 0xbeee0078,
+	0xb8ef4306, 0x8b6fc16f,
+	0xbfa1002f, 0x846f866f,
+	0x846f826f, 0xbef6006f,
+	0xb8f83b05, 0x80788178,
+	0xbf0d9972, 0xbfa20002,
+	0x84788978, 0xbfa00001,
+	0x84788a78, 0xb8ee1e06,
+	0x846e8a6e, 0x80786e78,
 	0x8078ff78, 0x00000200,
-	0xbefd0084, 0xbf0a6f7d,
-	0xbfa10050, 0xe0505000,
-	0x781d0000, 0xe0505080,
-	0x781d0100, 0xe0505100,
-	0x781d0200, 0xe0505180,
-	0x781d0300, 0xbf8903f7,
-	0x7e008500, 0x7e028501,
-	0x7e048502, 0x7e068503,
-	0x807d847d, 0x8078ff78,
-	0x00000200, 0xbf0a6f7d,
-	0xbfa2ffee, 0xe0505000,
-	0x6e1d0000, 0xe0505080,
-	0x6e1d0100, 0xe0505100,
-	0x6e1d0200, 0xe0505180,
-	0x6e1d0300, 0xbf8903f7,
-	0xbfa00034, 0xbef600ff,
-	0x01000000, 0xbeee0078,
-	0x8078ff78, 0x00000400,
-	0xbefd0084, 0xbf0a6f7d,
-	0xbfa10012, 0xe0505000,
-	0x781d0000, 0xe0505100,
-	0x781d0100, 0xe0505200,
-	0x781d0200, 0xe0505300,
-	0x781d0300, 0xbf8903f7,
-	0x7e008500, 0x7e028501,
-	0x7e048502, 0x7e068503,
-	0x807d847d, 0x8078ff78,
-	0x00000400, 0xbf0a6f7d,
-	0xbfa2ffee, 0xb8ef1e06,
-	0x8b6fc16f, 0xbfa1000e,
-	0x846f836f, 0x806f7d6f,
-	0xbefe00c1, 0xbeff0080,
+	0x8078ff78, 0x00000080,
+	0xbef600ff, 0x01000000,
+	0x857d9972, 0x8b7d817d,
+	0xbf06817d, 0xbefd0080,
+	0xbfa2000c, 0xe0500000,
+	0x781d0000, 0xbf8903f7,
+	0xdac00000, 0x00000000,
+	0x807dff7d, 0x00000080,
+	0x8078ff78, 0x00000080,
+	0xbf0a6f7d, 0xbfa2fff5,
+	0xbfa0000b, 0xe0500000,
+	0x781d0000, 0xbf8903f7,
+	0xdac00000, 0x00000000,
+	0x807dff7d, 0x00000100,
+	0x8078ff78, 0x00000100,
+	0xbf0a6f7d, 0xbfa2fff5,
+	0xbef80080, 0xbefe00c1,
+	0x857d9972, 0x8b7d817d,
+	0xbf06817d, 0xbfa20002,
+	0xbeff0080, 0xbfa00001,
+	0xbeff00c1, 0xb8ef3b05,
+	0x806f816f, 0x846f826f,
+	0x857d9972, 0x8b7d817d,
+	0xbf06817d, 0xbfa20024,
+	0xbef600ff, 0x01000000,
+	0xbeee0078, 0x8078ff78,
+	0x00000200, 0xbefd0084,
+	0xbf0a6f7d, 0xbfa10050,
 	0xe0505000, 0x781d0000,
+	0xe0505080, 0x781d0100,
+	0xe0505100, 0x781d0200,
+	0xe0505180, 0x781d0300,
 	0xbf8903f7, 0x7e008500,
-	0x807d817d, 0x8078ff78,
-	0x00000080, 0xbf0a6f7d,
-	0xbfa2fff7, 0xbeff00c1,
+	0x7e028501, 0x7e048502,
+	0x7e068503, 0x807d847d,
+	0x8078ff78, 0x00000200,
+	0xbf0a6f7d, 0xbfa2ffee,
 	0xe0505000, 0x6e1d0000,
-	0xe0505100, 0x6e1d0100,
-	0xe0505200, 0x6e1d0200,
-	0xe0505300, 0x6e1d0300,
-	0xbf8903f7, 0xb8f83b05,
-	0x80788178, 0xbf0d9972,
-	0xbfa20002, 0x84788978,
-	0xbfa00001, 0x84788a78,
-	0xb8ee1e06, 0x846e8a6e,
-	0x80786e78, 0x8078ff78,
-	0x00000200, 0x80f8ff78,
-	0x00000050, 0xbef600ff,
-	0x01000000, 0xbefd00ff,
-	0x0000006c, 0x80f89078,
-	0xf428403a, 0xf0000000,
-	0xbf89fc07, 0x80fd847d,
-	0xbf800000, 0xbe804300,
-	0xbe824302, 0x80f8a078,
-	0xf42c403a, 0xf0000000,
-	0xbf89fc07, 0x80fd887d,
-	0xbf800000, 0xbe804300,
-	0xbe824302, 0xbe844304,
-	0xbe864306, 0x80f8c078,
-	0xf430403a, 0xf0000000,
-	0xbf89fc07, 0x80fd907d,
-	0xbf800000, 0xbe804300,
-	0xbe824302, 0xbe844304,
-	0xbe864306, 0xbe884308,
-	0xbe8a430a, 0xbe8c430c,
-	0xbe8e430e, 0xbf06807d,
-	0xbfa1fff0, 0xb980f801,
-	0x00000000, 0xbfbd0000,
+	0xe0505080, 0x6e1d0100,
+	0xe0505100, 0x6e1d0200,
+	0xe0505180, 0x6e1d0300,
+	0xbf8903f7, 0xbfa00034,
+	0xbef600ff, 0x01000000,
+	0xbeee0078, 0x8078ff78,
+	0x00000400, 0xbefd0084,
+	0xbf0a6f7d, 0xbfa10012,
+	0xe0505000, 0x781d0000,
+	0xe0505100, 0x781d0100,
+	0xe0505200, 0x781d0200,
+	0xe0505300, 0x781d0300,
+	0xbf8903f7, 0x7e008500,
+	0x7e028501, 0x7e048502,
+	0x7e068503, 0x807d847d,
+	0x8078ff78, 0x00000400,
+	0xbf0a6f7d, 0xbfa2ffee,
+	0xb8ef1e06, 0x8b6fc16f,
+	0xbfa1000e, 0x846f836f,
+	0x806f7d6f, 0xbefe00c1,
+	0xbeff0080, 0xe0505000,
+	0x781d0000, 0xbf8903f7,
+	0x7e008500, 0x807d817d,
+	0x8078ff78, 0x00000080,
+	0xbf0a6f7d, 0xbfa2fff7,
+	0xbeff00c1, 0xe0505000,
+	0x6e1d0000, 0xe0505100,
+	0x6e1d0100, 0xe0505200,
+	0x6e1d0200, 0xe0505300,
+	0x6e1d0300, 0xbf8903f7,
 	0xb8f83b05, 0x80788178,
 	0xbf0d9972, 0xbfa20002,
 	0x84788978, 0xbfa00001,
 	0x84788a78, 0xb8ee1e06,
 	0x846e8a6e, 0x80786e78,
 	0x8078ff78, 0x00000200,
+	0x80f8ff78, 0x00000050,
 	0xbef600ff, 0x01000000,
-	0xf4205bfa, 0xf0000000,
-	0x80788478, 0xf4205b3a,
+	0xbefd00ff, 0x0000006c,
+	0x80f89078, 0xf428403a,
+	0xf0000000, 0xbf89fc07,
+	0x80fd847d, 0xbf800000,
+	0xbe804300, 0xbe824302,
+	0x80f8a078, 0xf42c403a,
+	0xf0000000, 0xbf89fc07,
+	0x80fd887d, 0xbf800000,
+	0xbe804300, 0xbe824302,
+	0xbe844304, 0xbe864306,
+	0x80f8c078, 0xf430403a,
+	0xf0000000, 0xbf89fc07,
+	0x80fd907d, 0xbf800000,
+	0xbe804300, 0xbe824302,
+	0xbe844304, 0xbe864306,
+	0xbe884308, 0xbe8a430a,
+	0xbe8c430c, 0xbe8e430e,
+	0xbf06807d, 0xbfa1fff0,
+	0xb980f801, 0x00000000,
+	0xbfbd0000, 0xb8f83b05,
+	0x80788178, 0xbf0d9972,
+	0xbfa20002, 0x84788978,
+	0xbfa00001, 0x84788a78,
+	0xb8ee1e06, 0x846e8a6e,
+	0x80786e78, 0x8078ff78,
+	0x00000200, 0xbef600ff,
+	0x01000000, 0xf4205bfa,
 	0xf0000000, 0x80788478,
-	0xf4205b7a, 0xf0000000,
-	0x80788478, 0xf4205c3a,
+	0xf4205b3a, 0xf0000000,
+	0x80788478, 0xf4205b7a,
 	0xf0000000, 0x80788478,
-	0xf4205c7a, 0xf0000000,
-	0x80788478, 0xf4205eba,
+	0xf4205c3a, 0xf0000000,
+	0x80788478, 0xf4205c7a,
 	0xf0000000, 0x80788478,
-	0xf4205efa, 0xf0000000,
-	0x80788478, 0xf4205e7a,
+	0xf4205eba, 0xf0000000,
+	0x80788478, 0xf4205efa,
 	0xf0000000, 0x80788478,
-	0xf4205cfa, 0xf0000000,
-	0x80788478, 0xf4205bba,
+	0xf4205e7a, 0xf0000000,
+	0x80788478, 0xf4205cfa,
 	0xf0000000, 0x80788478,
-	0xbf89fc07, 0xb96ef814,
 	0xf4205bba, 0xf0000000,
 	0x80788478, 0xbf89fc07,
-	0xb96ef815, 0xbefd006f,
-	0xbefe0070, 0xbeff0071,
-	0x8b6f7bff, 0x000003ff,
-	0xb96f4803, 0x8b6f7bff,
-	0xfffff800, 0x856f8b6f,
-	0xb96fa2c3, 0xb973f801,
-	0xb8ee3b05, 0x806e816e,
-	0xbf0d9972, 0xbfa20002,
-	0x846e896e, 0xbfa00001,
-	0x846e8a6e, 0xb8ef1e06,
-	0x846f8a6f, 0x806e6f6e,
-	0x806eff6e, 0x00000200,
-	0x806e746e, 0x826f8075,
-	0x8b6fff6f, 0x0000ffff,
-	0xf4085c37, 0xf8000050,
-	0xf4085d37, 0xf8000060,
-	0xf4005e77, 0xf8000074,
-	0xbf89fc07, 0x8b6dff6d,
-	0x0000ffff, 0x8bfe7e7e,
-	0x8bea6a6a, 0xb8eef802,
-	0xbf0d866e, 0xbfa20002,
-	0xb97af802, 0xbe80486c,
-	0xb97af802, 0xbe804a6c,
-	0xbfb00000, 0xbf9f0000,
+	0xb96ef814, 0xf4205bba,
+	0xf0000000, 0x80788478,
+	0xbf89fc07, 0xb96ef815,
+	0xbefd006f, 0xbefe0070,
+	0xbeff0071, 0x8b6f7bff,
+	0x000003ff, 0xb96f4803,
+	0x8b6f7bff, 0xfffff800,
+	0x856f8b6f, 0xb96fa2c3,
+	0xb973f801, 0xb8ee3b05,
+	0x806e816e, 0xbf0d9972,
+	0xbfa20002, 0x846e896e,
+	0xbfa00001, 0x846e8a6e,
+	0xb8ef1e06, 0x846f8a6f,
+	0x806e6f6e, 0x806eff6e,
+	0x00000200, 0x806e746e,
+	0x826f8075, 0x8b6fff6f,
+	0x0000ffff, 0xf4085c37,
+	0xf8000050, 0xf4085d37,
+	0xf8000060, 0xf4005e77,
+	0xf8000074, 0xbf89fc07,
+	0x8b6dff6d, 0x0000ffff,
+	0x8bfe7e7e, 0x8bea6a6a,
+	0xb8eef802, 0xbf0d866e,
+	0xbfa20002, 0xb97af802,
+	0xbe80486c, 0xb97af802,
+	0xbe804a6c, 0xbfb00000,
 	0xbf9f0000, 0xbf9f0000,
 	0xbf9f0000, 0xbf9f0000,
+	0xbf9f0000, 0x00000000,
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
index 0f81670f6f9c..8b92c33c2a7c 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
@@ -186,6 +186,12 @@ L_SKIP_RESTORE:
 	s_getreg_b32	s_save_trapsts, hwreg(HW_REG_TRAPSTS)
 
 #if SW_SA_TRAP
+	// If ttmp1[30] is set then issue s_barrier to unblock dependent waves.
+	s_bitcmp1_b32	s_save_pc_hi, 30
+	s_cbranch_scc0	L_TRAP_NO_BARRIER
+	s_barrier
+
+L_TRAP_NO_BARRIER:
 	// If ttmp1[31] is set then trap may occur early.
 	// Spin wait until SAVECTX exception is raised.
 	s_bitcmp1_b32	s_save_pc_hi, 31
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 5feaba6a77de..6d291aa6386b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -1950,7 +1950,7 @@ static int criu_checkpoint(struct file *filep,
 {
 	int ret;
 	uint32_t num_devices, num_bos, num_objects;
-	uint64_t priv_size, priv_offset = 0;
+	uint64_t priv_size, priv_offset = 0, bo_priv_offset;
 
 	if (!args->devices || !args->bos || !args->priv_data)
 		return -EINVAL;
@@ -1994,38 +1994,34 @@ static int criu_checkpoint(struct file *filep,
 	if (ret)
 		goto exit_unlock;
 
-	ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
-			    (uint8_t __user *)args->priv_data, &priv_offset);
-	if (ret)
-		goto exit_unlock;
+	/* Leave room for BOs in the private data. They need to be restored
+	 * before events, but we checkpoint them last to simplify the error
+	 * handling.
+	 */
+	bo_priv_offset = priv_offset;
+	priv_offset += num_bos * sizeof(struct kfd_criu_bo_priv_data);
 
 	if (num_objects) {
 		ret = kfd_criu_checkpoint_queues(p, (uint8_t __user *)args->priv_data,
 						 &priv_offset);
 		if (ret)
-			goto close_bo_fds;
+			goto exit_unlock;
 
 		ret = kfd_criu_checkpoint_events(p, (uint8_t __user *)args->priv_data,
 						 &priv_offset);
 		if (ret)
-			goto close_bo_fds;
+			goto exit_unlock;
 
 		ret = kfd_criu_checkpoint_svm(p, (uint8_t __user *)args->priv_data, &priv_offset);
 		if (ret)
-			goto close_bo_fds;
+			goto exit_unlock;
 	}
 
-close_bo_fds:
-	if (ret) {
-		/* If IOCTL returns err, user assumes all FDs opened in criu_dump_bos are closed */
-		uint32_t i;
-		struct kfd_criu_bo_bucket *bo_buckets = (struct kfd_criu_bo_bucket *) args->bos;
-
-		for (i = 0; i < num_bos; i++) {
-			if (bo_buckets[i].alloc_flags & KFD_IOC_ALLOC_MEM_FLAGS_VRAM)
-				close_fd(bo_buckets[i].dmabuf_fd);
-		}
-	}
+	/* This must be the last thing in this function that can fail.
+	 * Otherwise we leak dmabuf file descriptors.
+	 */
+	ret = criu_checkpoint_bos(p, num_bos, (uint8_t __user *)args->bos,
+			   (uint8_t __user *)args->priv_data, &bo_priv_offset);
 
 exit_unlock:
 	mutex_unlock(&p->mutex);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index cd5f8b219bf9..3251f4783ba1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -50,16 +50,6 @@ static inline unsigned int get_and_inc_gpu_processor_id(
 	return current_id;
 }
 
-/* Static table to describe GPU Cache information */
-struct kfd_gpu_cache_info {
-	uint32_t	cache_size;
-	uint32_t	cache_level;
-	uint32_t	flags;
-	/* Indicates how many Compute Units share this cache
-	 * within a SA. Value = 1 indicates the cache is not shared
-	 */
-	uint32_t	num_cu_shared;
-};
 
 static struct kfd_gpu_cache_info kaveri_cache_info[] = {
 	{
@@ -795,6 +785,150 @@ static struct kfd_gpu_cache_info yellow_carp_cache_info[] = {
 	},
 };
 
+static struct kfd_gpu_cache_info gfx1037_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* GL1 Data Cache per SA */
+		.cache_size = 128,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 256,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+};
+
+static struct kfd_gpu_cache_info gc_10_3_6_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+			  CRAT_CACHE_FLAGS_DATA_CACHE |
+			  CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+			  CRAT_CACHE_FLAGS_INST_CACHE |
+			  CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+			  CRAT_CACHE_FLAGS_DATA_CACHE |
+			  CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* GL1 Data Cache per SA */
+		.cache_size = 128,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+			  CRAT_CACHE_FLAGS_DATA_CACHE |
+			  CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 256,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+			  CRAT_CACHE_FLAGS_DATA_CACHE |
+			  CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+};
+
+static struct kfd_gpu_cache_info dummy_cache_info[] = {
+	{
+		/* TCP L1 Cache per CU */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 1,
+	},
+	{
+		/* Scalar L1 Instruction Cache per SQC */
+		.cache_size = 32,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_INST_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* Scalar L1 Data Cache per SQC */
+		.cache_size = 16,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 2,
+	},
+	{
+		/* GL1 Data Cache per SA */
+		.cache_size = 128,
+		.cache_level = 1,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 6,
+	},
+	{
+		/* L2 Data Cache per GPU (Total Tex Cache) */
+		.cache_size = 2048,
+		.cache_level = 2,
+		.flags = (CRAT_CACHE_FLAGS_ENABLED |
+				CRAT_CACHE_FLAGS_DATA_CACHE |
+				CRAT_CACHE_FLAGS_SIMD_CACHE),
+		.num_cu_shared = 6,
+	},
+};
+
 static void kfd_populated_cu_info_cpu(struct kfd_topology_device *dev,
 		struct crat_subtype_computeunit *cu)
 {
@@ -975,8 +1109,12 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
 			props->cachelines_per_tag = cache->lines_per_tag;
 			props->cache_assoc = cache->associativity;
 			props->cache_latency = cache->cache_latency;
+
 			memcpy(props->sibling_map, cache->sibling_map,
-					sizeof(props->sibling_map));
+					CRAT_SIBLINGMAP_SIZE);
+
+			/* set the sibling_map_size as 32 for CRAT from ACPI */
+			props->sibling_map_size = CRAT_SIBLINGMAP_SIZE;
 
 			if (cache->flags & CRAT_CACHE_FLAGS_DATA_CACHE)
 				props->cache_type |= HSA_CACHE_TYPE_DATA;
@@ -987,7 +1125,6 @@ static int kfd_parse_subtype_cache(struct crat_subtype_cache *cache,
 			if (cache->flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
 				props->cache_type |= HSA_CACHE_TYPE_HSACU;
 
-			dev->cache_count++;
 			dev->node_props.caches_count++;
 			list_add_tail(&props->list, &dev->cache_props);
 
@@ -1195,125 +1332,6 @@ err:
 	return ret;
 }
 
-/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
-static int fill_in_l1_pcache(struct crat_subtype_cache *pcache,
-				struct kfd_gpu_cache_info *pcache_info,
-				struct kfd_cu_info *cu_info,
-				int mem_available,
-				int cu_bitmask,
-				int cache_type, unsigned int cu_processor_id,
-				int cu_block)
-{
-	unsigned int cu_sibling_map_mask;
-	int first_active_cu;
-
-	/* First check if enough memory is available */
-	if (sizeof(struct crat_subtype_cache) > mem_available)
-		return -ENOMEM;
-
-	cu_sibling_map_mask = cu_bitmask;
-	cu_sibling_map_mask >>= cu_block;
-	cu_sibling_map_mask &=
-		((1 << pcache_info[cache_type].num_cu_shared) - 1);
-	first_active_cu = ffs(cu_sibling_map_mask);
-
-	/* CU could be inactive. In case of shared cache find the first active
-	 * CU. and incase of non-shared cache check if the CU is inactive. If
-	 * inactive active skip it
-	 */
-	if (first_active_cu) {
-		memset(pcache, 0, sizeof(struct crat_subtype_cache));
-		pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
-		pcache->length = sizeof(struct crat_subtype_cache);
-		pcache->flags = pcache_info[cache_type].flags;
-		pcache->processor_id_low = cu_processor_id
-					 + (first_active_cu - 1);
-		pcache->cache_level = pcache_info[cache_type].cache_level;
-		pcache->cache_size = pcache_info[cache_type].cache_size;
-
-		/* Sibling map is w.r.t processor_id_low, so shift out
-		 * inactive CU
-		 */
-		cu_sibling_map_mask =
-			cu_sibling_map_mask >> (first_active_cu - 1);
-
-		pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
-		pcache->sibling_map[1] =
-				(uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
-		pcache->sibling_map[2] =
-				(uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
-		pcache->sibling_map[3] =
-				(uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
-		return 0;
-	}
-	return 1;
-}
-
-/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
-static int fill_in_l2_l3_pcache(struct crat_subtype_cache *pcache,
-				struct kfd_gpu_cache_info *pcache_info,
-				struct kfd_cu_info *cu_info,
-				int mem_available,
-				int cache_type, unsigned int cu_processor_id)
-{
-	unsigned int cu_sibling_map_mask;
-	int first_active_cu;
-	int i, j, k;
-
-	/* First check if enough memory is available */
-	if (sizeof(struct crat_subtype_cache) > mem_available)
-		return -ENOMEM;
-
-	cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
-	cu_sibling_map_mask &=
-		((1 << pcache_info[cache_type].num_cu_shared) - 1);
-	first_active_cu = ffs(cu_sibling_map_mask);
-
-	/* CU could be inactive. In case of shared cache find the first active
-	 * CU. and incase of non-shared cache check if the CU is inactive. If
-	 * inactive active skip it
-	 */
-	if (first_active_cu) {
-		memset(pcache, 0, sizeof(struct crat_subtype_cache));
-		pcache->type = CRAT_SUBTYPE_CACHE_AFFINITY;
-		pcache->length = sizeof(struct crat_subtype_cache);
-		pcache->flags = pcache_info[cache_type].flags;
-		pcache->processor_id_low = cu_processor_id
-					 + (first_active_cu - 1);
-		pcache->cache_level = pcache_info[cache_type].cache_level;
-		pcache->cache_size = pcache_info[cache_type].cache_size;
-
-		/* Sibling map is w.r.t processor_id_low, so shift out
-		 * inactive CU
-		 */
-		cu_sibling_map_mask =
-			cu_sibling_map_mask >> (first_active_cu - 1);
-		k = 0;
-		for (i = 0; i < cu_info->num_shader_engines; i++) {
-			for (j = 0; j < cu_info->num_shader_arrays_per_engine;
-				j++) {
-				pcache->sibling_map[k] =
-				 (uint8_t)(cu_sibling_map_mask & 0xFF);
-				pcache->sibling_map[k+1] =
-				 (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
-				pcache->sibling_map[k+2] =
-				 (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
-				pcache->sibling_map[k+3] =
-				 (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
-				k += 4;
-				cu_sibling_map_mask =
-					cu_info->cu_bitmap[i % 4][j + i / 4];
-				cu_sibling_map_mask &= (
-				 (1 << pcache_info[cache_type].num_cu_shared)
-				 - 1);
-			}
-		}
-		return 0;
-	}
-	return 1;
-}
-
-#define KFD_MAX_CACHE_TYPES 6
 
 static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
 						   struct kfd_gpu_cache_info *pcache_info)
@@ -1387,222 +1405,134 @@ static int kfd_fill_gpu_cache_info_from_gfx_config(struct kfd_dev *kdev,
 	return i;
 }
 
-/* kfd_fill_gpu_cache_info - Fill GPU cache info using kfd_gpu_cache_info
- * tables
- *
- *	@kdev - [IN] GPU device
- *	@gpu_processor_id - [IN] GPU processor ID to which these caches
- *			    associate
- *	@available_size - [IN] Amount of memory available in pcache
- *	@cu_info - [IN] Compute Unit info obtained from KGD
- *	@pcache - [OUT] memory into which cache data is to be filled in.
- *	@size_filled - [OUT] amount of data used up in pcache.
- *	@num_of_entries - [OUT] number of caches added
- */
-static int kfd_fill_gpu_cache_info(struct kfd_dev *kdev,
-			int gpu_processor_id,
-			int available_size,
-			struct kfd_cu_info *cu_info,
-			struct crat_subtype_cache *pcache,
-			int *size_filled,
-			int *num_of_entries)
+int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info)
 {
-	struct kfd_gpu_cache_info *pcache_info;
-	struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
 	int num_of_cache_types = 0;
-	int i, j, k;
-	int ct = 0;
-	int mem_available = available_size;
-	unsigned int cu_processor_id;
-	int ret;
-	unsigned int num_cu_shared;
 
 	switch (kdev->adev->asic_type) {
 	case CHIP_KAVERI:
-		pcache_info = kaveri_cache_info;
+		*pcache_info = kaveri_cache_info;
 		num_of_cache_types = ARRAY_SIZE(kaveri_cache_info);
 		break;
 	case CHIP_HAWAII:
-		pcache_info = hawaii_cache_info;
+		*pcache_info = hawaii_cache_info;
 		num_of_cache_types = ARRAY_SIZE(hawaii_cache_info);
 		break;
 	case CHIP_CARRIZO:
-		pcache_info = carrizo_cache_info;
+		*pcache_info = carrizo_cache_info;
 		num_of_cache_types = ARRAY_SIZE(carrizo_cache_info);
 		break;
 	case CHIP_TONGA:
-		pcache_info = tonga_cache_info;
+		*pcache_info = tonga_cache_info;
 		num_of_cache_types = ARRAY_SIZE(tonga_cache_info);
 		break;
 	case CHIP_FIJI:
-		pcache_info = fiji_cache_info;
+		*pcache_info = fiji_cache_info;
 		num_of_cache_types = ARRAY_SIZE(fiji_cache_info);
 		break;
 	case CHIP_POLARIS10:
-		pcache_info = polaris10_cache_info;
+		*pcache_info = polaris10_cache_info;
 		num_of_cache_types = ARRAY_SIZE(polaris10_cache_info);
 		break;
 	case CHIP_POLARIS11:
-		pcache_info = polaris11_cache_info;
+		*pcache_info = polaris11_cache_info;
 		num_of_cache_types = ARRAY_SIZE(polaris11_cache_info);
 		break;
 	case CHIP_POLARIS12:
-		pcache_info = polaris12_cache_info;
+		*pcache_info = polaris12_cache_info;
 		num_of_cache_types = ARRAY_SIZE(polaris12_cache_info);
 		break;
 	case CHIP_VEGAM:
-		pcache_info = vegam_cache_info;
+		*pcache_info = vegam_cache_info;
 		num_of_cache_types = ARRAY_SIZE(vegam_cache_info);
 		break;
 	default:
 		switch (KFD_GC_VERSION(kdev)) {
 		case IP_VERSION(9, 0, 1):
-			pcache_info = vega10_cache_info;
+			*pcache_info = vega10_cache_info;
 			num_of_cache_types = ARRAY_SIZE(vega10_cache_info);
 			break;
 		case IP_VERSION(9, 2, 1):
-			pcache_info = vega12_cache_info;
+			*pcache_info = vega12_cache_info;
 			num_of_cache_types = ARRAY_SIZE(vega12_cache_info);
 			break;
 		case IP_VERSION(9, 4, 0):
 		case IP_VERSION(9, 4, 1):
-			pcache_info = vega20_cache_info;
+			*pcache_info = vega20_cache_info;
 			num_of_cache_types = ARRAY_SIZE(vega20_cache_info);
 			break;
 		case IP_VERSION(9, 4, 2):
-			pcache_info = aldebaran_cache_info;
+			*pcache_info = aldebaran_cache_info;
 			num_of_cache_types = ARRAY_SIZE(aldebaran_cache_info);
 			break;
 		case IP_VERSION(9, 1, 0):
 		case IP_VERSION(9, 2, 2):
-			pcache_info = raven_cache_info;
+			*pcache_info = raven_cache_info;
 			num_of_cache_types = ARRAY_SIZE(raven_cache_info);
 			break;
 		case IP_VERSION(9, 3, 0):
-			pcache_info = renoir_cache_info;
+			*pcache_info = renoir_cache_info;
 			num_of_cache_types = ARRAY_SIZE(renoir_cache_info);
 			break;
 		case IP_VERSION(10, 1, 10):
 		case IP_VERSION(10, 1, 2):
 		case IP_VERSION(10, 1, 3):
 		case IP_VERSION(10, 1, 4):
-			pcache_info = navi10_cache_info;
+			*pcache_info = navi10_cache_info;
 			num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
 			break;
 		case IP_VERSION(10, 1, 1):
-			pcache_info = navi14_cache_info;
+			*pcache_info = navi14_cache_info;
 			num_of_cache_types = ARRAY_SIZE(navi14_cache_info);
 			break;
 		case IP_VERSION(10, 3, 0):
-			pcache_info = sienna_cichlid_cache_info;
+			*pcache_info = sienna_cichlid_cache_info;
 			num_of_cache_types = ARRAY_SIZE(sienna_cichlid_cache_info);
 			break;
 		case IP_VERSION(10, 3, 2):
-			pcache_info = navy_flounder_cache_info;
+			*pcache_info = navy_flounder_cache_info;
 			num_of_cache_types = ARRAY_SIZE(navy_flounder_cache_info);
 			break;
 		case IP_VERSION(10, 3, 4):
-			pcache_info = dimgrey_cavefish_cache_info;
+			*pcache_info = dimgrey_cavefish_cache_info;
 			num_of_cache_types = ARRAY_SIZE(dimgrey_cavefish_cache_info);
 			break;
 		case IP_VERSION(10, 3, 1):
-			pcache_info = vangogh_cache_info;
+			*pcache_info = vangogh_cache_info;
 			num_of_cache_types = ARRAY_SIZE(vangogh_cache_info);
 			break;
 		case IP_VERSION(10, 3, 5):
-			pcache_info = beige_goby_cache_info;
+			*pcache_info = beige_goby_cache_info;
 			num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
 			break;
 		case IP_VERSION(10, 3, 3):
-		case IP_VERSION(10, 3, 6): /* TODO: Double check these on production silicon */
-		case IP_VERSION(10, 3, 7): /* TODO: Double check these on production silicon */
-			pcache_info = yellow_carp_cache_info;
+			*pcache_info = yellow_carp_cache_info;
 			num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
 			break;
+		case IP_VERSION(10, 3, 6):
+			*pcache_info = gc_10_3_6_cache_info;
+			num_of_cache_types = ARRAY_SIZE(gc_10_3_6_cache_info);
+			break;
+		case IP_VERSION(10, 3, 7):
+			*pcache_info = gfx1037_cache_info;
+			num_of_cache_types = ARRAY_SIZE(gfx1037_cache_info);
+			break;
 		case IP_VERSION(11, 0, 0):
 		case IP_VERSION(11, 0, 1):
 		case IP_VERSION(11, 0, 2):
 		case IP_VERSION(11, 0, 3):
-			pcache_info = cache_info;
+		case IP_VERSION(11, 0, 4):
 			num_of_cache_types =
-				kfd_fill_gpu_cache_info_from_gfx_config(kdev, pcache_info);
+				kfd_fill_gpu_cache_info_from_gfx_config(kdev, *pcache_info);
 			break;
 		default:
-			return -EINVAL;
-		}
-	}
-
-	*size_filled = 0;
-	*num_of_entries = 0;
-
-	/* For each type of cache listed in the kfd_gpu_cache_info table,
-	 * go through all available Compute Units.
-	 * The [i,j,k] loop will
-	 *		if kfd_gpu_cache_info.num_cu_shared = 1
-	 *			will parse through all available CU
-	 *		If (kfd_gpu_cache_info.num_cu_shared != 1)
-	 *			then it will consider only one CU from
-	 *			the shared unit
-	 */
-
-	for (ct = 0; ct < num_of_cache_types; ct++) {
-	  cu_processor_id = gpu_processor_id;
-	  if (pcache_info[ct].cache_level == 1) {
-	    for (i = 0; i < cu_info->num_shader_engines; i++) {
-	      for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
-	        for (k = 0; k < cu_info->num_cu_per_sh;
-		  k += pcache_info[ct].num_cu_shared) {
-		  ret = fill_in_l1_pcache(pcache,
-					pcache_info,
-					cu_info,
-					mem_available,
-					cu_info->cu_bitmap[i % 4][j + i / 4],
-					ct,
-					cu_processor_id,
-					k);
-
-		  if (ret < 0)
+			*pcache_info = dummy_cache_info;
+			num_of_cache_types = ARRAY_SIZE(dummy_cache_info);
+			pr_warn("dummy cache info is used temporarily and real cache info need update later.\n");
 			break;
-
-		  if (!ret) {
-				pcache++;
-				(*num_of_entries)++;
-				mem_available -= sizeof(*pcache);
-				(*size_filled) += sizeof(*pcache);
-		  }
-
-		  /* Move to next CU block */
-		  num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
-					cu_info->num_cu_per_sh) ?
-					pcache_info[ct].num_cu_shared :
-					(cu_info->num_cu_per_sh - k);
-		  cu_processor_id += num_cu_shared;
 		}
-	      }
-	    }
-	  } else {
-			ret = fill_in_l2_l3_pcache(pcache,
-				pcache_info,
-				cu_info,
-				mem_available,
-				ct,
-				cu_processor_id);
-
-			if (ret < 0)
-				break;
-
-			if (!ret) {
-				pcache++;
-				(*num_of_entries)++;
-				mem_available -= sizeof(*pcache);
-				(*size_filled) += sizeof(*pcache);
-			}
-	  }
 	}
-
-	pr_debug("Added [%d] GPU cache entries\n", *num_of_entries);
-
-	return 0;
+	return num_of_cache_types;
 }
 
 static bool kfd_ignore_crat(void)
@@ -1961,8 +1891,8 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
 	struct acpi_table_header *table_header = NULL;
 	struct acpi_subtable_header *sub_header = NULL;
 	unsigned long table_end, subtable_len;
-	u32 pci_id = pci_domain_nr(kdev->pdev->bus) << 16 |
-			pci_dev_id(kdev->pdev);
+	u32 pci_id = pci_domain_nr(kdev->adev->pdev->bus) << 16 |
+			pci_dev_id(kdev->adev->pdev);
 	u32 bdf;
 	acpi_status status;
 	struct acpi_srat_cpu_affinity *cpu;
@@ -2037,7 +1967,7 @@ static void kfd_find_numa_node_in_srat(struct kfd_dev *kdev)
 		numa_node = 0;
 
 	if (numa_node != NUMA_NO_NODE)
-		set_dev_node(&kdev->pdev->dev, numa_node);
+		set_dev_node(&kdev->adev->pdev->dev, numa_node);
 }
 #endif
 
@@ -2098,14 +2028,14 @@ static int kfd_fill_gpu_direct_io_link_to_cpu(int *avail_size,
 	sub_type_hdr->proximity_domain_from = proximity_domain;
 
 #ifdef CONFIG_ACPI_NUMA
-	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
 		kfd_find_numa_node_in_srat(kdev);
 #endif
 #ifdef CONFIG_NUMA
-	if (kdev->pdev->dev.numa_node == NUMA_NO_NODE)
+	if (kdev->adev->pdev->dev.numa_node == NUMA_NO_NODE)
 		sub_type_hdr->proximity_domain_to = 0;
 	else
-		sub_type_hdr->proximity_domain_to = kdev->pdev->dev.numa_node;
+		sub_type_hdr->proximity_domain_to = kdev->adev->pdev->dev.numa_node;
 #else
 	sub_type_hdr->proximity_domain_to = 0;
 #endif
@@ -2161,8 +2091,6 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 	struct kfd_cu_info cu_info;
 	int avail_size = *size;
 	uint32_t total_num_of_cu;
-	int num_of_cache_entries = 0;
-	int cache_mem_filled = 0;
 	uint32_t nid = 0;
 	int ret = 0;
 
@@ -2263,31 +2191,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
 	crat_table->length += sizeof(struct crat_subtype_memory);
 	crat_table->total_entries++;
 
-	/* TODO: Fill in cache information. This information is NOT readily
-	 * available in KGD
-	 */
-	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
-		sub_type_hdr->length);
-	ret = kfd_fill_gpu_cache_info(kdev, cu->processor_id_low,
-				avail_size,
-				&cu_info,
-				(struct crat_subtype_cache *)sub_type_hdr,
-				&cache_mem_filled,
-				&num_of_cache_entries);
-
-	if (ret < 0)
-		return ret;
-
-	crat_table->length += cache_mem_filled;
-	crat_table->total_entries += num_of_cache_entries;
-	avail_size -= cache_mem_filled;
-
 	/* Fill in Subtype: IO_LINKS
 	 *  Only direct links are added here which is Link from GPU to
 	 *  its NUMA node. Indirect links are added by userspace.
 	 */
 	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
-		cache_mem_filled);
+		sub_type_hdr->length);
 	ret = kfd_fill_gpu_direct_io_link_to_cpu(&avail_size, kdev,
 		(struct crat_subtype_iolink *)sub_type_hdr, proximity_domain);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
index 482ba84a728d..8d1e8ba58dee 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.h
@@ -29,11 +29,10 @@
 #pragma pack(1)
 
 /*
- * 4CC signature values for the CRAT and CDIT ACPI tables
+ * 4CC signature value for the CRAT ACPI table
  */
 
 #define CRAT_SIGNATURE	"CRAT"
-#define CDIT_SIGNATURE	"CDIT"
 
 /*
  * Component Resource Association Table (CRAT)
@@ -292,31 +291,22 @@ struct crat_subtype_generic {
 	uint32_t	flags;
 };
 
-/*
- * Component Locality Distance Information Table (CDIT)
- */
-#define CDIT_OEMID_LENGTH	6
-#define CDIT_OEMTABLEID_LENGTH	8
-
-struct cdit_header {
-	uint32_t	signature;
-	uint32_t	length;
-	uint8_t		revision;
-	uint8_t		checksum;
-	uint8_t		oem_id[CDIT_OEMID_LENGTH];
-	uint8_t		oem_table_id[CDIT_OEMTABLEID_LENGTH];
-	uint32_t	oem_revision;
-	uint32_t	creator_id;
-	uint32_t	creator_revision;
-	uint32_t	total_entries;
-	uint16_t	num_domains;
-	uint8_t		entry[1];
-};
-
 #pragma pack()
 
 struct kfd_dev;
 
+/* Static table to describe GPU Cache information */
+struct kfd_gpu_cache_info {
+	uint32_t	cache_size;
+	uint32_t	cache_level;
+	uint32_t	flags;
+	/* Indicates how many Compute Units share this cache
+	 * within a SA. Value = 1 indicates the cache is not shared
+	 */
+	uint32_t	num_cu_shared;
+};
+int kfd_get_gpu_cache_info(struct kfd_dev *kdev, struct kfd_gpu_cache_info **pcache_info);
+
 int kfd_create_crat_image_acpi(void **crat_image, size_t *size);
 void kfd_destroy_crat_image(void *crat_image);
 int kfd_parse_crat_table(void *crat_image, struct list_head *device_list,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 65a1d4f9004b..b8936340742b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -153,6 +153,7 @@ static void kfd_device_info_set_event_interrupt_class(struct kfd_dev *kfd)
 	case IP_VERSION(11, 0, 1):
 	case IP_VERSION(11, 0, 2):
 	case IP_VERSION(11, 0, 3):
+	case IP_VERSION(11, 0, 4):
 		kfd->device_info.event_interrupt_class = &event_interrupt_class_v11;
 		break;
 	default:
@@ -227,7 +228,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
 {
 	struct kfd_dev *kfd = NULL;
 	const struct kfd2kgd_calls *f2g = NULL;
-	struct pci_dev *pdev = adev->pdev;
 	uint32_t gfx_target_version = 0;
 
 	switch (adev->asic_type) {
@@ -395,6 +395,7 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
 			f2g = &gfx_v11_kfd2kgd;
 			break;
 		case IP_VERSION(11, 0, 1):
+		case IP_VERSION(11, 0, 4):
 			gfx_target_version = 110003;
 			f2g = &gfx_v11_kfd2kgd;
 			break;
@@ -429,7 +430,6 @@ struct kfd_dev *kgd2kfd_probe(struct amdgpu_device *adev, bool vf)
 
 	kfd->adev = adev;
 	kfd_device_info_init(kfd, vf, gfx_target_version);
-	kfd->pdev = pdev;
 	kfd->init_complete = false;
 	kfd->kfd2kgd = f2g;
 	atomic_set(&kfd->compute_profile, 0);
@@ -497,7 +497,10 @@ static int kfd_gws_init(struct kfd_dev *kfd)
 		(KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 1)
 			&& kfd->mec2_fw_version >= 0x30)   ||
 		(KFD_GC_VERSION(kfd) == IP_VERSION(9, 4, 2)
-			&& kfd->mec2_fw_version >= 0x28))))
+			&& kfd->mec2_fw_version >= 0x28) ||
+		(KFD_GC_VERSION(kfd) >= IP_VERSION(10, 3, 0)
+			&& KFD_GC_VERSION(kfd) < IP_VERSION(11, 0, 0)
+			&& kfd->mec2_fw_version >= 0x6b))))
 		ret = amdgpu_amdkfd_alloc_gws(kfd->adev,
 				kfd->adev->gds.gws_size, &kfd->gws);
 
@@ -511,12 +514,10 @@ static void kfd_smi_init(struct kfd_dev *dev)
 }
 
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
-			 struct drm_device *ddev,
 			 const struct kgd2kfd_shared_resources *gpu_resources)
 {
 	unsigned int size, map_process_packet_size;
 
-	kfd->ddev = ddev;
 	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
 			KGD_ENGINE_MEC1);
 	kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->adev,
@@ -541,7 +542,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	     kfd->mec_fw_version < kfd->device_info.no_atomic_fw_version)) {
 		dev_info(kfd_device,
 			 "skipped device %x:%x, PCI rejects atomics %d<%d\n",
-			 kfd->pdev->vendor, kfd->pdev->device,
+			 kfd->adev->pdev->vendor, kfd->adev->pdev->device,
 			 kfd->mec_fw_version,
 			 kfd->device_info.no_atomic_fw_version);
 		return false;
@@ -650,8 +651,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	kfd_smi_init(kfd);
 
 	kfd->init_complete = true;
-	dev_info(kfd_device, "added device %x:%x\n", kfd->pdev->vendor,
-		 kfd->pdev->device);
+	dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor,
+		 kfd->adev->pdev->device);
 
 	pr_debug("Starting kfd with the following scheduling policy %d\n",
 		kfd->dqm->sched_policy);
@@ -676,7 +677,7 @@ alloc_gtt_mem_failure:
 		amdgpu_amdkfd_free_gws(kfd->adev, kfd->gws);
 	dev_err(kfd_device,
 		"device %x:%x NOT added due to errors\n",
-		kfd->pdev->vendor, kfd->pdev->device);
+		kfd->adev->pdev->vendor, kfd->adev->pdev->device);
 out:
 	return kfd->init_complete;
 }
@@ -789,7 +790,7 @@ int kgd2kfd_resume_iommu(struct kfd_dev *kfd)
 	if (err)
 		dev_err(kfd_device,
 			"Failed to resume IOMMU for device %x:%x\n",
-			kfd->pdev->vendor, kfd->pdev->device);
+			kfd->adev->pdev->vendor, kfd->adev->pdev->device);
 	return err;
 }
 
@@ -801,7 +802,7 @@ static int kfd_resume(struct kfd_dev *kfd)
 	if (err)
 		dev_err(kfd_device,
 			"Error starting queue manager for device %x:%x\n",
-			kfd->pdev->vendor, kfd->pdev->device);
+			kfd->adev->pdev->vendor, kfd->adev->pdev->device);
 
 	return err;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 83e3ce9f6049..729d26d648af 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -506,6 +506,7 @@ int kfd_criu_restore_event(struct file *devkfd,
 		ret = create_other_event(p, ev, &ev_priv->event_id);
 		break;
 	}
+	mutex_unlock(&p->event_mutex);
 
 exit:
 	if (ret)
@@ -513,8 +514,6 @@ exit:
 
 	kfree(ev_priv);
 
-	mutex_unlock(&p->event_mutex);
-
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
index fbd0afe4da42..ec1bf611624e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c
@@ -49,7 +49,7 @@ int kfd_iommu_check_device(struct kfd_dev *kfd)
 		return -ENODEV;
 
 	iommu_info.flags = 0;
-	err = amd_iommu_device_info(kfd->pdev, &iommu_info);
+	err = amd_iommu_device_info(kfd->adev->pdev, &iommu_info);
 	if (err)
 		return err;
 
@@ -71,7 +71,7 @@ int kfd_iommu_device_init(struct kfd_dev *kfd)
 		return 0;
 
 	iommu_info.flags = 0;
-	err = amd_iommu_device_info(kfd->pdev, &iommu_info);
+	err = amd_iommu_device_info(kfd->adev->pdev, &iommu_info);
 	if (err < 0) {
 		dev_err(kfd_device,
 			"error getting iommu info. is the iommu enabled?\n");
@@ -121,7 +121,7 @@ int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd)
 		return -EINVAL;
 	}
 
-	err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread);
+	err = amd_iommu_bind_pasid(dev->adev->pdev, p->pasid, p->lead_thread);
 	if (!err)
 		pdd->bound = PDD_BOUND;
 
@@ -139,7 +139,8 @@ void kfd_iommu_unbind_process(struct kfd_process *p)
 
 	for (i = 0; i < p->n_pdds; i++)
 		if (p->pdds[i]->bound == PDD_BOUND)
-			amd_iommu_unbind_pasid(p->pdds[i]->dev->pdev, p->pasid);
+			amd_iommu_unbind_pasid(p->pdds[i]->dev->adev->pdev,
+					       p->pasid);
 }
 
 /* Callback for process shutdown invoked by the IOMMU driver */
@@ -222,7 +223,7 @@ static int kfd_bind_processes_to_device(struct kfd_dev *kfd)
 			continue;
 		}
 
-		err = amd_iommu_bind_pasid(kfd->pdev, p->pasid,
+		err = amd_iommu_bind_pasid(kfd->adev->pdev, p->pasid,
 				p->lead_thread);
 		if (err < 0) {
 			pr_err("Unexpected pasid 0x%x binding failure\n",
@@ -282,9 +283,9 @@ void kfd_iommu_suspend(struct kfd_dev *kfd)
 
 	kfd_unbind_processes_from_device(kfd);
 
-	amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
-	amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
-	amd_iommu_free_device(kfd->pdev);
+	amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, NULL);
+	amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, NULL);
+	amd_iommu_free_device(kfd->adev->pdev);
 }
 
 /** kfd_iommu_resume - Restore IOMMU after resume
@@ -302,20 +303,20 @@ int kfd_iommu_resume(struct kfd_dev *kfd)
 
 	pasid_limit = kfd_get_pasid_limit();
 
-	err = amd_iommu_init_device(kfd->pdev, pasid_limit);
+	err = amd_iommu_init_device(kfd->adev->pdev, pasid_limit);
 	if (err)
 		return -ENXIO;
 
-	amd_iommu_set_invalidate_ctx_cb(kfd->pdev,
+	amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev,
 					iommu_pasid_shutdown_callback);
-	amd_iommu_set_invalid_ppr_cb(kfd->pdev,
+	amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev,
 				     iommu_invalid_ppr_cb);
 
 	err = kfd_bind_processes_to_device(kfd);
 	if (err) {
-		amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL);
-		amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL);
-		amd_iommu_free_device(kfd->pdev);
+		amd_iommu_set_invalidate_ctx_cb(kfd->adev->pdev, NULL);
+		amd_iommu_set_invalid_ppr_cb(kfd->adev->pdev, NULL);
+		amd_iommu_free_device(kfd->adev->pdev);
 		return err;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 2797029bd500..10048ce16aea 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -28,7 +28,6 @@
 #include "amdgpu_sync.h"
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
-#include "amdgpu_mn.h"
 #include "amdgpu_res_cursor.h"
 #include "kfd_priv.h"
 #include "kfd_svm.h"
@@ -65,8 +64,11 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
 	num_dw = ALIGN(adev->mman.buffer_funcs->copy_num_dw, 8);
 	num_bytes = npages * 8;
 
-	r = amdgpu_job_alloc_with_ib(adev, num_dw * 4 + num_bytes,
-				     AMDGPU_IB_POOL_DELAYED, &job);
+	r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
+				     AMDGPU_FENCE_OWNER_UNDEFINED,
+				     num_dw * 4 + num_bytes,
+				     AMDGPU_IB_POOL_DELAYED,
+				     &job);
 	if (r)
 		return r;
 
@@ -89,18 +91,10 @@ svm_migrate_gart_map(struct amdgpu_ring *ring, uint64_t npages,
 	cpu_addr = &job->ibs[0].ptr[num_dw];
 
 	amdgpu_gart_map(adev, 0, npages, addr, pte_flags, cpu_addr);
-	r = amdgpu_job_submit(job, &adev->mman.entity,
-			      AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
-	if (r)
-		goto error_free;
-
+	fence = amdgpu_job_submit(job);
 	dma_fence_put(fence);
 
 	return r;
-
-error_free:
-	amdgpu_job_free(job);
-	return r;
 }
 
 /**
@@ -529,8 +523,8 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
 	for (addr = start; addr < end;) {
 		unsigned long next;
 
-		vma = find_vma(mm, addr);
-		if (!vma || addr < vma->vm_start)
+		vma = vma_lookup(mm, addr);
+		if (!vma)
 			break;
 
 		next = min(vma->vm_end, end);
@@ -798,8 +792,8 @@ int svm_migrate_vram_to_ram(struct svm_range *prange, struct mm_struct *mm,
 	for (addr = start; addr < end;) {
 		unsigned long next;
 
-		vma = find_vma(mm, addr);
-		if (!vma || addr < vma->vm_start) {
+		vma = vma_lookup(mm, addr);
+		if (!vma) {
 			pr_debug("failed to find vma for prange %p\n", prange);
 			r = -EFAULT;
 			break;
@@ -973,12 +967,10 @@ out_unlock_prange:
 out_unlock_svms:
 	mutex_unlock(&p->svms.lock);
 out_unref_process:
+	pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);
 	kfd_unref_process(p);
 out_mmput:
 	mmput(mm);
-
-	pr_debug("CPU fault svms 0x%p address 0x%lx done\n", &p->svms, addr);
-
 	return r ? VM_FAULT_SIGBUS : 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h b/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h
deleted file mode 100644
index f9cd28690151..000000000000
--- a/drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_diq.h
+++ /dev/null
@@ -1,291 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR MIT */
-/*
- * Copyright 2014-2022 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef KFD_PM4_HEADERS_DIQ_H_
-#define KFD_PM4_HEADERS_DIQ_H_
-
-/*--------------------_INDIRECT_BUFFER-------------------- */
-
-#ifndef _PM4__INDIRECT_BUFFER_DEFINED
-#define _PM4__INDIRECT_BUFFER_DEFINED
-enum _INDIRECT_BUFFER_cache_policy_enum {
-	cache_policy___indirect_buffer__lru = 0,
-	cache_policy___indirect_buffer__stream = 1,
-	cache_policy___indirect_buffer__bypass = 2
-};
-
-enum {
-	IT_INDIRECT_BUFFER_PASID = 0x5C
-};
-
-struct pm4__indirect_buffer_pasid {
-	union {
-		union PM4_MES_TYPE_3_HEADER header;	/* header */
-		unsigned int ordinal1;
-	};
-
-	union {
-		struct {
-			unsigned int reserved1:2;
-			unsigned int ib_base_lo:30;
-		} bitfields2;
-		unsigned int ordinal2;
-	};
-
-	union {
-		struct {
-			unsigned int ib_base_hi:16;
-			unsigned int reserved2:16;
-		} bitfields3;
-		unsigned int ordinal3;
-	};
-
-	union {
-		unsigned int control;
-		unsigned int ordinal4;
-	};
-
-	union {
-		struct {
-			unsigned int pasid:10;
-			unsigned int reserved4:22;
-		} bitfields5;
-		unsigned int ordinal5;
-	};
-
-};
-
-#endif
-
-/*--------------------_RELEASE_MEM-------------------- */
-
-#ifndef _PM4__RELEASE_MEM_DEFINED
-#define _PM4__RELEASE_MEM_DEFINED
-enum _RELEASE_MEM_event_index_enum {
-	event_index___release_mem__end_of_pipe = 5,
-	event_index___release_mem__shader_done = 6
-};
-
-enum _RELEASE_MEM_cache_policy_enum {
-	cache_policy___release_mem__lru = 0,
-	cache_policy___release_mem__stream = 1,
-	cache_policy___release_mem__bypass = 2
-};
-
-enum _RELEASE_MEM_dst_sel_enum {
-	dst_sel___release_mem__memory_controller = 0,
-	dst_sel___release_mem__tc_l2 = 1,
-	dst_sel___release_mem__queue_write_pointer_register = 2,
-	dst_sel___release_mem__queue_write_pointer_poll_mask_bit = 3
-};
-
-enum _RELEASE_MEM_int_sel_enum {
-	int_sel___release_mem__none = 0,
-	int_sel___release_mem__send_interrupt_only = 1,
-	int_sel___release_mem__send_interrupt_after_write_confirm = 2,
-	int_sel___release_mem__send_data_after_write_confirm = 3
-};
-
-enum _RELEASE_MEM_data_sel_enum {
-	data_sel___release_mem__none = 0,
-	data_sel___release_mem__send_32_bit_low = 1,
-	data_sel___release_mem__send_64_bit_data = 2,
-	data_sel___release_mem__send_gpu_clock_counter = 3,
-	data_sel___release_mem__send_cp_perfcounter_hi_lo = 4,
-	data_sel___release_mem__store_gds_data_to_memory = 5
-};
-
-struct pm4__release_mem {
-	union {
-		union PM4_MES_TYPE_3_HEADER header;	/*header */
-		unsigned int ordinal1;
-	};
-
-	union {
-		struct {
-			unsigned int event_type:6;
-			unsigned int reserved1:2;
-			enum _RELEASE_MEM_event_index_enum event_index:4;
-			unsigned int tcl1_vol_action_ena:1;
-			unsigned int tc_vol_action_ena:1;
-			unsigned int reserved2:1;
-			unsigned int tc_wb_action_ena:1;
-			unsigned int tcl1_action_ena:1;
-			unsigned int tc_action_ena:1;
-			unsigned int reserved3:6;
-			unsigned int atc:1;
-			enum _RELEASE_MEM_cache_policy_enum cache_policy:2;
-			unsigned int reserved4:5;
-		} bitfields2;
-		unsigned int ordinal2;
-	};
-
-	union {
-		struct {
-			unsigned int reserved5:16;
-			enum _RELEASE_MEM_dst_sel_enum dst_sel:2;
-			unsigned int reserved6:6;
-			enum _RELEASE_MEM_int_sel_enum int_sel:3;
-			unsigned int reserved7:2;
-			enum _RELEASE_MEM_data_sel_enum data_sel:3;
-		} bitfields3;
-		unsigned int ordinal3;
-	};
-
-	union {
-		struct {
-			unsigned int reserved8:2;
-			unsigned int address_lo_32b:30;
-		} bitfields4;
-		struct {
-			unsigned int reserved9:3;
-			unsigned int address_lo_64b:29;
-		} bitfields5;
-		unsigned int ordinal4;
-	};
-
-	unsigned int address_hi;
-
-	unsigned int data_lo;
-
-	unsigned int data_hi;
-
-};
-#endif
-
-
-/*--------------------_SET_CONFIG_REG-------------------- */
-
-#ifndef _PM4__SET_CONFIG_REG_DEFINED
-#define _PM4__SET_CONFIG_REG_DEFINED
-
-struct pm4__set_config_reg {
-	union {
-		union PM4_MES_TYPE_3_HEADER header;	/*header */
-		unsigned int ordinal1;
-	};
-
-	union {
-		struct {
-			unsigned int reg_offset:16;
-			unsigned int reserved1:7;
-			unsigned int vmid_shift:5;
-			unsigned int insert_vmid:1;
-			unsigned int reserved2:3;
-		} bitfields2;
-		unsigned int ordinal2;
-	};
-
-	unsigned int reg_data[1];	/*1..N of these fields */
-
-};
-#endif
-
-/*--------------------_WAIT_REG_MEM-------------------- */
-
-#ifndef _PM4__WAIT_REG_MEM_DEFINED
-#define _PM4__WAIT_REG_MEM_DEFINED
-enum _WAIT_REG_MEM_function_enum {
-	function___wait_reg_mem__always_pass = 0,
-	function___wait_reg_mem__less_than_ref_value = 1,
-	function___wait_reg_mem__less_than_equal_to_the_ref_value = 2,
-	function___wait_reg_mem__equal_to_the_reference_value = 3,
-	function___wait_reg_mem__not_equal_reference_value = 4,
-	function___wait_reg_mem__greater_than_or_equal_reference_value = 5,
-	function___wait_reg_mem__greater_than_reference_value = 6,
-	function___wait_reg_mem__reserved = 7
-};
-
-enum _WAIT_REG_MEM_mem_space_enum {
-	mem_space___wait_reg_mem__register_space = 0,
-	mem_space___wait_reg_mem__memory_space = 1
-};
-
-enum _WAIT_REG_MEM_operation_enum {
-	operation___wait_reg_mem__wait_reg_mem = 0,
-	operation___wait_reg_mem__wr_wait_wr_reg = 1
-};
-
-struct pm4__wait_reg_mem {
-	union {
-		union PM4_MES_TYPE_3_HEADER header;	/*header */
-		unsigned int ordinal1;
-	};
-
-	union {
-		struct {
-			enum _WAIT_REG_MEM_function_enum function:3;
-			unsigned int reserved1:1;
-			enum _WAIT_REG_MEM_mem_space_enum mem_space:2;
-			enum _WAIT_REG_MEM_operation_enum operation:2;
-			unsigned int reserved2:24;
-		} bitfields2;
-		unsigned int ordinal2;
-	};
-
-	union {
-		struct {
-			unsigned int reserved3:2;
-			unsigned int memory_poll_addr_lo:30;
-		} bitfields3;
-		struct {
-			unsigned int register_poll_addr:16;
-			unsigned int reserved4:16;
-		} bitfields4;
-		struct {
-			unsigned int register_write_addr:16;
-			unsigned int reserved5:16;
-		} bitfields5;
-		unsigned int ordinal3;
-	};
-
-	union {
-		struct {
-			unsigned int poll_address_hi:16;
-			unsigned int reserved6:16;
-		} bitfields6;
-		struct {
-			unsigned int register_write_addr:16;
-			unsigned int reserved7:16;
-		} bitfields7;
-		unsigned int ordinal4;
-	};
-
-	unsigned int reference;
-
-	unsigned int mask;
-
-	union {
-		struct {
-			unsigned int poll_interval:16;
-			unsigned int reserved8:16;
-		} bitfields8;
-		unsigned int ordinal7;
-	};
-
-};
-#endif
-
-
-#endif /* KFD_PM4_HEADERS_DIQ_H_ */
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index bf610e3b683b..552c3ac85a13 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -254,8 +254,6 @@ struct kfd_dev {
 	struct amdgpu_device *adev;
 
 	struct kfd_device_info device_info;
-	struct pci_dev *pdev;
-	struct drm_device *ddev;
 
 	unsigned int id;		/* topology stub index */
 
@@ -1365,7 +1363,7 @@ void kfd_dec_compute_active(struct kfd_dev *dev);
 static inline int kfd_devcgroup_check_permission(struct kfd_dev *kfd)
 {
 #if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF)
-	struct drm_device *ddev = kfd->ddev;
+	struct drm_device *ddev = adev_to_drm(kfd->adev);
 
 	return devcgroup_check_permission(DEVCG_DEV_CHAR, DRM_MAJOR,
 					  ddev->render->index,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index 951b63677248..51b1683ac5c1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -689,13 +689,13 @@ void kfd_process_destroy_wq(void)
 }
 
 static void kfd_process_free_gpuvm(struct kgd_mem *mem,
-			struct kfd_process_device *pdd, void *kptr)
+			struct kfd_process_device *pdd, void **kptr)
 {
 	struct kfd_dev *dev = pdd->dev;
 
-	if (kptr) {
+	if (kptr && *kptr) {
 		amdgpu_amdkfd_gpuvm_unmap_gtt_bo_from_kernel(mem);
-		kptr = NULL;
+		*kptr = NULL;
 	}
 
 	amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(dev->adev, mem, pdd->drm_priv);
@@ -795,7 +795,7 @@ static void kfd_process_device_destroy_ib_mem(struct kfd_process_device *pdd)
 	if (!qpd->ib_kaddr || !qpd->ib_base)
 		return;
 
-	kfd_process_free_gpuvm(qpd->ib_mem, pdd, qpd->ib_kaddr);
+	kfd_process_free_gpuvm(qpd->ib_mem, pdd, &qpd->ib_kaddr);
 }
 
 struct kfd_process *kfd_create_process(struct file *filep)
@@ -1050,8 +1050,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
 		 * for auto suspend
 		 */
 		if (pdd->runtime_inuse) {
-			pm_runtime_mark_last_busy(pdd->dev->ddev->dev);
-			pm_runtime_put_autosuspend(pdd->dev->ddev->dev);
+			pm_runtime_mark_last_busy(adev_to_drm(pdd->dev->adev)->dev);
+			pm_runtime_put_autosuspend(adev_to_drm(pdd->dev->adev)->dev);
 			pdd->runtime_inuse = false;
 		}
 
@@ -1277,7 +1277,7 @@ static void kfd_process_device_destroy_cwsr_dgpu(struct kfd_process_device *pdd)
 	if (!dev->cwsr_enabled || !qpd->cwsr_kaddr || !qpd->cwsr_base)
 		return;
 
-	kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, qpd->cwsr_kaddr);
+	kfd_process_free_gpuvm(qpd->cwsr_mem, pdd, &qpd->cwsr_kaddr);
 }
 
 void kfd_process_set_trap_handler(struct qcm_process_device *qpd,
@@ -1576,9 +1576,9 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 	p = pdd->process;
 	dev = pdd->dev;
 
-	ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(
-		dev->adev, drm_file, p->pasid,
-		&p->kgd_process_info, &p->ef);
+	ret = amdgpu_amdkfd_gpuvm_acquire_process_vm(dev->adev, drm_file,
+						     &p->kgd_process_info,
+						     &p->ef);
 	if (ret) {
 		pr_err("Failed to create process VM object\n");
 		return ret;
@@ -1593,13 +1593,19 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
 	if (ret)
 		goto err_init_cwsr;
 
+	ret = amdgpu_amdkfd_gpuvm_set_vm_pasid(dev->adev, drm_file, p->pasid);
+	if (ret)
+		goto err_set_pasid;
+
 	pdd->drm_file = drm_file;
 
 	return 0;
 
+err_set_pasid:
+	kfd_process_device_destroy_cwsr_dgpu(pdd);
 err_init_cwsr:
+	kfd_process_device_destroy_ib_mem(pdd);
 err_reserve_ib_mem:
-	kfd_process_device_free_bos(pdd);
 	pdd->drm_priv = NULL;
 
 	return ret;
@@ -1633,9 +1639,9 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
 	 * pdd is destroyed.
 	 */
 	if (!pdd->runtime_inuse) {
-		err = pm_runtime_get_sync(dev->ddev->dev);
+		err = pm_runtime_get_sync(adev_to_drm(dev->adev)->dev);
 		if (err < 0) {
-			pm_runtime_put_autosuspend(dev->ddev->dev);
+			pm_runtime_put_autosuspend(adev_to_drm(dev->adev)->dev);
 			return ERR_PTR(err);
 		}
 	}
@@ -1655,8 +1661,8 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
 out:
 	/* balance runpm reference count and exit with error */
 	if (!pdd->runtime_inuse) {
-		pm_runtime_mark_last_busy(dev->ddev->dev);
-		pm_runtime_put_autosuspend(dev->ddev->dev);
+		pm_runtime_mark_last_busy(adev_to_drm(dev->adev)->dev);
+		pm_runtime_put_autosuspend(adev_to_drm(dev->adev)->dev);
 	}
 
 	return ERR_PTR(err);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
index 64fdf63093a0..814f99888ab1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
@@ -26,7 +26,7 @@
 #include "amdgpu_sync.h"
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
-#include "amdgpu_mn.h"
+#include "amdgpu_hmm.h"
 #include "amdgpu.h"
 #include "amdgpu_xgmi.h"
 #include "kfd_priv.h"
@@ -259,7 +259,7 @@ void svm_range_free_dma_mappings(struct svm_range *prange)
 			pr_debug("failed to find device idx %d\n", gpuidx);
 			continue;
 		}
-		dev = &pdd->dev->pdev->dev;
+		dev = &pdd->dev->adev->pdev->dev;
 		svm_range_dma_unmap(dev, dma_addr, 0, prange->npages);
 		kvfree(dma_addr);
 		prange->dma_addr[gpuidx] = NULL;
@@ -1586,8 +1586,8 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		unsigned long npages;
 		bool readonly;
 
-		vma = find_vma(mm, addr);
-		if (!vma || addr < vma->vm_start) {
+		vma = vma_lookup(mm, addr);
+		if (!vma) {
 			r = -EFAULT;
 			goto unreserve_out;
 		}
@@ -1596,9 +1596,9 @@ static int svm_range_validate_and_map(struct mm_struct *mm,
 		next = min(vma->vm_end, end);
 		npages = (next - addr) >> PAGE_SHIFT;
 		WRITE_ONCE(p->svms.faulting_task, current);
-		r = amdgpu_hmm_range_get_pages(&prange->notifier, mm, NULL,
-					       addr, npages, &hmm_range,
-					       readonly, true, owner);
+		r = amdgpu_hmm_range_get_pages(&prange->notifier, addr, npages,
+					       readonly, owner, NULL,
+					       &hmm_range);
 		WRITE_ONCE(p->svms.faulting_task, NULL);
 		if (r) {
 			pr_debug("failed %d to get svm range pages\n", r);
@@ -2542,8 +2542,8 @@ svm_range_get_range_boundaries(struct kfd_process *p, int64_t addr,
 	struct interval_tree_node *node;
 	unsigned long start_limit, end_limit;
 
-	vma = find_vma(p->mm, addr << PAGE_SHIFT);
-	if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+	vma = vma_lookup(p->mm, addr << PAGE_SHIFT);
+	if (!vma) {
 		pr_debug("VMA does not exist in address [0x%llx]\n", addr);
 		return -EFAULT;
 	}
@@ -2871,8 +2871,8 @@ retry_write_locked:
 	/* __do_munmap removed VMA, return success as we are handling stale
 	 * retry fault.
 	 */
-	vma = find_vma(mm, addr << PAGE_SHIFT);
-	if (!vma || (addr << PAGE_SHIFT) < vma->vm_start) {
+	vma = vma_lookup(mm, addr << PAGE_SHIFT);
+	if (!vma) {
 		pr_debug("address 0x%llx VMA is removed\n", addr);
 		r = 0;
 		goto out_unlock_range;
@@ -3152,9 +3152,8 @@ svm_range_is_valid(struct kfd_process *p, uint64_t start, uint64_t size)
 	start <<= PAGE_SHIFT;
 	end = start + (size << PAGE_SHIFT);
 	do {
-		vma = find_vma(p->mm, start);
-		if (!vma || start < vma->vm_start ||
-		    (vma->vm_flags & device_vma))
+		vma = vma_lookup(p->mm, start);
+		if (!vma || (vma->vm_flags & device_vma))
 			return -EFAULT;
 		start = min(end, vma->vm_end);
 	} while (start < end);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 3f0a4a415907..bceb1a5b2518 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -115,7 +115,7 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
 	down_read(&topology_lock);
 
 	list_for_each_entry(top_dev, &topology_device_list, list)
-		if (top_dev->gpu && top_dev->gpu->pdev == pdev) {
+		if (top_dev->gpu && top_dev->gpu->adev->pdev == pdev) {
 			device = top_dev->gpu;
 			break;
 		}
@@ -364,7 +364,6 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
 
 	/* Making sure that the buffer is an empty string */
 	buffer[0] = 0;
-
 	cache = container_of(attr, struct kfd_cache_properties, attr);
 	if (cache->gpu && kfd_devcgroup_check_permission(cache->gpu))
 		return -EPERM;
@@ -379,12 +378,13 @@ static ssize_t kfd_cache_show(struct kobject *kobj, struct attribute *attr,
 	sysfs_show_32bit_prop(buffer, offs, "association", cache->cache_assoc);
 	sysfs_show_32bit_prop(buffer, offs, "latency", cache->cache_latency);
 	sysfs_show_32bit_prop(buffer, offs, "type", cache->cache_type);
+
 	offs += snprintf(buffer+offs, PAGE_SIZE-offs, "sibling_map ");
-	for (i = 0; i < CRAT_SIBLINGMAP_SIZE; i++)
+	for (i = 0; i < cache->sibling_map_size; i++)
 		for (j = 0; j < sizeof(cache->sibling_map[0])*8; j++)
 			/* Check each bit */
 			offs += snprintf(buffer+offs, PAGE_SIZE-offs, "%d,",
-					 (cache->sibling_map[i] >> j) & 1);
+						(cache->sibling_map[i] >> j) & 1);
 
 	/* Replace the last "," with end of line */
 	buffer[offs-1] = '\n';
@@ -1169,13 +1169,12 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
 
 	local_mem_size = gpu->local_mem_info.local_mem_size_private +
 			gpu->local_mem_info.local_mem_size_public;
-
-	buf[0] = gpu->pdev->devfn;
-	buf[1] = gpu->pdev->subsystem_vendor |
-		(gpu->pdev->subsystem_device << 16);
-	buf[2] = pci_domain_nr(gpu->pdev->bus);
-	buf[3] = gpu->pdev->device;
-	buf[4] = gpu->pdev->bus->number;
+	buf[0] = gpu->adev->pdev->devfn;
+	buf[1] = gpu->adev->pdev->subsystem_vendor |
+		(gpu->adev->pdev->subsystem_device << 16);
+	buf[2] = pci_domain_nr(gpu->adev->pdev->bus);
+	buf[3] = gpu->adev->pdev->device;
+	buf[4] = gpu->adev->pdev->bus->number;
 	buf[5] = lower_32_bits(local_mem_size);
 	buf[6] = upper_32_bits(local_mem_size);
 
@@ -1198,7 +1197,6 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
 	struct kfd_iolink_properties *iolink;
 	struct kfd_iolink_properties *p2plink;
 
-	down_write(&topology_lock);
 	list_for_each_entry(dev, &topology_device_list, list) {
 		/* Discrete GPUs need their own topology device list
 		 * entries. Don't assign them to CPU/APU nodes.
@@ -1222,7 +1220,6 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
 			break;
 		}
 	}
-	up_write(&topology_lock);
 	return out_dev;
 }
 
@@ -1269,7 +1266,7 @@ static void kfd_set_iolink_no_atomics(struct kfd_topology_device *dev,
 	if (target_gpu_dev) {
 		uint32_t cap;
 
-		pcie_capability_read_dword(target_gpu_dev->gpu->pdev,
+		pcie_capability_read_dword(target_gpu_dev->gpu->adev->pdev,
 				PCI_EXP_DEVCAP2, &cap);
 
 		if (!(cap & (PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
@@ -1593,21 +1590,290 @@ out:
 	return ret;
 }
 
+
+/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
+static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
+				struct kfd_gpu_cache_info *pcache_info,
+				struct kfd_cu_info *cu_info,
+				int cu_bitmask,
+				int cache_type, unsigned int cu_processor_id,
+				int cu_block)
+{
+	unsigned int cu_sibling_map_mask;
+	int first_active_cu;
+	struct kfd_cache_properties *pcache = NULL;
+
+	cu_sibling_map_mask = cu_bitmask;
+	cu_sibling_map_mask >>= cu_block;
+	cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+	first_active_cu = ffs(cu_sibling_map_mask);
+
+	/* CU could be inactive. In case of shared cache find the first active
+	 * CU. and incase of non-shared cache check if the CU is inactive. If
+	 * inactive active skip it
+	 */
+	if (first_active_cu) {
+		pcache = kfd_alloc_struct(pcache);
+		if (!pcache)
+			return -ENOMEM;
+
+		memset(pcache, 0, sizeof(struct kfd_cache_properties));
+		pcache->processor_id_low = cu_processor_id + (first_active_cu - 1);
+		pcache->cache_level = pcache_info[cache_type].cache_level;
+		pcache->cache_size = pcache_info[cache_type].cache_size;
+
+		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
+			pcache->cache_type |= HSA_CACHE_TYPE_DATA;
+		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)
+			pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
+		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)
+			pcache->cache_type |= HSA_CACHE_TYPE_CPU;
+		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
+			pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
+
+		/* Sibling map is w.r.t processor_id_low, so shift out
+		 * inactive CU
+		 */
+		cu_sibling_map_mask =
+			cu_sibling_map_mask >> (first_active_cu - 1);
+
+		pcache->sibling_map[0] = (uint8_t)(cu_sibling_map_mask & 0xFF);
+		pcache->sibling_map[1] =
+				(uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+		pcache->sibling_map[2] =
+				(uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+		pcache->sibling_map[3] =
+				(uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+
+		pcache->sibling_map_size = 4;
+		*props_ext = pcache;
+
+		return 0;
+	}
+	return 1;
+}
+
+/* Helper function. See kfd_fill_gpu_cache_info for parameter description */
+static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
+				struct kfd_gpu_cache_info *pcache_info,
+				struct kfd_cu_info *cu_info,
+				int cache_type, unsigned int cu_processor_id)
+{
+	unsigned int cu_sibling_map_mask;
+	int first_active_cu;
+	int i, j, k;
+	struct kfd_cache_properties *pcache = NULL;
+
+	cu_sibling_map_mask = cu_info->cu_bitmap[0][0];
+	cu_sibling_map_mask &=
+		((1 << pcache_info[cache_type].num_cu_shared) - 1);
+	first_active_cu = ffs(cu_sibling_map_mask);
+
+	/* CU could be inactive. In case of shared cache find the first active
+	 * CU. and incase of non-shared cache check if the CU is inactive. If
+	 * inactive active skip it
+	 */
+	if (first_active_cu) {
+		pcache = kfd_alloc_struct(pcache);
+		if (!pcache)
+			return -ENOMEM;
+
+		memset(pcache, 0, sizeof(struct kfd_cache_properties));
+		pcache->processor_id_low = cu_processor_id
+					+ (first_active_cu - 1);
+		pcache->cache_level = pcache_info[cache_type].cache_level;
+		pcache->cache_size = pcache_info[cache_type].cache_size;
+
+		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_DATA_CACHE)
+			pcache->cache_type |= HSA_CACHE_TYPE_DATA;
+		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_INST_CACHE)
+			pcache->cache_type |= HSA_CACHE_TYPE_INSTRUCTION;
+		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_CPU_CACHE)
+			pcache->cache_type |= HSA_CACHE_TYPE_CPU;
+		if (pcache_info[cache_type].flags & CRAT_CACHE_FLAGS_SIMD_CACHE)
+			pcache->cache_type |= HSA_CACHE_TYPE_HSACU;
+
+		/* Sibling map is w.r.t processor_id_low, so shift out
+		 * inactive CU
+		 */
+		cu_sibling_map_mask = cu_sibling_map_mask >> (first_active_cu - 1);
+		k = 0;
+
+		for (i = 0; i < cu_info->num_shader_engines; i++) {
+			for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
+				pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
+				pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+				pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+				pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+				k += 4;
+
+				cu_sibling_map_mask = cu_info->cu_bitmap[i % 4][j + i / 4];
+				cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+			}
+		}
+		pcache->sibling_map_size = k;
+		*props_ext = pcache;
+		return 0;
+	}
+	return 1;
+}
+
+#define KFD_MAX_CACHE_TYPES 6
+
+/* kfd_fill_cache_non_crat_info - Fill GPU cache info using kfd_gpu_cache_info
+ * tables
+ */
+static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct kfd_dev *kdev)
+{
+	struct kfd_gpu_cache_info *pcache_info = NULL;
+	int i, j, k;
+	int ct = 0;
+	unsigned int cu_processor_id;
+	int ret;
+	unsigned int num_cu_shared;
+	struct kfd_cu_info cu_info;
+	struct kfd_cu_info *pcu_info;
+	int gpu_processor_id;
+	struct kfd_cache_properties *props_ext;
+	int num_of_entries = 0;
+	int num_of_cache_types = 0;
+	struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
+
+	amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
+	pcu_info = &cu_info;
+
+	gpu_processor_id = dev->node_props.simd_id_base;
+
+	pcache_info = cache_info;
+	num_of_cache_types = kfd_get_gpu_cache_info(kdev, &pcache_info);
+	if (!num_of_cache_types) {
+		pr_warn("no cache info found\n");
+		return;
+	}
+
+	/* For each type of cache listed in the kfd_gpu_cache_info table,
+	 * go through all available Compute Units.
+	 * The [i,j,k] loop will
+	 *		if kfd_gpu_cache_info.num_cu_shared = 1
+	 *			will parse through all available CU
+	 *		If (kfd_gpu_cache_info.num_cu_shared != 1)
+	 *			then it will consider only one CU from
+	 *			the shared unit
+	 */
+	for (ct = 0; ct < num_of_cache_types; ct++) {
+		cu_processor_id = gpu_processor_id;
+		if (pcache_info[ct].cache_level == 1) {
+			for (i = 0; i < pcu_info->num_shader_engines; i++) {
+				for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
+					for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
+
+						ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
+										pcu_info->cu_bitmap[i % 4][j + i / 4], ct,
+										cu_processor_id, k);
+
+						if (ret < 0)
+							break;
+
+						if (!ret) {
+							num_of_entries++;
+							list_add_tail(&props_ext->list, &dev->cache_props);
+						}
+
+						/* Move to next CU block */
+						num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
+							pcu_info->num_cu_per_sh) ?
+							pcache_info[ct].num_cu_shared :
+							(pcu_info->num_cu_per_sh - k);
+						cu_processor_id += num_cu_shared;
+					}
+				}
+			}
+		} else {
+			ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
+								pcu_info, ct, cu_processor_id);
+
+			if (ret < 0)
+				break;
+
+			if (!ret) {
+				num_of_entries++;
+				list_add_tail(&props_ext->list, &dev->cache_props);
+			}
+		}
+	}
+	dev->node_props.caches_count += num_of_entries;
+	pr_debug("Added [%d] GPU cache entries\n", num_of_entries);
+}
+
+static int kfd_topology_add_device_locked(struct kfd_dev *gpu, uint32_t gpu_id,
+					  struct kfd_topology_device **dev)
+{
+	int proximity_domain = ++topology_crat_proximity_domain;
+	struct list_head temp_topology_device_list;
+	void *crat_image = NULL;
+	size_t image_size = 0;
+	int res;
+
+	res = kfd_create_crat_image_virtual(&crat_image, &image_size,
+					    COMPUTE_UNIT_GPU, gpu,
+					    proximity_domain);
+	if (res) {
+		pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
+		       gpu_id);
+		topology_crat_proximity_domain--;
+		goto err;
+	}
+
+	INIT_LIST_HEAD(&temp_topology_device_list);
+
+	res = kfd_parse_crat_table(crat_image,
+				   &temp_topology_device_list,
+				   proximity_domain);
+	if (res) {
+		pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
+		       gpu_id);
+		topology_crat_proximity_domain--;
+		goto err;
+	}
+
+	kfd_topology_update_device_list(&temp_topology_device_list,
+					&topology_device_list);
+
+	*dev = kfd_assign_gpu(gpu);
+	if (WARN_ON(!*dev)) {
+		res = -ENODEV;
+		goto err;
+	}
+
+	/* Fill the cache affinity information here for the GPUs
+	 * using VCRAT
+	 */
+	kfd_fill_cache_non_crat_info(*dev, gpu);
+
+	/* Update the SYSFS tree, since we added another topology
+	 * device
+	 */
+	res = kfd_topology_update_sysfs();
+	if (!res)
+		sys_props.generation_count++;
+	else
+		pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
+		       gpu_id, res);
+
+err:
+	kfd_destroy_crat_image(crat_image);
+	return res;
+}
+
 int kfd_topology_add_device(struct kfd_dev *gpu)
 {
 	uint32_t gpu_id;
 	struct kfd_topology_device *dev;
 	struct kfd_cu_info cu_info;
 	int res = 0;
-	struct list_head temp_topology_device_list;
-	void *crat_image = NULL;
-	size_t image_size = 0;
-	int proximity_domain;
 	int i;
 	const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
 
-	INIT_LIST_HEAD(&temp_topology_device_list);
-
 	gpu_id = kfd_generate_gpu_id(gpu);
 	pr_debug("Adding new GPU (ID: 0x%x) to topology\n", gpu_id);
 
@@ -1617,50 +1883,13 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 	 * CRAT to create a new topology device. Once created assign the gpu to
 	 * that topology device
 	 */
+	down_write(&topology_lock);
 	dev = kfd_assign_gpu(gpu);
-	if (!dev) {
-		down_write(&topology_lock);
-		proximity_domain = ++topology_crat_proximity_domain;
-
-		res = kfd_create_crat_image_virtual(&crat_image, &image_size,
-						    COMPUTE_UNIT_GPU, gpu,
-						    proximity_domain);
-		if (res) {
-			pr_err("Error creating VCRAT for GPU (ID: 0x%x)\n",
-			       gpu_id);
-			topology_crat_proximity_domain--;
-			return res;
-		}
-		res = kfd_parse_crat_table(crat_image,
-					   &temp_topology_device_list,
-					   proximity_domain);
-		if (res) {
-			pr_err("Error parsing VCRAT for GPU (ID: 0x%x)\n",
-			       gpu_id);
-			topology_crat_proximity_domain--;
-			goto err;
-		}
-
-		kfd_topology_update_device_list(&temp_topology_device_list,
-			&topology_device_list);
-
-		/* Update the SYSFS tree, since we added another topology
-		 * device
-		 */
-		res = kfd_topology_update_sysfs();
-		up_write(&topology_lock);
-
-		if (!res)
-			sys_props.generation_count++;
-		else
-			pr_err("Failed to update GPU (ID: 0x%x) to sysfs topology. res=%d\n",
-						gpu_id, res);
-		dev = kfd_assign_gpu(gpu);
-		if (WARN_ON(!dev)) {
-			res = -ENODEV;
-			goto err;
-		}
-	}
+	if (!dev)
+		res = kfd_topology_add_device_locked(gpu, gpu_id, &dev);
+	up_write(&topology_lock);
+	if (res)
+		return res;
 
 	dev->gpu_id = gpu_id;
 	gpu->id = gpu_id;
@@ -1688,13 +1917,13 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 		cu_info.num_shader_arrays_per_engine;
 
 	dev->node_props.gfx_target_version = gpu->device_info.gfx_target_version;
-	dev->node_props.vendor_id = gpu->pdev->vendor;
-	dev->node_props.device_id = gpu->pdev->device;
+	dev->node_props.vendor_id = gpu->adev->pdev->vendor;
+	dev->node_props.device_id = gpu->adev->pdev->device;
 	dev->node_props.capability |=
 		((dev->gpu->adev->rev_id << HSA_CAP_ASIC_REVISION_SHIFT) &
 			HSA_CAP_ASIC_REVISION_MASK);
-	dev->node_props.location_id = pci_dev_id(gpu->pdev);
-	dev->node_props.domain = pci_domain_nr(gpu->pdev->bus);
+	dev->node_props.location_id = pci_dev_id(gpu->adev->pdev);
+	dev->node_props.domain = pci_domain_nr(gpu->adev->pdev->bus);
 	dev->node_props.max_engine_clk_fcompute =
 		amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->adev);
 	dev->node_props.max_engine_clk_ccompute =
@@ -1783,11 +2012,9 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 
 	kfd_debug_print_topology();
 
-	if (!res)
-		kfd_notify_gpu_change(gpu_id, 1);
-err:
-	kfd_destroy_crat_image(crat_image);
-	return res;
+	kfd_notify_gpu_change(gpu_id, 1);
+
+	return 0;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 9f6c949186c1..fca30d00a9bb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -80,6 +80,8 @@ struct kfd_mem_properties {
 	struct attribute	attr;
 };
 
+#define CACHE_SIBLINGMAP_SIZE 64
+
 struct kfd_cache_properties {
 	struct list_head	list;
 	uint32_t		processor_id_low;
@@ -90,10 +92,11 @@ struct kfd_cache_properties {
 	uint32_t		cache_assoc;
 	uint32_t		cache_latency;
 	uint32_t		cache_type;
-	uint8_t			sibling_map[CRAT_SIBLINGMAP_SIZE];
+	uint8_t			sibling_map[CACHE_SIBLINGMAP_SIZE];
 	struct kfd_dev		*gpu;
 	struct kobject		*kobj;
 	struct attribute	attr;
+	uint32_t		sibling_map_size;
 };
 
 struct kfd_iolink_properties {
@@ -128,7 +131,6 @@ struct kfd_topology_device {
 	uint32_t			proximity_domain;
 	struct kfd_node_properties	node_props;
 	struct list_head		mem_props;
-	uint32_t			cache_count;
 	struct list_head		cache_props;
 	struct list_head		io_link_props;
 	struct list_head		p2p_link_props;
diff --git a/drivers/gpu/drm/amd/display/Kconfig b/drivers/gpu/drm/amd/display/Kconfig
index 6925e0280dbe..2efe93f74f84 100644
--- a/drivers/gpu/drm/amd/display/Kconfig
+++ b/drivers/gpu/drm/amd/display/Kconfig
@@ -5,13 +5,21 @@ menu "Display Engine Configuration"
 config DRM_AMD_DC
 	bool "AMD DC - Enable new display engine"
 	default y
+	depends on BROKEN || !CC_IS_CLANG || X86_64 || SPARC64 || ARM64
 	select SND_HDA_COMPONENT if SND_HDA_CORE
-	select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128)
+	# !CC_IS_CLANG: https://github.com/ClangBuiltLinux/linux/issues/1752
+	select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128 || (ARM64 && KERNEL_MODE_NEON && !CC_IS_CLANG))
 	help
 	  Choose this option if you want to use the new display engine
 	  support for AMDGPU. This adds required support for Vega and
 	  Raven ASICs.
 
+	  calculate_bandwidth() is presently broken on all !(X86_64 || SPARC64 || ARM64)
+	  architectures built with Clang (all released versions), whereby the stack
+	  frame gets blown up to well over 5k.  This would cause an immediate kernel
+	  panic on most architectures.  We'll revert this when the following bug report
+	  has been resolved: https://github.com/llvm/llvm-project/issues/41896.
+
 config DRM_AMD_DC_DCN
 	def_bool n
 	help
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 3c072754738d..50c783e19f5a 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -82,7 +82,6 @@
 #include <drm/drm_atomic_uapi.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_blend.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_vblank.h>
@@ -1097,7 +1096,7 @@ static int dm_dmub_hw_init(struct amdgpu_device *adev)
 	/* Initialize hardware. */
 	memset(&hw_params, 0, sizeof(hw_params));
 	hw_params.fb_base = adev->gmc.fb_start;
-	hw_params.fb_offset = adev->gmc.aper_base;
+	hw_params.fb_offset = adev->vm_manager.vram_base_offset;
 
 	/* backdoor load firmware and trigger dmub running */
 	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
@@ -1219,7 +1218,7 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
 	pa_config->system_aperture.agp_top = (uint64_t)agp_top << 24;
 
 	pa_config->system_aperture.fb_base = adev->gmc.fb_start;
-	pa_config->system_aperture.fb_offset = adev->gmc.aper_base;
+	pa_config->system_aperture.fb_offset = adev->vm_manager.vram_base_offset;
 	pa_config->system_aperture.fb_top = adev->gmc.fb_end;
 
 	pa_config->gart_config.page_table_start_addr = page_table_start.quad_part << 12;
@@ -1364,7 +1363,44 @@ static const struct dmi_system_id hpd_disconnect_quirk_table[] = {
 			DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"),
 		},
 	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower Plus 7010"),
+		},
+	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Tower 7010"),
+		},
+	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF Plus 7010"),
+		},
+	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex SFF 7010"),
+		},
+	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro Plus 7010"),
+		},
+	},
+	{
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex Micro 7010"),
+		},
+	},
 	{}
+	/* TODO: refactor this from a fixed table to a dynamic option */
 };
 
 static void retrieve_dmi_info(struct amdgpu_display_manager *dm)
@@ -1397,9 +1433,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	memset(&init_params, 0, sizeof(init_params));
 #endif
 
+	mutex_init(&adev->dm.dpia_aux_lock);
 	mutex_init(&adev->dm.dc_lock);
 	mutex_init(&adev->dm.audio_lock);
-	spin_lock_init(&adev->dm.vblank_lock);
 
 	if(amdgpu_dm_irq_init(adev)) {
 		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
@@ -1467,6 +1503,7 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 		case IP_VERSION(3, 0, 1):
 		case IP_VERSION(3, 1, 2):
 		case IP_VERSION(3, 1, 3):
+		case IP_VERSION(3, 1, 4):
 		case IP_VERSION(3, 1, 5):
 		case IP_VERSION(3, 1, 6):
 			init_data.flags.gpu_vm_support = true;
@@ -1549,6 +1586,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 
 	adev->dm.dc->debug.visual_confirm = amdgpu_dc_visual_confirm;
 
+	/* TODO: Remove after DP2 receiver gets proper support of Cable ID feature */
+	adev->dm.dc->debug.ignore_cable_id = true;
+
 	r = dm_dmub_hw_init(adev);
 	if (r) {
 		DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
@@ -1634,12 +1674,6 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 		}
 	}
 
-	if (amdgpu_dm_initialize_drm_device(adev)) {
-		DRM_ERROR(
-		"amdgpu: failed to initialize sw for display support.\n");
-		goto error;
-	}
-
 	/* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
 	 * It is expected that DMUB will resend any pending notifications at this point, for
 	 * example HPD from DPIA.
@@ -1647,6 +1681,12 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	if (dc_is_dmub_outbox_supported(adev->dm.dc))
 		dc_enable_dmub_outbox(adev->dm.dc);
 
+	if (amdgpu_dm_initialize_drm_device(adev)) {
+		DRM_ERROR(
+		"amdgpu: failed to initialize sw for display support.\n");
+		goto error;
+	}
+
 	/* create fake encoders for MST */
 	dm_dp_create_fake_mst_encoders(adev);
 
@@ -1759,6 +1799,7 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
 
 	mutex_destroy(&adev->dm.audio_lock);
 	mutex_destroy(&adev->dm.dc_lock);
+	mutex_destroy(&adev->dm.dpia_aux_lock);
 
 	return;
 }
@@ -2810,7 +2851,6 @@ const struct amdgpu_ip_block_version dm_ip_block =
 static const struct drm_mode_config_funcs amdgpu_dm_mode_funcs = {
 	.fb_create = amdgpu_display_user_framebuffer_create,
 	.get_format_info = amd_get_format_info,
-	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = amdgpu_dm_atomic_check,
 	.atomic_commit = drm_atomic_helper_commit,
 };
@@ -4590,6 +4630,7 @@ static int dm_early_init(void *handle)
 		adev_to_drm(adev)->dev,
 		&dev_attr_s3_debug);
 #endif
+	adev->dc_enabled = true;
 
 	return 0;
 }
@@ -4829,6 +4870,35 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
 	return 0;
 }
 
+static inline void fill_dc_dirty_rect(struct drm_plane *plane,
+				      struct rect *dirty_rect, int32_t x,
+				      int32_t y, int32_t width, int32_t height,
+				      int *i, bool ffu)
+{
+	if (*i > DC_MAX_DIRTY_RECTS)
+		return;
+
+	if (*i == DC_MAX_DIRTY_RECTS)
+		goto out;
+
+	dirty_rect->x = x;
+	dirty_rect->y = y;
+	dirty_rect->width = width;
+	dirty_rect->height = height;
+
+	if (ffu)
+		drm_dbg(plane->dev,
+			"[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n",
+			plane->base.id, width, height);
+	else
+		drm_dbg(plane->dev,
+			"[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)",
+			plane->base.id, x, y, width, height);
+
+out:
+	(*i)++;
+}
+
 /**
  * fill_dc_dirty_rects() - Fill DC dirty regions for PSR selective updates
  *
@@ -4849,10 +4919,6 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
  * addition, certain use cases - such as cursor and multi-plane overlay (MPO) -
  * implicitly provide damage clips without any client support via the plane
  * bounds.
- *
- * Today, amdgpu_dm only supports the MPO and cursor usecase.
- *
- * TODO: Also enable for FB_DAMAGE_CLIPS
  */
 static void fill_dc_dirty_rects(struct drm_plane *plane,
 				struct drm_plane_state *old_plane_state,
@@ -4863,12 +4929,11 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
 	struct dm_crtc_state *dm_crtc_state = to_dm_crtc_state(crtc_state);
 	struct rect *dirty_rects = flip_addrs->dirty_rects;
 	uint32_t num_clips;
+	struct drm_mode_rect *clips;
 	bool bb_changed;
 	bool fb_changed;
 	uint32_t i = 0;
 
-	flip_addrs->dirty_rect_count = 0;
-
 	/*
 	 * Cursor plane has it's own dirty rect update interface. See
 	 * dcn10_dmub_update_cursor_data and dmub_cmd_update_cursor_info_data
@@ -4876,20 +4941,20 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
 	if (plane->type == DRM_PLANE_TYPE_CURSOR)
 		return;
 
-	/*
-	 * Today, we only consider MPO use-case for PSR SU. If MPO not
-	 * requested, and there is a plane update, do FFU.
-	 */
+	num_clips = drm_plane_get_damage_clips_count(new_plane_state);
+	clips = drm_plane_get_damage_clips(new_plane_state);
+
 	if (!dm_crtc_state->mpo_requested) {
-		dirty_rects[0].x = 0;
-		dirty_rects[0].y = 0;
-		dirty_rects[0].width = dm_crtc_state->base.mode.crtc_hdisplay;
-		dirty_rects[0].height = dm_crtc_state->base.mode.crtc_vdisplay;
-		flip_addrs->dirty_rect_count = 1;
-		DRM_DEBUG_DRIVER("[PLANE:%d] PSR FFU dirty rect size (%d, %d)\n",
-				 new_plane_state->plane->base.id,
-				 dm_crtc_state->base.mode.crtc_hdisplay,
-				 dm_crtc_state->base.mode.crtc_vdisplay);
+		if (!num_clips || num_clips > DC_MAX_DIRTY_RECTS)
+			goto ffu;
+
+		for (; flip_addrs->dirty_rect_count < num_clips; clips++)
+			fill_dc_dirty_rect(new_plane_state->plane,
+					   &dirty_rects[i], clips->x1,
+					   clips->y1, clips->x2 - clips->x1,
+					   clips->y2 - clips->y1,
+					   &flip_addrs->dirty_rect_count,
+					   false);
 		return;
 	}
 
@@ -4900,7 +4965,6 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
 	 * If plane is moved or resized, also add old bounding box to dirty
 	 * rects.
 	 */
-	num_clips = drm_plane_get_damage_clips_count(new_plane_state);
 	fb_changed = old_plane_state->fb->base.id !=
 		     new_plane_state->fb->base.id;
 	bb_changed = (old_plane_state->crtc_x != new_plane_state->crtc_x ||
@@ -4908,36 +4972,51 @@ static void fill_dc_dirty_rects(struct drm_plane *plane,
 		      old_plane_state->crtc_w != new_plane_state->crtc_w ||
 		      old_plane_state->crtc_h != new_plane_state->crtc_h);
 
-	DRM_DEBUG_DRIVER("[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n",
-			 new_plane_state->plane->base.id,
-			 bb_changed, fb_changed, num_clips);
+	drm_dbg(plane->dev,
+		"[PLANE:%d] PSR bb_changed:%d fb_changed:%d num_clips:%d\n",
+		new_plane_state->plane->base.id,
+		bb_changed, fb_changed, num_clips);
 
-	if (num_clips || fb_changed || bb_changed) {
-		dirty_rects[i].x = new_plane_state->crtc_x;
-		dirty_rects[i].y = new_plane_state->crtc_y;
-		dirty_rects[i].width = new_plane_state->crtc_w;
-		dirty_rects[i].height = new_plane_state->crtc_h;
-		DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n",
-				 new_plane_state->plane->base.id,
-				 dirty_rects[i].x, dirty_rects[i].y,
-				 dirty_rects[i].width, dirty_rects[i].height);
-		i += 1;
-	}
-
-	/* Add old plane bounding-box if plane is moved or resized */
 	if (bb_changed) {
-		dirty_rects[i].x = old_plane_state->crtc_x;
-		dirty_rects[i].y = old_plane_state->crtc_y;
-		dirty_rects[i].width = old_plane_state->crtc_w;
-		dirty_rects[i].height = old_plane_state->crtc_h;
-		DRM_DEBUG_DRIVER("[PLANE:%d] PSR SU dirty rect at (%d, %d) size (%d, %d)\n",
-				old_plane_state->plane->base.id,
-				dirty_rects[i].x, dirty_rects[i].y,
-				dirty_rects[i].width, dirty_rects[i].height);
-		i += 1;
-	}
+		fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i],
+				   new_plane_state->crtc_x,
+				   new_plane_state->crtc_y,
+				   new_plane_state->crtc_w,
+				   new_plane_state->crtc_h, &i, false);
+
+		/* Add old plane bounding-box if plane is moved or resized */
+		fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i],
+				   old_plane_state->crtc_x,
+				   old_plane_state->crtc_y,
+				   old_plane_state->crtc_w,
+				   old_plane_state->crtc_h, &i, false);
+	}
+
+	if (num_clips) {
+		for (; i < num_clips; clips++)
+			fill_dc_dirty_rect(new_plane_state->plane,
+					   &dirty_rects[i], clips->x1,
+					   clips->y1, clips->x2 - clips->x1,
+					   clips->y2 - clips->y1, &i, false);
+	} else if (fb_changed && !bb_changed) {
+		fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[i],
+				   new_plane_state->crtc_x,
+				   new_plane_state->crtc_y,
+				   new_plane_state->crtc_w,
+				   new_plane_state->crtc_h, &i, false);
+	}
+
+	if (i > DC_MAX_DIRTY_RECTS)
+		goto ffu;
 
 	flip_addrs->dirty_rect_count = i;
+	return;
+
+ffu:
+	fill_dc_dirty_rect(new_plane_state->plane, &dirty_rects[0], 0, 0,
+			   dm_crtc_state->base.mode.crtc_hdisplay,
+			   dm_crtc_state->base.mode.crtc_vdisplay,
+			   &flip_addrs->dirty_rect_count, true);
 }
 
 static void update_stream_scaling_settings(const struct drm_display_mode *mode,
@@ -5602,16 +5681,14 @@ static void apply_dsc_policy_for_stream(struct amdgpu_dm_connector *aconnector,
 {
 	struct drm_connector *drm_connector = &aconnector->base;
 	uint32_t link_bandwidth_kbps;
-	uint32_t max_dsc_target_bpp_limit_override = 0;
 	struct dc *dc = sink->ctx->dc;
 	uint32_t max_supported_bw_in_kbps, timing_bw_in_kbps;
 	uint32_t dsc_max_supported_bw_in_kbps;
+	uint32_t max_dsc_target_bpp_limit_override =
+		drm_connector->display_info.max_dsc_bpp;
 
 	link_bandwidth_kbps = dc_link_bandwidth_kbps(aconnector->dc_link,
 							dc_link_get_link_cap(aconnector->dc_link));
-	if (stream->link && stream->link->local_sink)
-		max_dsc_target_bpp_limit_override =
-			stream->link->local_sink->edid_caps.panel_patch.max_dsc_target_bpp_limit;
 
 	/* Set DSC policy according to dsc_clock_en */
 	dc_dsc_policy_set_enable_dsc_when_not_needed(
@@ -5684,7 +5761,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	const struct drm_connector_state *con_state =
 		dm_state ? &dm_state->base : NULL;
 	struct dc_stream_state *stream = NULL;
-	struct drm_display_mode mode = *drm_mode;
+	struct drm_display_mode mode;
 	struct drm_display_mode saved_mode;
 	struct drm_display_mode *freesync_mode = NULL;
 	bool native_mode_found = false;
@@ -5692,12 +5769,14 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 	bool scale = dm_state ? (dm_state->scaling != RMX_OFF) : false;
 	int mode_refresh;
 	int preferred_refresh = 0;
+	enum color_transfer_func tf = TRANSFER_FUNC_UNKNOWN;
 #if defined(CONFIG_DRM_AMD_DC_DCN)
 	struct dsc_dec_dpcd_caps dsc_caps;
 #endif
 
 	struct dc_sink *sink = NULL;
 
+	drm_mode_init(&mode, drm_mode);
 	memset(&saved_mode, 0, sizeof(saved_mode));
 
 	if (aconnector == NULL) {
@@ -5815,7 +5894,9 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 			if (stream->link->dpcd_caps.dprx_feature.bits.VSC_SDP_COLORIMETRY_SUPPORTED)
 				stream->use_vsc_sdp_for_colorimetry = true;
 		}
-		mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space);
+		if (stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22)
+			tf = TRANSFER_FUNC_GAMMA_22;
+		mod_build_vsc_infopacket(stream, &stream->vsc_infopacket, stream->output_color_space, tf);
 		aconnector->psr_skip_count = AMDGPU_DM_PSR_ENTRY_DELAY;
 
 	}
@@ -6145,6 +6226,70 @@ static void handle_edid_mgmt(struct amdgpu_dm_connector *aconnector)
 	create_eml_sink(aconnector);
 }
 
+static enum dc_status dm_validate_stream_and_context(struct dc *dc,
+						struct dc_stream_state *stream)
+{
+	enum dc_status dc_result = DC_ERROR_UNEXPECTED;
+	struct dc_plane_state *dc_plane_state = NULL;
+	struct dc_state *dc_state = NULL;
+
+	if (!stream)
+		goto cleanup;
+
+	dc_plane_state = dc_create_plane_state(dc);
+	if (!dc_plane_state)
+		goto cleanup;
+
+	dc_state = dc_create_state(dc);
+	if (!dc_state)
+		goto cleanup;
+
+	/* populate stream to plane */
+	dc_plane_state->src_rect.height  = stream->src.height;
+	dc_plane_state->src_rect.width   = stream->src.width;
+	dc_plane_state->dst_rect.height  = stream->src.height;
+	dc_plane_state->dst_rect.width   = stream->src.width;
+	dc_plane_state->clip_rect.height = stream->src.height;
+	dc_plane_state->clip_rect.width  = stream->src.width;
+	dc_plane_state->plane_size.surface_pitch = ((stream->src.width + 255) / 256) * 256;
+	dc_plane_state->plane_size.surface_size.height = stream->src.height;
+	dc_plane_state->plane_size.surface_size.width  = stream->src.width;
+	dc_plane_state->plane_size.chroma_size.height  = stream->src.height;
+	dc_plane_state->plane_size.chroma_size.width   = stream->src.width;
+	dc_plane_state->tiling_info.gfx9.swizzle =  DC_SW_UNKNOWN;
+	dc_plane_state->format = SURFACE_PIXEL_FORMAT_GRPH_ARGB8888;
+	dc_plane_state->tiling_info.gfx9.swizzle = DC_SW_UNKNOWN;
+	dc_plane_state->rotation = ROTATION_ANGLE_0;
+	dc_plane_state->is_tiling_rotated = false;
+	dc_plane_state->tiling_info.gfx8.array_mode = DC_ARRAY_LINEAR_GENERAL;
+
+	dc_result = dc_validate_stream(dc, stream);
+	if (dc_result == DC_OK)
+		dc_result = dc_validate_plane(dc, dc_plane_state);
+
+	if (dc_result == DC_OK)
+		dc_result = dc_add_stream_to_ctx(dc, dc_state, stream);
+
+	if (dc_result == DC_OK && !dc_add_plane_to_context(
+						dc,
+						stream,
+						dc_plane_state,
+						dc_state))
+		dc_result = DC_FAIL_ATTACH_SURFACES;
+
+	if (dc_result == DC_OK)
+		dc_result = dc_validate_global_state(dc, dc_state, true);
+
+cleanup:
+	if (dc_state)
+		dc_release_state(dc_state);
+
+	if (dc_plane_state)
+		dc_plane_state_release(dc_plane_state);
+
+	return dc_result;
+}
+
 struct dc_stream_state *
 create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 				const struct drm_display_mode *drm_mode,
@@ -6171,6 +6316,9 @@ create_validate_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 		if (dc_result == DC_OK && stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
 			dc_result = dm_dp_mst_is_port_support_mode(aconnector, stream);
 
+		if (dc_result == DC_OK)
+			dc_result = dm_validate_stream_and_context(adev->dm.dc, stream);
+
 		if (dc_result != DC_OK) {
 			DRM_DEBUG_KMS("Mode %dx%d (clk %d) failed DC validation with error %d (%s)\n",
 				      drm_mode->hdisplay,
@@ -6459,7 +6607,7 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
 	struct drm_connector_state *new_con_state;
 	struct amdgpu_dm_connector *aconnector;
 	struct dm_connector_state *dm_conn_state;
-	int i, j;
+	int i, j, ret;
 	int vcpi, pbn_div, pbn, slot_num = 0;
 
 	for_each_new_connector_in_state(state, connector, new_con_state, i) {
@@ -6506,8 +6654,11 @@ static int dm_update_mst_vcpi_slots_for_dsc(struct drm_atomic_state *state,
 			dm_conn_state->pbn = pbn;
 			dm_conn_state->vcpi_slots = slot_num;
 
-			drm_dp_mst_atomic_enable_dsc(state, aconnector->port, dm_conn_state->pbn,
-						     false);
+			ret = drm_dp_mst_atomic_enable_dsc(state, aconnector->port,
+							   dm_conn_state->pbn, false);
+			if (ret < 0)
+				return ret;
+
 			continue;
 		}
 
@@ -7614,9 +7765,10 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 		bundle->surface_updates[planes_count].plane_info =
 			&bundle->plane_infos[planes_count];
 
-		fill_dc_dirty_rects(plane, old_plane_state, new_plane_state,
-				    new_crtc_state,
-				    &bundle->flip_addrs[planes_count]);
+		if (acrtc_state->stream->link->psr_settings.psr_feature_enabled)
+			fill_dc_dirty_rects(plane, old_plane_state,
+					    new_plane_state, new_crtc_state,
+					    &bundle->flip_addrs[planes_count]);
 
 		/*
 		 * Only allow immediate flips for fast updates that don't
@@ -7832,6 +7984,9 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 			 */
 			if (acrtc_state->stream->link->psr_settings.psr_version >= DC_PSR_VERSION_SU_1 &&
 			    acrtc_attach->dm_irq_params.allow_psr_entry &&
+#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
+			    !amdgpu_dm_crc_window_is_activated(acrtc_state->base.crtc) &&
+#endif
 			    !acrtc_state->stream->link->psr_settings.psr_allow_active)
 				amdgpu_dm_psr_enable(acrtc_state->stream);
 		} else {
@@ -8293,8 +8448,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
 				if (amdgpu_dm_crc_window_is_activated(crtc)) {
 					spin_lock_irqsave(&adev_to_drm(adev)->event_lock, flags);
-					acrtc->dm_irq_params.crc_window.update_win = true;
-					acrtc->dm_irq_params.crc_window.skip_frame_cnt = 2;
+					acrtc->dm_irq_params.window_param.update_win = true;
+					acrtc->dm_irq_params.window_param.skip_frame_cnt = 2;
 					spin_lock_irq(&crc_rd_wrk->crc_rd_work_lock);
 					crc_rd_wrk->crtc = crtc;
 					spin_unlock_irq(&crc_rd_wrk->crc_rd_work_lock);
@@ -9520,10 +9675,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 
 #if defined(CONFIG_DRM_AMD_DC_DCN)
 	if (dc_resource_is_dsc_encoding_supported(dc)) {
-		if (!pre_validate_dsc(state, &dm_state, vars)) {
-			ret = -EINVAL;
+		ret = pre_validate_dsc(state, &dm_state, vars);
+		if (ret != 0)
 			goto fail;
-		}
 	}
 #endif
 
@@ -9618,9 +9772,9 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 		}
 
 #if defined(CONFIG_DRM_AMD_DC_DCN)
-		if (!compute_mst_dsc_configs_for_state(state, dm_state->context, vars)) {
+		ret = compute_mst_dsc_configs_for_state(state, dm_state->context, vars);
+		if (ret) {
 			DRM_DEBUG_DRIVER("compute_mst_dsc_configs_for_state() failed\n");
-			ret = -EINVAL;
 			goto fail;
 		}
 
@@ -10083,79 +10237,92 @@ uint32_t dm_read_reg_func(const struct dc_context *ctx, uint32_t address,
 	return value;
 }
 
-static int amdgpu_dm_set_dmub_async_sync_status(bool is_cmd_aux,
-						struct dc_context *ctx,
-						uint8_t status_type,
-						uint32_t *operation_result)
+int amdgpu_dm_process_dmub_aux_transfer_sync(
+		struct dc_context *ctx,
+		unsigned int link_index,
+		struct aux_payload *payload,
+		enum aux_return_code_type *operation_result)
 {
 	struct amdgpu_device *adev = ctx->driver_context;
-	int return_status = -1;
 	struct dmub_notification *p_notify = adev->dm.dmub_notify;
+	int ret = -1;
 
-	if (is_cmd_aux) {
-		if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) {
-			return_status = p_notify->aux_reply.length;
-			*operation_result = p_notify->result;
-		} else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT) {
-			*operation_result = AUX_RET_ERROR_TIMEOUT;
-		} else if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_FAIL) {
-			*operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
-		} else {
-			*operation_result = AUX_RET_ERROR_UNKNOWN;
+	mutex_lock(&adev->dm.dpia_aux_lock);
+	if (!dc_process_dmub_aux_transfer_async(ctx->dc, link_index, payload)) {
+		*operation_result = AUX_RET_ERROR_ENGINE_ACQUIRE;
+		goto out;
+ 	}
+
+	if (!wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
+		DRM_ERROR("wait_for_completion_timeout timeout!");
+		*operation_result = AUX_RET_ERROR_TIMEOUT;
+		goto out;
+	}
+
+	if (p_notify->result != AUX_RET_SUCCESS) {
+		/*
+		 * Transient states before tunneling is enabled could
+		 * lead to this error. We can ignore this for now.
+		 */
+		if (p_notify->result != AUX_RET_ERROR_PROTOCOL_ERROR) {
+			DRM_WARN("DPIA AUX failed on 0x%x(%d), error %d\n",
+					payload->address, payload->length,
+					p_notify->result);
 		}
-	} else {
-		if (status_type == DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS) {
-			return_status = 0;
-			*operation_result = p_notify->sc_status;
-		} else {
-			*operation_result = SET_CONFIG_UNKNOWN_ERROR;
+		*operation_result = AUX_RET_ERROR_INVALID_REPLY;
+		goto out;
+	}
+
+
+	payload->reply[0] = adev->dm.dmub_notify->aux_reply.command;
+	if (!payload->write && p_notify->aux_reply.length &&
+			(payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK)) {
+
+		if (payload->length != p_notify->aux_reply.length) {
+			DRM_WARN("invalid read length %d from DPIA AUX 0x%x(%d)!\n",
+				p_notify->aux_reply.length,
+					payload->address, payload->length);
+			*operation_result = AUX_RET_ERROR_INVALID_REPLY;
+			goto out;
 		}
+
+		memcpy(payload->data, p_notify->aux_reply.data,
+				p_notify->aux_reply.length);
 	}
 
-	return return_status;
+	/* success */
+	ret = p_notify->aux_reply.length;
+	*operation_result = p_notify->result;
+out:
+	mutex_unlock(&adev->dm.dpia_aux_lock);
+	return ret;
 }
 
-int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context *ctx,
-	unsigned int link_index, void *cmd_payload, void *operation_result)
+int amdgpu_dm_process_dmub_set_config_sync(
+		struct dc_context *ctx,
+		unsigned int link_index,
+		struct set_config_cmd_payload *payload,
+		enum set_config_status *operation_result)
 {
 	struct amdgpu_device *adev = ctx->driver_context;
-	int ret = 0;
+	bool is_cmd_complete;
+	int ret;
 
-	if (is_cmd_aux) {
-		dc_process_dmub_aux_transfer_async(ctx->dc,
-			link_index, (struct aux_payload *)cmd_payload);
-	} else if (dc_process_dmub_set_config_async(ctx->dc, link_index,
-					(struct set_config_cmd_payload *)cmd_payload,
-					adev->dm.dmub_notify)) {
-		return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
-					ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS,
-					(uint32_t *)operation_result);
-	}
+	mutex_lock(&adev->dm.dpia_aux_lock);
+	is_cmd_complete = dc_process_dmub_set_config_async(ctx->dc,
+			link_index, payload, adev->dm.dmub_notify);
 
-	ret = wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ);
-	if (ret == 0) {
+	if (is_cmd_complete || wait_for_completion_timeout(&adev->dm.dmub_aux_transfer_done, 10 * HZ)) {
+		ret = 0;
+		*operation_result = adev->dm.dmub_notify->sc_status;
+	} else {
 		DRM_ERROR("wait_for_completion_timeout timeout!");
-		return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
-				ctx, DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT,
-				(uint32_t *)operation_result);
+		ret = -1;
+		*operation_result = SET_CONFIG_UNKNOWN_ERROR;
 	}
 
-	if (is_cmd_aux) {
-		if (adev->dm.dmub_notify->result == AUX_RET_SUCCESS) {
-			struct aux_payload *payload = (struct aux_payload *)cmd_payload;
-
-			payload->reply[0] = adev->dm.dmub_notify->aux_reply.command;
-			if (!payload->write && adev->dm.dmub_notify->aux_reply.length &&
-			    payload->reply[0] == AUX_TRANSACTION_REPLY_AUX_ACK) {
-				memcpy(payload->data, adev->dm.dmub_notify->aux_reply.data,
-				       adev->dm.dmub_notify->aux_reply.length);
-			}
-		}
-	}
-
-	return amdgpu_dm_set_dmub_async_sync_status(is_cmd_aux,
-			ctx, DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS,
-			(uint32_t *)operation_result);
+	mutex_unlock(&adev->dm.dpia_aux_lock);
+	return ret;
 }
 
 /*
@@ -10167,8 +10334,8 @@ int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux, struct dc_context
  */
 bool check_seamless_boot_capability(struct amdgpu_device *adev)
 {
-	switch (adev->asic_type) {
-	case CHIP_VANGOGH:
+	switch (adev->ip_versions[DCE_HWIP][0]) {
+	case IP_VERSION(3, 0, 1):
 		if (!adev->mman.keep_stolen_vga_memory)
 			return true;
 		break;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index b5ce15c43bcc..df3c25e32c65 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -51,12 +51,6 @@
 #define AMDGPU_DMUB_NOTIFICATION_MAX 5
 
 /*
- * DMUB Async to Sync Mechanism Status
- */
-#define DMUB_ASYNC_TO_SYNC_ACCESS_FAIL 1
-#define DMUB_ASYNC_TO_SYNC_ACCESS_TIMEOUT 2
-#define DMUB_ASYNC_TO_SYNC_ACCESS_SUCCESS 3
-/*
 #include "include/amdgpu_dal_power_if.h"
 #include "amdgpu_dm_irq.h"
 */
@@ -65,7 +59,9 @@
 #include "signal_types.h"
 #include "amdgpu_dm_crc.h"
 struct aux_payload;
+struct set_config_cmd_payload;
 enum aux_return_code_type;
+enum set_config_status;
 
 /* Forward declarations */
 struct amdgpu_device;
@@ -366,13 +362,6 @@ struct amdgpu_display_manager {
 	struct mutex audio_lock;
 
 	/**
-	 * @vblank_lock:
-	 *
-	 * Guards access to deferred vblank work state.
-	 */
-	spinlock_t vblank_lock;
-
-	/**
 	 * @audio_component:
 	 *
 	 * Used to notify ELD changes to sound driver.
@@ -555,6 +544,13 @@ struct amdgpu_display_manager {
 	 * occurred on certain intel platform
 	 */
 	bool aux_hpd_discon_quirk;
+
+	/**
+	 * @dpia_aux_lock:
+	 *
+	 * Guards access to DPIA AUX
+	 */
+	struct mutex dpia_aux_lock;
 };
 
 enum dsc_clock_force_state {
@@ -798,9 +794,11 @@ void amdgpu_dm_update_connector_after_detect(
 
 extern const struct drm_encoder_helper_funcs amdgpu_dm_encoder_helper_funcs;
 
-int amdgpu_dm_process_dmub_aux_transfer_sync(bool is_cmd_aux,
-					struct dc_context *ctx, unsigned int link_index,
-					void *payload, void *operation_result);
+int amdgpu_dm_process_dmub_aux_transfer_sync(struct dc_context *ctx, unsigned int link_index,
+					struct aux_payload *payload, enum aux_return_code_type *operation_result);
+
+int amdgpu_dm_process_dmub_set_config_sync(struct dc_context *ctx, unsigned int link_index,
+					struct set_config_cmd_payload *payload, enum set_config_status *operation_result);
 
 bool check_seamless_boot_capability(struct amdgpu_device *adev);
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
index 8a441a22c46e..66df2394d7e4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.c
@@ -89,13 +89,13 @@ static void amdgpu_dm_set_crc_window_default(struct drm_crtc *crtc)
 	struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
 
 	spin_lock_irq(&drm_dev->event_lock);
-	acrtc->dm_irq_params.crc_window.x_start = 0;
-	acrtc->dm_irq_params.crc_window.y_start = 0;
-	acrtc->dm_irq_params.crc_window.x_end = 0;
-	acrtc->dm_irq_params.crc_window.y_end = 0;
-	acrtc->dm_irq_params.crc_window.activated = false;
-	acrtc->dm_irq_params.crc_window.update_win = false;
-	acrtc->dm_irq_params.crc_window.skip_frame_cnt = 0;
+	acrtc->dm_irq_params.window_param.x_start = 0;
+	acrtc->dm_irq_params.window_param.y_start = 0;
+	acrtc->dm_irq_params.window_param.x_end = 0;
+	acrtc->dm_irq_params.window_param.y_end = 0;
+	acrtc->dm_irq_params.window_param.activated = false;
+	acrtc->dm_irq_params.window_param.update_win = false;
+	acrtc->dm_irq_params.window_param.skip_frame_cnt = 0;
 	spin_unlock_irq(&drm_dev->event_lock);
 }
 
@@ -123,6 +123,8 @@ static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work)
 	phy_id = crc_rd_wrk->phy_inst;
 	spin_unlock_irq(&crc_rd_wrk->crc_rd_work_lock);
 
+	mutex_lock(&psp->securedisplay_context.mutex);
+
 	psp_prep_securedisplay_cmd_buf(psp, &securedisplay_cmd,
 						TA_SECUREDISPLAY_COMMAND__SEND_ROI_CRC);
 	securedisplay_cmd->securedisplay_in_message.send_roi_crc.phy_id =
@@ -133,6 +135,24 @@ static void amdgpu_dm_crtc_notify_ta_to_read(struct work_struct *work)
 			psp_securedisplay_parse_resp_status(psp, securedisplay_cmd->status);
 		}
 	}
+
+	mutex_unlock(&psp->securedisplay_context.mutex);
+}
+
+static void
+amdgpu_dm_forward_crc_window(struct work_struct *work)
+{
+	struct crc_fw_work *crc_fw_wrk;
+	struct amdgpu_display_manager *dm;
+
+	crc_fw_wrk = container_of(work, struct crc_fw_work, forward_roi_work);
+	dm = crc_fw_wrk->dm;
+
+	mutex_lock(&dm->dc_lock);
+	dc_stream_forward_crc_window(dm->dc, &crc_fw_wrk->rect, crc_fw_wrk->stream, crc_fw_wrk->is_stop_cmd);
+	mutex_unlock(&dm->dc_lock);
+
+	kfree(crc_fw_wrk);
 }
 
 bool amdgpu_dm_crc_window_is_activated(struct drm_crtc *crtc)
@@ -142,7 +162,7 @@ bool amdgpu_dm_crc_window_is_activated(struct drm_crtc *crtc)
 	bool ret = false;
 
 	spin_lock_irq(&drm_dev->event_lock);
-	ret = acrtc->dm_irq_params.crc_window.activated;
+	ret = acrtc->dm_irq_params.window_param.activated;
 	spin_unlock_irq(&drm_dev->event_lock);
 
 	return ret;
@@ -187,9 +207,11 @@ int amdgpu_dm_crtc_configure_crc_source(struct drm_crtc *crtc,
 			if (adev->dm.crc_rd_wrk) {
 				flush_work(&adev->dm.crc_rd_wrk->notify_ta_work);
 				spin_lock_irq(&adev->dm.crc_rd_wrk->crc_rd_work_lock);
+
 				if (adev->dm.crc_rd_wrk->crtc == crtc) {
-					dc_stream_stop_dmcu_crc_win_update(stream_state->ctx->dc,
-									dm_crtc_state->stream);
+					/* stop ROI update on this crtc */
+					dc_stream_forward_crc_window(stream_state->ctx->dc,
+							NULL, stream_state, true);
 					adev->dm.crc_rd_wrk->crtc = NULL;
 				}
 				spin_unlock_irq(&adev->dm.crc_rd_wrk->crc_rd_work_lock);
@@ -439,14 +461,9 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc)
 	enum amdgpu_dm_pipe_crc_source cur_crc_src;
 	struct amdgpu_crtc *acrtc = NULL;
 	struct amdgpu_device *adev = NULL;
-	struct crc_rd_work *crc_rd_wrk = NULL;
-	struct crc_params *crc_window = NULL, tmp_window;
+	struct crc_rd_work *crc_rd_wrk;
+	struct crc_fw_work *crc_fw_wrk;
 	unsigned long flags1, flags2;
-	struct crtc_position position;
-	uint32_t v_blank;
-	uint32_t v_back_porch;
-	uint32_t crc_window_latch_up_line;
-	struct dc_crtc_timing *timing_out;
 
 	if (crtc == NULL)
 		return;
@@ -458,74 +475,54 @@ void amdgpu_dm_crtc_handle_crc_window_irq(struct drm_crtc *crtc)
 	spin_lock_irqsave(&drm_dev->event_lock, flags1);
 	stream_state = acrtc->dm_irq_params.stream;
 	cur_crc_src = acrtc->dm_irq_params.crc_src;
-	timing_out = &stream_state->timing;
 
 	/* Early return if CRC capture is not enabled. */
 	if (!amdgpu_dm_is_valid_crc_source(cur_crc_src))
 		goto cleanup;
 
-	if (dm_is_crc_source_crtc(cur_crc_src)) {
-		if (acrtc->dm_irq_params.crc_window.activated) {
-			if (acrtc->dm_irq_params.crc_window.update_win) {
-				if (acrtc->dm_irq_params.crc_window.skip_frame_cnt) {
-					acrtc->dm_irq_params.crc_window.skip_frame_cnt -= 1;
-					goto cleanup;
-				}
-				crc_window = &tmp_window;
-
-				tmp_window.windowa_x_start =
-							acrtc->dm_irq_params.crc_window.x_start;
-				tmp_window.windowa_y_start =
-							acrtc->dm_irq_params.crc_window.y_start;
-				tmp_window.windowa_x_end =
-							acrtc->dm_irq_params.crc_window.x_end;
-				tmp_window.windowa_y_end =
-							acrtc->dm_irq_params.crc_window.y_end;
-				tmp_window.windowb_x_start =
-							acrtc->dm_irq_params.crc_window.x_start;
-				tmp_window.windowb_y_start =
-							acrtc->dm_irq_params.crc_window.y_start;
-				tmp_window.windowb_x_end =
-							acrtc->dm_irq_params.crc_window.x_end;
-				tmp_window.windowb_y_end =
-							acrtc->dm_irq_params.crc_window.y_end;
-
-				dc_stream_forward_dmcu_crc_window(stream_state->ctx->dc,
-									stream_state, crc_window);
-
-				acrtc->dm_irq_params.crc_window.update_win = false;
-
-				dc_stream_get_crtc_position(stream_state->ctx->dc, &stream_state, 1,
-					&position.vertical_count,
-					&position.nominal_vcount);
-
-				v_blank = timing_out->v_total - timing_out->v_border_top -
-					timing_out->v_addressable - timing_out->v_border_bottom;
-
-				v_back_porch = v_blank - timing_out->v_front_porch -
-					timing_out->v_sync_width;
-
-				crc_window_latch_up_line = v_back_porch + timing_out->v_sync_width;
-
-				/* take 3 lines margin*/
-				if ((position.vertical_count + 3) >= crc_window_latch_up_line)
-					acrtc->dm_irq_params.crc_window.skip_frame_cnt = 1;
-				else
-					acrtc->dm_irq_params.crc_window.skip_frame_cnt = 0;
-			} else {
-				if (acrtc->dm_irq_params.crc_window.skip_frame_cnt == 0) {
-					if (adev->dm.crc_rd_wrk) {
-						crc_rd_wrk = adev->dm.crc_rd_wrk;
-						spin_lock_irqsave(&crc_rd_wrk->crc_rd_work_lock, flags2);
-						crc_rd_wrk->phy_inst =
-							stream_state->link->link_enc_hw_inst;
-						spin_unlock_irqrestore(&crc_rd_wrk->crc_rd_work_lock, flags2);
-						schedule_work(&crc_rd_wrk->notify_ta_work);
-					}
-				} else {
-					acrtc->dm_irq_params.crc_window.skip_frame_cnt -= 1;
-				}
-			}
+	if (!dm_is_crc_source_crtc(cur_crc_src))
+		goto cleanup;
+
+	if (!acrtc->dm_irq_params.window_param.activated)
+		goto cleanup;
+
+	if (acrtc->dm_irq_params.window_param.update_win) {
+		if (acrtc->dm_irq_params.window_param.skip_frame_cnt) {
+			acrtc->dm_irq_params.window_param.skip_frame_cnt -= 1;
+			goto cleanup;
+		}
+
+		/* prepare work for dmub to update ROI */
+		crc_fw_wrk = kzalloc(sizeof(*crc_fw_wrk), GFP_ATOMIC);
+		if (!crc_fw_wrk)
+			goto cleanup;
+
+		INIT_WORK(&crc_fw_wrk->forward_roi_work, amdgpu_dm_forward_crc_window);
+		crc_fw_wrk->dm = &adev->dm;
+		crc_fw_wrk->stream = stream_state;
+		crc_fw_wrk->rect.x = acrtc->dm_irq_params.window_param.x_start;
+		crc_fw_wrk->rect.y = acrtc->dm_irq_params.window_param.y_start;
+		crc_fw_wrk->rect.width = acrtc->dm_irq_params.window_param.x_end -
+								acrtc->dm_irq_params.window_param.x_start;
+		crc_fw_wrk->rect.height = acrtc->dm_irq_params.window_param.y_end -
+								acrtc->dm_irq_params.window_param.y_start;
+		schedule_work(&crc_fw_wrk->forward_roi_work);
+
+		acrtc->dm_irq_params.window_param.update_win = false;
+		acrtc->dm_irq_params.window_param.skip_frame_cnt = 1;
+
+	} else {
+		if (acrtc->dm_irq_params.window_param.skip_frame_cnt) {
+			acrtc->dm_irq_params.window_param.skip_frame_cnt -= 1;
+			goto cleanup;
+		}
+
+		if (adev->dm.crc_rd_wrk) {
+			crc_rd_wrk = adev->dm.crc_rd_wrk;
+			spin_lock_irqsave(&crc_rd_wrk->crc_rd_work_lock, flags2);
+			crc_rd_wrk->phy_inst = stream_state->link->link_enc_hw_inst;
+			spin_unlock_irqrestore(&crc_rd_wrk->crc_rd_work_lock, flags2);
+			schedule_work(&crc_rd_wrk->notify_ta_work);
 		}
 	}
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
index f07850db60a6..71bce608d751 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crc.h
@@ -40,7 +40,7 @@ enum amdgpu_dm_pipe_crc_source {
 };
 
 #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
-struct crc_window_parm {
+struct crc_window_param {
 	uint16_t x_start;
 	uint16_t y_start;
 	uint16_t x_end;
@@ -53,6 +53,7 @@ struct crc_window_parm {
 	int skip_frame_cnt;
 };
 
+/* read_work for driver to call PSP to read */
 struct crc_rd_work {
 	struct work_struct notify_ta_work;
 	/* To protect crc_rd_work carried fields*/
@@ -60,6 +61,15 @@ struct crc_rd_work {
 	struct drm_crtc *crtc;
 	uint8_t phy_inst;
 };
+
+/* forward_work for driver to forward ROI to dmu */
+struct crc_fw_work {
+	struct work_struct forward_roi_work;
+	struct amdgpu_display_manager *dm;
+	struct dc_stream_state *stream;
+	struct rect rect;
+	bool is_stop_cmd;
+};
 #endif
 
 static inline bool amdgpu_dm_is_valid_crc_source(enum amdgpu_dm_pipe_crc_source source)
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
index 594fe8a4d02b..22125daf9dcf 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_crtc.c
@@ -127,6 +127,9 @@ static void vblank_control_worker(struct work_struct *work)
 				amdgpu_dm_psr_disable(vblank_work->stream);
 		} else if (vblank_work->stream->link->psr_settings.psr_feature_enabled &&
 			   !vblank_work->stream->link->psr_settings.psr_allow_active &&
+#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
+			   !amdgpu_dm_crc_window_is_activated(&vblank_work->acrtc->base) &&
+#endif
 			   vblank_work->acrtc->dm_irq_params.allow_psr_entry) {
 			amdgpu_dm_psr_enable(vblank_work->stream);
 		}
@@ -412,7 +415,7 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
 {
 	struct amdgpu_crtc *acrtc = NULL;
 	struct drm_plane *cursor_plane;
-
+	bool is_dcn;
 	int res = -ENOMEM;
 
 	cursor_plane = kzalloc(sizeof(*cursor_plane), GFP_KERNEL);
@@ -450,8 +453,14 @@ int amdgpu_dm_crtc_init(struct amdgpu_display_manager *dm,
 	acrtc->otg_inst = -1;
 
 	dm->adev->mode_info.crtcs[crtc_index] = acrtc;
-	drm_crtc_enable_color_mgmt(&acrtc->base, MAX_COLOR_LUT_ENTRIES,
+
+	/* Don't enable DRM CRTC degamma property for DCE since it doesn't
+	 * support programmable degamma anywhere.
+	 */
+	is_dcn = dm->adev->dm.dc->caps.color.dpp.dcn_arch;
+	drm_crtc_enable_color_mgmt(&acrtc->base, is_dcn ? MAX_COLOR_LUT_ENTRIES : 0,
 				   true, MAX_COLOR_LUT_ENTRIES);
+
 	drm_mode_crtc_set_gamma_size(&acrtc->base, MAX_COLOR_LEGACY_LUT_ENTRIES);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
index ee242d9d8b06..461037a3dd75 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_debugfs.c
@@ -38,6 +38,10 @@
 #include "link_hwss.h"
 #include "dc/dc_dmub_srv.h"
 
+#ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
+#include "amdgpu_dm_psr.h"
+#endif
+
 struct dmub_debugfs_trace_header {
 	uint32_t entry_count;
 	uint32_t reserved[3];
@@ -299,6 +303,8 @@ static ssize_t dp_link_settings_write(struct file *f, const char __user *buf,
 	case LINK_RATE_HIGH2:
 	case LINK_RATE_HIGH3:
 	case LINK_RATE_UHBR10:
+	case LINK_RATE_UHBR13_5:
+	case LINK_RATE_UHBR20:
 		break;
 	default:
 		valid_input = false;
@@ -2633,6 +2639,25 @@ static int dp_mst_progress_status_show(struct seq_file *m, void *unused)
 	return 0;
 }
 
+/*
+ * Reports whether the connected display is a USB4 DPIA tunneled display
+ * Example usage: cat /sys/kernel/debug/dri/0/DP-8/is_dpia_link
+ */
+static int is_dpia_link_show(struct seq_file *m, void *data)
+{
+	struct drm_connector *connector = m->private;
+	struct amdgpu_dm_connector *aconnector = to_amdgpu_dm_connector(connector);
+	struct dc_link *link = aconnector->dc_link;
+
+	if (connector->status != connector_status_connected)
+		return -ENODEV;
+
+	seq_printf(m, "%s\n", (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA) ? "yes" :
+				(link->ep_type == DISPLAY_ENDPOINT_PHY) ? "no" : "unknown");
+
+	return 0;
+}
+
 DEFINE_SHOW_ATTRIBUTE(dp_dsc_fec_support);
 DEFINE_SHOW_ATTRIBUTE(dmub_fw_state);
 DEFINE_SHOW_ATTRIBUTE(dmub_tracebuffer);
@@ -2644,6 +2669,7 @@ DEFINE_SHOW_ATTRIBUTE(internal_display);
 DEFINE_SHOW_ATTRIBUTE(psr_capability);
 DEFINE_SHOW_ATTRIBUTE(dp_is_mst_connector);
 DEFINE_SHOW_ATTRIBUTE(dp_mst_progress_status);
+DEFINE_SHOW_ATTRIBUTE(is_dpia_link);
 
 static const struct file_operations dp_dsc_clock_en_debugfs_fops = {
 	.owner = THIS_MODULE,
@@ -2788,7 +2814,8 @@ static const struct {
 		{"max_bpc", &dp_max_bpc_debugfs_fops},
 		{"dsc_disable_passthrough", &dp_dsc_disable_passthrough_debugfs_fops},
 		{"is_mst_connector", &dp_is_mst_connector_fops},
-		{"mst_progress_status", &dp_mst_progress_status_fops}
+		{"mst_progress_status", &dp_mst_progress_status_fops},
+		{"is_dpia_link", &is_dpia_link_fops}
 };
 
 #ifdef CONFIG_DRM_AMD_DC_HDCP
@@ -3079,8 +3106,8 @@ static int crc_win_x_start_set(void *data, u64 val)
 	struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
 
 	spin_lock_irq(&drm_dev->event_lock);
-	acrtc->dm_irq_params.crc_window.x_start = (uint16_t) val;
-	acrtc->dm_irq_params.crc_window.update_win = false;
+	acrtc->dm_irq_params.window_param.x_start = (uint16_t) val;
+	acrtc->dm_irq_params.window_param.update_win = false;
 	spin_unlock_irq(&drm_dev->event_lock);
 
 	return 0;
@@ -3096,7 +3123,7 @@ static int crc_win_x_start_get(void *data, u64 *val)
 	struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
 
 	spin_lock_irq(&drm_dev->event_lock);
-	*val = acrtc->dm_irq_params.crc_window.x_start;
+	*val = acrtc->dm_irq_params.window_param.x_start;
 	spin_unlock_irq(&drm_dev->event_lock);
 
 	return 0;
@@ -3116,8 +3143,8 @@ static int crc_win_y_start_set(void *data, u64 val)
 	struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
 
 	spin_lock_irq(&drm_dev->event_lock);
-	acrtc->dm_irq_params.crc_window.y_start = (uint16_t) val;
-	acrtc->dm_irq_params.crc_window.update_win = false;
+	acrtc->dm_irq_params.window_param.y_start = (uint16_t) val;
+	acrtc->dm_irq_params.window_param.update_win = false;
 	spin_unlock_irq(&drm_dev->event_lock);
 
 	return 0;
@@ -3133,7 +3160,7 @@ static int crc_win_y_start_get(void *data, u64 *val)
 	struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
 
 	spin_lock_irq(&drm_dev->event_lock);
-	*val = acrtc->dm_irq_params.crc_window.y_start;
+	*val = acrtc->dm_irq_params.window_param.y_start;
 	spin_unlock_irq(&drm_dev->event_lock);
 
 	return 0;
@@ -3152,8 +3179,8 @@ static int crc_win_x_end_set(void *data, u64 val)
 	struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
 
 	spin_lock_irq(&drm_dev->event_lock);
-	acrtc->dm_irq_params.crc_window.x_end = (uint16_t) val;
-	acrtc->dm_irq_params.crc_window.update_win = false;
+	acrtc->dm_irq_params.window_param.x_end = (uint16_t) val;
+	acrtc->dm_irq_params.window_param.update_win = false;
 	spin_unlock_irq(&drm_dev->event_lock);
 
 	return 0;
@@ -3169,7 +3196,7 @@ static int crc_win_x_end_get(void *data, u64 *val)
 	struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
 
 	spin_lock_irq(&drm_dev->event_lock);
-	*val = acrtc->dm_irq_params.crc_window.x_end;
+	*val = acrtc->dm_irq_params.window_param.x_end;
 	spin_unlock_irq(&drm_dev->event_lock);
 
 	return 0;
@@ -3188,8 +3215,8 @@ static int crc_win_y_end_set(void *data, u64 val)
 	struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
 
 	spin_lock_irq(&drm_dev->event_lock);
-	acrtc->dm_irq_params.crc_window.y_end = (uint16_t) val;
-	acrtc->dm_irq_params.crc_window.update_win = false;
+	acrtc->dm_irq_params.window_param.y_end = (uint16_t) val;
+	acrtc->dm_irq_params.window_param.update_win = false;
 	spin_unlock_irq(&drm_dev->event_lock);
 
 	return 0;
@@ -3205,7 +3232,7 @@ static int crc_win_y_end_get(void *data, u64 *val)
 	struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
 
 	spin_lock_irq(&drm_dev->event_lock);
-	*val = acrtc->dm_irq_params.crc_window.y_end;
+	*val = acrtc->dm_irq_params.window_param.y_end;
 	spin_unlock_irq(&drm_dev->event_lock);
 
 	return 0;
@@ -3228,31 +3255,38 @@ static int crc_win_update_set(void *data, u64 val)
 		return 0;
 
 	if (val) {
+		new_acrtc = to_amdgpu_crtc(new_crtc);
+		mutex_lock(&adev->dm.dc_lock);
+		/* PSR may write to OTG CRC window control register,
+		 * so close it before starting secure_display.
+		 */
+		amdgpu_dm_psr_disable(new_acrtc->dm_irq_params.stream);
+
 		spin_lock_irq(&adev_to_drm(adev)->event_lock);
 		spin_lock_irq(&crc_rd_wrk->crc_rd_work_lock);
 		if (crc_rd_wrk->crtc) {
 			old_crtc = crc_rd_wrk->crtc;
 			old_acrtc = to_amdgpu_crtc(old_crtc);
 		}
-		new_acrtc = to_amdgpu_crtc(new_crtc);
 
 		if (old_crtc && old_crtc != new_crtc) {
-			old_acrtc->dm_irq_params.crc_window.activated = false;
-			old_acrtc->dm_irq_params.crc_window.update_win = false;
-			old_acrtc->dm_irq_params.crc_window.skip_frame_cnt = 0;
+			old_acrtc->dm_irq_params.window_param.activated = false;
+			old_acrtc->dm_irq_params.window_param.update_win = false;
+			old_acrtc->dm_irq_params.window_param.skip_frame_cnt = 0;
 
-			new_acrtc->dm_irq_params.crc_window.activated = true;
-			new_acrtc->dm_irq_params.crc_window.update_win = true;
-			new_acrtc->dm_irq_params.crc_window.skip_frame_cnt = 0;
+			new_acrtc->dm_irq_params.window_param.activated = true;
+			new_acrtc->dm_irq_params.window_param.update_win = true;
+			new_acrtc->dm_irq_params.window_param.skip_frame_cnt = 0;
 			crc_rd_wrk->crtc = new_crtc;
 		} else {
-			new_acrtc->dm_irq_params.crc_window.activated = true;
-			new_acrtc->dm_irq_params.crc_window.update_win = true;
-			new_acrtc->dm_irq_params.crc_window.skip_frame_cnt = 0;
+			new_acrtc->dm_irq_params.window_param.activated = true;
+			new_acrtc->dm_irq_params.window_param.update_win = true;
+			new_acrtc->dm_irq_params.window_param.skip_frame_cnt = 0;
 			crc_rd_wrk->crtc = new_crtc;
 		}
 		spin_unlock_irq(&crc_rd_wrk->crc_rd_work_lock);
 		spin_unlock_irq(&adev_to_drm(adev)->event_lock);
+		mutex_unlock(&adev->dm.dc_lock);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
index 6202e31c7e3a..a7fd98f57f94 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_hdcp.c
@@ -495,7 +495,7 @@ static void update_config(void *handle, struct cp_psp_stream_config *config)
 	link->dp.mst_enabled = config->mst_enabled;
 	link->dp.usb4_enabled = config->usb4_enabled;
 	display->adjust.disable = MOD_HDCP_DISPLAY_DISABLE_AUTHENTICATION;
-	link->adjust.auth_delay = 3;
+	link->adjust.auth_delay = 0;
 	link->adjust.hdcp1.disable = 0;
 	conn_state = aconnector->base.state;
 
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
index f0b01c8dc4a6..6994c9a1ed85 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
@@ -42,39 +42,6 @@
 #include "dm_helpers.h"
 #include "ddc_service_types.h"
 
-struct monitor_patch_info {
-	unsigned int manufacturer_id;
-	unsigned int product_id;
-	void (*patch_func)(struct dc_edid_caps *edid_caps, unsigned int param);
-	unsigned int patch_param;
-};
-static void set_max_dsc_bpp_limit(struct dc_edid_caps *edid_caps, unsigned int param);
-
-static const struct monitor_patch_info monitor_patch_table[] = {
-{0x6D1E, 0x5BBF, set_max_dsc_bpp_limit, 15},
-{0x6D1E, 0x5B9A, set_max_dsc_bpp_limit, 15},
-};
-
-static void set_max_dsc_bpp_limit(struct dc_edid_caps *edid_caps, unsigned int param)
-{
-	if (edid_caps)
-		edid_caps->panel_patch.max_dsc_target_bpp_limit = param;
-}
-
-static int amdgpu_dm_patch_edid_caps(struct dc_edid_caps *edid_caps)
-{
-	int i, ret = 0;
-
-	for (i = 0; i < ARRAY_SIZE(monitor_patch_table); i++)
-		if ((edid_caps->manufacturer_id == monitor_patch_table[i].manufacturer_id)
-			&&  (edid_caps->product_id == monitor_patch_table[i].product_id)) {
-			monitor_patch_table[i].patch_func(edid_caps, monitor_patch_table[i].patch_param);
-			ret++;
-		}
-
-	return ret;
-}
-
 /* dm_helpers_parse_edid_caps
  *
  * Parse edid caps
@@ -149,8 +116,6 @@ enum dc_edid_status dm_helpers_parse_edid_caps(
 	kfree(sads);
 	kfree(sadb);
 
-	amdgpu_dm_patch_edid_caps(edid_caps);
-
 	return result;
 }
 
@@ -852,9 +817,8 @@ int dm_helper_dmub_aux_transfer_sync(
 		struct aux_payload *payload,
 		enum aux_return_code_type *operation_result)
 {
-	return amdgpu_dm_process_dmub_aux_transfer_sync(true, ctx,
-			link->link_index, (void *)payload,
-			(void *)operation_result);
+	return amdgpu_dm_process_dmub_aux_transfer_sync(ctx, link->link_index, payload,
+			operation_result);
 }
 
 int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
@@ -862,9 +826,8 @@ int dm_helpers_dmub_set_config_sync(struct dc_context *ctx,
 		struct set_config_cmd_payload *payload,
 		enum set_config_status *operation_result)
 {
-	return amdgpu_dm_process_dmub_aux_transfer_sync(false, ctx,
-			link->link_index, (void *)payload,
-			(void *)operation_result);
+	return amdgpu_dm_process_dmub_set_config_sync(ctx, link->link_index, payload,
+			operation_result);
 }
 
 void dm_set_dcn_clocks(struct dc_context *ctx, struct dc_clocks *clks)
@@ -1006,3 +969,11 @@ void dm_helpers_enable_periodic_detection(struct dc_context *ctx, bool enable)
 {
 	/* TODO: add periodic detection implementation */
 }
+
+void dm_helpers_dp_mst_update_branch_bandwidth(
+		struct dc_context *ctx,
+		struct dc_link *link)
+{
+	// TODO
+}
+
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h
index 79b5f9999fec..5c9303241aeb 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_irq_params.h
@@ -39,7 +39,7 @@ struct dm_irq_params {
 #ifdef CONFIG_DEBUG_FS
 	enum amdgpu_dm_pipe_crc_source crc_src;
 #ifdef CONFIG_DRM_AMD_SECURE_DISPLAY
-	struct crc_window_parm crc_window;
+	struct crc_window_param window_param;
 #endif
 #endif
 };
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 6ff96b4bdda5..1edf7385f8d8 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -642,15 +642,18 @@ static void set_dsc_configs_from_fairness_vars(struct dsc_mst_fairness_params *p
 		int count,
 		int k)
 {
+	struct drm_connector *drm_connector;
 	int i;
 
 	for (i = 0; i < count; i++) {
+		drm_connector = &params[i].aconnector->base;
+
 		memset(&params[i].timing->dsc_cfg, 0, sizeof(params[i].timing->dsc_cfg));
 		if (vars[i + k].dsc_enabled && dc_dsc_compute_config(
 					params[i].sink->ctx->dc->res_pool->dscs[0],
 					&params[i].sink->dsc_caps.dsc_dec_caps,
 					params[i].sink->ctx->dc->debug.dsc_min_slice_height_override,
-					params[i].sink->edid_caps.panel_patch.max_dsc_target_bpp_limit,
+					drm_connector->display_info.max_dsc_bpp,
 					0,
 					params[i].timing,
 					&params[i].timing->dsc_cfg)) {
@@ -692,24 +695,28 @@ static int bpp_x16_from_pbn(struct dsc_mst_fairness_params param, int pbn)
 	struct dc_dsc_config dsc_config;
 	u64 kbps;
 
+	struct drm_connector *drm_connector = &param.aconnector->base;
+	uint32_t max_dsc_target_bpp_limit_override =
+		drm_connector->display_info.max_dsc_bpp;
+
 	kbps = div_u64((u64)pbn * 994 * 8 * 54, 64);
 	dc_dsc_compute_config(
 			param.sink->ctx->dc->res_pool->dscs[0],
 			&param.sink->dsc_caps.dsc_dec_caps,
 			param.sink->ctx->dc->debug.dsc_min_slice_height_override,
-			param.sink->edid_caps.panel_patch.max_dsc_target_bpp_limit,
+			max_dsc_target_bpp_limit_override,
 			(int) kbps, param.timing, &dsc_config);
 
 	return dsc_config.bits_per_pixel;
 }
 
-static bool increase_dsc_bpp(struct drm_atomic_state *state,
-			     struct drm_dp_mst_topology_state *mst_state,
-			     struct dc_link *dc_link,
-			     struct dsc_mst_fairness_params *params,
-			     struct dsc_mst_fairness_vars *vars,
-			     int count,
-			     int k)
+static int increase_dsc_bpp(struct drm_atomic_state *state,
+			    struct drm_dp_mst_topology_state *mst_state,
+			    struct dc_link *dc_link,
+			    struct dsc_mst_fairness_params *params,
+			    struct dsc_mst_fairness_vars *vars,
+			    int count,
+			    int k)
 {
 	int i;
 	bool bpp_increased[MAX_PIPES];
@@ -719,6 +726,7 @@ static bool increase_dsc_bpp(struct drm_atomic_state *state,
 	int remaining_to_increase = 0;
 	int link_timeslots_used;
 	int fair_pbn_alloc;
+	int ret = 0;
 
 	for (i = 0; i < count; i++) {
 		if (vars[i + k].dsc_enabled) {
@@ -757,52 +765,60 @@ static bool increase_dsc_bpp(struct drm_atomic_state *state,
 
 		if (initial_slack[next_index] > fair_pbn_alloc) {
 			vars[next_index].pbn += fair_pbn_alloc;
-			if (drm_dp_atomic_find_time_slots(state,
-							  params[next_index].port->mgr,
-							  params[next_index].port,
-							  vars[next_index].pbn) < 0)
-				return false;
-			if (!drm_dp_mst_atomic_check(state)) {
+			ret = drm_dp_atomic_find_time_slots(state,
+							    params[next_index].port->mgr,
+							    params[next_index].port,
+							    vars[next_index].pbn);
+			if (ret < 0)
+				return ret;
+
+			ret = drm_dp_mst_atomic_check(state);
+			if (ret == 0) {
 				vars[next_index].bpp_x16 = bpp_x16_from_pbn(params[next_index], vars[next_index].pbn);
 			} else {
 				vars[next_index].pbn -= fair_pbn_alloc;
-				if (drm_dp_atomic_find_time_slots(state,
-								  params[next_index].port->mgr,
-								  params[next_index].port,
-								  vars[next_index].pbn) < 0)
-					return false;
+				ret = drm_dp_atomic_find_time_slots(state,
+								    params[next_index].port->mgr,
+								    params[next_index].port,
+								    vars[next_index].pbn);
+				if (ret < 0)
+					return ret;
 			}
 		} else {
 			vars[next_index].pbn += initial_slack[next_index];
-			if (drm_dp_atomic_find_time_slots(state,
-							  params[next_index].port->mgr,
-							  params[next_index].port,
-							  vars[next_index].pbn) < 0)
-				return false;
-			if (!drm_dp_mst_atomic_check(state)) {
+			ret = drm_dp_atomic_find_time_slots(state,
+							    params[next_index].port->mgr,
+							    params[next_index].port,
+							    vars[next_index].pbn);
+			if (ret < 0)
+				return ret;
+
+			ret = drm_dp_mst_atomic_check(state);
+			if (ret == 0) {
 				vars[next_index].bpp_x16 = params[next_index].bw_range.max_target_bpp_x16;
 			} else {
 				vars[next_index].pbn -= initial_slack[next_index];
-				if (drm_dp_atomic_find_time_slots(state,
-								  params[next_index].port->mgr,
-								  params[next_index].port,
-								  vars[next_index].pbn) < 0)
-					return false;
+				ret = drm_dp_atomic_find_time_slots(state,
+								    params[next_index].port->mgr,
+								    params[next_index].port,
+								    vars[next_index].pbn);
+				if (ret < 0)
+					return ret;
 			}
 		}
 
 		bpp_increased[next_index] = true;
 		remaining_to_increase--;
 	}
-	return true;
+	return 0;
 }
 
-static bool try_disable_dsc(struct drm_atomic_state *state,
-			    struct dc_link *dc_link,
-			    struct dsc_mst_fairness_params *params,
-			    struct dsc_mst_fairness_vars *vars,
-			    int count,
-			    int k)
+static int try_disable_dsc(struct drm_atomic_state *state,
+			   struct dc_link *dc_link,
+			   struct dsc_mst_fairness_params *params,
+			   struct dsc_mst_fairness_vars *vars,
+			   int count,
+			   int k)
 {
 	int i;
 	bool tried[MAX_PIPES];
@@ -810,6 +826,7 @@ static bool try_disable_dsc(struct drm_atomic_state *state,
 	int max_kbps_increase;
 	int next_index;
 	int remaining_to_try = 0;
+	int ret;
 
 	for (i = 0; i < count; i++) {
 		if (vars[i + k].dsc_enabled
@@ -840,49 +857,52 @@ static bool try_disable_dsc(struct drm_atomic_state *state,
 			break;
 
 		vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.stream_kbps);
-		if (drm_dp_atomic_find_time_slots(state,
-						  params[next_index].port->mgr,
-						  params[next_index].port,
-						  vars[next_index].pbn) < 0)
-			return false;
+		ret = drm_dp_atomic_find_time_slots(state,
+						    params[next_index].port->mgr,
+						    params[next_index].port,
+						    vars[next_index].pbn);
+		if (ret < 0)
+			return ret;
 
-		if (!drm_dp_mst_atomic_check(state)) {
+		ret = drm_dp_mst_atomic_check(state);
+		if (ret == 0) {
 			vars[next_index].dsc_enabled = false;
 			vars[next_index].bpp_x16 = 0;
 		} else {
 			vars[next_index].pbn = kbps_to_peak_pbn(params[next_index].bw_range.max_kbps);
-			if (drm_dp_atomic_find_time_slots(state,
-							  params[next_index].port->mgr,
-							  params[next_index].port,
-							  vars[next_index].pbn) < 0)
-				return false;
+			ret = drm_dp_atomic_find_time_slots(state,
+							    params[next_index].port->mgr,
+							    params[next_index].port,
+							    vars[next_index].pbn);
+			if (ret < 0)
+				return ret;
 		}
 
 		tried[next_index] = true;
 		remaining_to_try--;
 	}
-	return true;
+	return 0;
 }
 
-static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
-					     struct dc_state *dc_state,
-					     struct dc_link *dc_link,
-					     struct dsc_mst_fairness_vars *vars,
-					     struct drm_dp_mst_topology_mgr *mgr,
-					     int *link_vars_start_index)
+static int compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
+					    struct dc_state *dc_state,
+					    struct dc_link *dc_link,
+					    struct dsc_mst_fairness_vars *vars,
+					    struct drm_dp_mst_topology_mgr *mgr,
+					    int *link_vars_start_index)
 {
 	struct dc_stream_state *stream;
 	struct dsc_mst_fairness_params params[MAX_PIPES];
 	struct amdgpu_dm_connector *aconnector;
 	struct drm_dp_mst_topology_state *mst_state = drm_atomic_get_mst_topology_state(state, mgr);
 	int count = 0;
-	int i, k;
+	int i, k, ret;
 	bool debugfs_overwrite = false;
 
 	memset(params, 0, sizeof(params));
 
 	if (IS_ERR(mst_state))
-		return false;
+		return PTR_ERR(mst_state);
 
 	mst_state->pbn_div = dm_mst_get_pbn_divider(dc_link);
 #if defined(CONFIG_DRM_AMD_DC_DCN)
@@ -933,7 +953,7 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
 
 	if (count == 0) {
 		ASSERT(0);
-		return true;
+		return 0;
 	}
 
 	/* k is start index of vars for current phy link used by mst hub */
@@ -947,13 +967,17 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
 		vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps);
 		vars[i + k].dsc_enabled = false;
 		vars[i + k].bpp_x16 = 0;
-		if (drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port,
-						  vars[i + k].pbn) < 0)
-			return false;
+		ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr, params[i].port,
+						    vars[i + k].pbn);
+		if (ret < 0)
+			return ret;
 	}
-	if (!drm_dp_mst_atomic_check(state) && !debugfs_overwrite) {
+	ret = drm_dp_mst_atomic_check(state);
+	if (ret == 0 && !debugfs_overwrite) {
 		set_dsc_configs_from_fairness_vars(params, vars, count, k);
-		return true;
+		return 0;
+	} else if (ret != -ENOSPC) {
+		return ret;
 	}
 
 	/* Try max compression */
@@ -962,31 +986,36 @@ static bool compute_mst_dsc_configs_for_link(struct drm_atomic_state *state,
 			vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.min_kbps);
 			vars[i + k].dsc_enabled = true;
 			vars[i + k].bpp_x16 = params[i].bw_range.min_target_bpp_x16;
-			if (drm_dp_atomic_find_time_slots(state, params[i].port->mgr,
-							  params[i].port, vars[i + k].pbn) < 0)
-				return false;
+			ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr,
+							    params[i].port, vars[i + k].pbn);
+			if (ret < 0)
+				return ret;
 		} else {
 			vars[i + k].pbn = kbps_to_peak_pbn(params[i].bw_range.stream_kbps);
 			vars[i + k].dsc_enabled = false;
 			vars[i + k].bpp_x16 = 0;
-			if (drm_dp_atomic_find_time_slots(state, params[i].port->mgr,
-							  params[i].port, vars[i + k].pbn) < 0)
-				return false;
+			ret = drm_dp_atomic_find_time_slots(state, params[i].port->mgr,
+							    params[i].port, vars[i + k].pbn);
+			if (ret < 0)
+				return ret;
 		}
 	}
-	if (drm_dp_mst_atomic_check(state))
-		return false;
+	ret = drm_dp_mst_atomic_check(state);
+	if (ret != 0)
+		return ret;
 
 	/* Optimize degree of compression */
-	if (!increase_dsc_bpp(state, mst_state, dc_link, params, vars, count, k))
-		return false;
+	ret = increase_dsc_bpp(state, mst_state, dc_link, params, vars, count, k);
+	if (ret < 0)
+		return ret;
 
-	if (!try_disable_dsc(state, dc_link, params, vars, count, k))
-		return false;
+	ret = try_disable_dsc(state, dc_link, params, vars, count, k);
+	if (ret < 0)
+		return ret;
 
 	set_dsc_configs_from_fairness_vars(params, vars, count, k);
 
-	return true;
+	return 0;
 }
 
 static bool is_dsc_need_re_compute(
@@ -1087,15 +1116,17 @@ static bool is_dsc_need_re_compute(
 	return is_dsc_need_re_compute;
 }
 
-bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
-				       struct dc_state *dc_state,
-				       struct dsc_mst_fairness_vars *vars)
+int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
+				      struct dc_state *dc_state,
+				      struct dsc_mst_fairness_vars *vars)
 {
 	int i, j;
 	struct dc_stream_state *stream;
 	bool computed_streams[MAX_PIPES];
 	struct amdgpu_dm_connector *aconnector;
+	struct drm_dp_mst_topology_mgr *mst_mgr;
 	int link_vars_start_index = 0;
+	int ret = 0;
 
 	for (i = 0; i < dc_state->stream_count; i++)
 		computed_streams[i] = false;
@@ -1108,7 +1139,7 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
 
 		aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
 
-		if (!aconnector || !aconnector->dc_sink)
+		if (!aconnector || !aconnector->dc_sink || !aconnector->port)
 			continue;
 
 		if (!aconnector->dc_sink->dsc_caps.dsc_dec_caps.is_dsc_supported)
@@ -1118,19 +1149,16 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
 			continue;
 
 		if (dcn20_remove_stream_from_ctx(stream->ctx->dc, dc_state, stream) != DC_OK)
-			return false;
+			return -EINVAL;
 
 		if (!is_dsc_need_re_compute(state, dc_state, stream->link))
 			continue;
 
-		mutex_lock(&aconnector->mst_mgr.lock);
-		if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars,
-						      &aconnector->mst_mgr,
-						      &link_vars_start_index)) {
-			mutex_unlock(&aconnector->mst_mgr.lock);
-			return false;
-		}
-		mutex_unlock(&aconnector->mst_mgr.lock);
+		mst_mgr = aconnector->port->mgr;
+		ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, mst_mgr,
+						       &link_vars_start_index);
+		if (ret != 0)
+			return ret;
 
 		for (j = 0; j < dc_state->stream_count; j++) {
 			if (dc_state->streams[j]->link == stream->link)
@@ -1143,22 +1171,23 @@ bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
 
 		if (stream->timing.flags.DSC == 1)
 			if (dc_stream_add_dsc_to_resource(stream->ctx->dc, dc_state, stream) != DC_OK)
-				return false;
+				return -EINVAL;
 	}
 
-	return true;
+	return ret;
 }
 
-static bool
-	pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
-					      struct dc_state *dc_state,
-					      struct dsc_mst_fairness_vars *vars)
+static int pre_compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
+						 struct dc_state *dc_state,
+						 struct dsc_mst_fairness_vars *vars)
 {
 	int i, j;
 	struct dc_stream_state *stream;
 	bool computed_streams[MAX_PIPES];
 	struct amdgpu_dm_connector *aconnector;
+	struct drm_dp_mst_topology_mgr *mst_mgr;
 	int link_vars_start_index = 0;
+	int ret = 0;
 
 	for (i = 0; i < dc_state->stream_count; i++)
 		computed_streams[i] = false;
@@ -1171,7 +1200,7 @@ static bool
 
 		aconnector = (struct amdgpu_dm_connector *)stream->dm_stream_context;
 
-		if (!aconnector || !aconnector->dc_sink)
+		if (!aconnector || !aconnector->dc_sink || !aconnector->port)
 			continue;
 
 		if (!aconnector->dc_sink->dsc_caps.dsc_dec_caps.is_dsc_supported)
@@ -1183,14 +1212,11 @@ static bool
 		if (!is_dsc_need_re_compute(state, dc_state, stream->link))
 			continue;
 
-		mutex_lock(&aconnector->mst_mgr.lock);
-		if (!compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars,
-						      &aconnector->mst_mgr,
-						      &link_vars_start_index)) {
-			mutex_unlock(&aconnector->mst_mgr.lock);
-			return false;
-		}
-		mutex_unlock(&aconnector->mst_mgr.lock);
+		mst_mgr = aconnector->port->mgr;
+		ret = compute_mst_dsc_configs_for_link(state, dc_state, stream->link, vars, mst_mgr,
+						       &link_vars_start_index);
+		if (ret != 0)
+			return ret;
 
 		for (j = 0; j < dc_state->stream_count; j++) {
 			if (dc_state->streams[j]->link == stream->link)
@@ -1198,7 +1224,7 @@ static bool
 		}
 	}
 
-	return true;
+	return ret;
 }
 
 static int find_crtc_index_in_state_by_stream(struct drm_atomic_state *state,
@@ -1253,9 +1279,9 @@ static bool is_dsc_precompute_needed(struct drm_atomic_state *state)
 	return ret;
 }
 
-bool pre_validate_dsc(struct drm_atomic_state *state,
-		      struct dm_atomic_state **dm_state_ptr,
-		      struct dsc_mst_fairness_vars *vars)
+int pre_validate_dsc(struct drm_atomic_state *state,
+		     struct dm_atomic_state **dm_state_ptr,
+		     struct dsc_mst_fairness_vars *vars)
 {
 	int i;
 	struct dm_atomic_state *dm_state;
@@ -1264,11 +1290,12 @@ bool pre_validate_dsc(struct drm_atomic_state *state,
 
 	if (!is_dsc_precompute_needed(state)) {
 		DRM_INFO_ONCE("DSC precompute is not needed.\n");
-		return true;
+		return 0;
 	}
-	if (dm_atomic_get_state(state, dm_state_ptr)) {
+	ret = dm_atomic_get_state(state, dm_state_ptr);
+	if (ret != 0) {
 		DRM_INFO_ONCE("dm_atomic_get_state() failed\n");
-		return false;
+		return ret;
 	}
 	dm_state = *dm_state_ptr;
 
@@ -1280,7 +1307,7 @@ bool pre_validate_dsc(struct drm_atomic_state *state,
 
 	local_dc_state = kmemdup(dm_state->context, sizeof(struct dc_state), GFP_KERNEL);
 	if (!local_dc_state)
-		return false;
+		return -ENOMEM;
 
 	for (i = 0; i < local_dc_state->stream_count; i++) {
 		struct dc_stream_state *stream = dm_state->context->streams[i];
@@ -1316,9 +1343,9 @@ bool pre_validate_dsc(struct drm_atomic_state *state,
 	if (ret != 0)
 		goto clean_exit;
 
-	if (!pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars)) {
+	ret = pre_compute_mst_dsc_configs_for_state(state, local_dc_state, vars);
+	if (ret != 0) {
 		DRM_INFO_ONCE("pre_compute_mst_dsc_configs_for_state() failed\n");
-		ret = -EINVAL;
 		goto clean_exit;
 	}
 
@@ -1349,7 +1376,7 @@ clean_exit:
 
 	kfree(local_dc_state);
 
-	return (ret == 0);
+	return ret;
 }
 
 static unsigned int kbps_from_pbn(unsigned int pbn)
@@ -1392,6 +1419,7 @@ enum dc_status dm_dp_mst_is_port_support_mode(
 	unsigned int upper_link_bw_in_kbps = 0, down_link_bw_in_kbps = 0;
 	unsigned int max_compressed_bw_in_kbps = 0;
 	struct dc_dsc_bw_range bw_range = {0};
+	struct drm_dp_mst_topology_mgr *mst_mgr;
 
 	/*
 	 * check if the mode could be supported if DSC pass-through is supported
@@ -1400,7 +1428,8 @@ enum dc_status dm_dp_mst_is_port_support_mode(
 	 */
 	if (is_dsc_common_config_possible(stream, &bw_range) &&
 	    aconnector->port->passthrough_aux) {
-		mutex_lock(&aconnector->mst_mgr.lock);
+		mst_mgr = aconnector->port->mgr;
+		mutex_lock(&mst_mgr->lock);
 
 		cur_link_settings = stream->link->verified_link_cap;
 
@@ -1413,7 +1442,7 @@ enum dc_status dm_dp_mst_is_port_support_mode(
 		end_to_end_bw_in_kbps = min(upper_link_bw_in_kbps,
 					    down_link_bw_in_kbps);
 
-		mutex_unlock(&aconnector->mst_mgr.lock);
+		mutex_unlock(&mst_mgr->lock);
 
 		/*
 		 * use the maximum dsc compression bandwidth as the required
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
index b92a7c5671aa..97fd70df531b 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.h
@@ -53,15 +53,15 @@ struct dsc_mst_fairness_vars {
 	struct amdgpu_dm_connector *aconnector;
 };
 
-bool compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
-				       struct dc_state *dc_state,
-				       struct dsc_mst_fairness_vars *vars);
+int compute_mst_dsc_configs_for_state(struct drm_atomic_state *state,
+				      struct dc_state *dc_state,
+				      struct dsc_mst_fairness_vars *vars);
 
 bool needs_dsc_aux_workaround(struct dc_link *link);
 
-bool pre_validate_dsc(struct drm_atomic_state *state,
-		      struct dm_atomic_state **dm_state_ptr,
-		      struct dsc_mst_fairness_vars *vars);
+int pre_validate_dsc(struct drm_atomic_state *state,
+		     struct dm_atomic_state **dm_state_ptr,
+		     struct dsc_mst_fairness_vars *vars);
 
 enum dc_status dm_dp_mst_is_port_support_mode(
 	struct amdgpu_dm_connector *aconnector,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
index dfd3be49eac8..3c50b3ff7954 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_plane.c
@@ -1369,7 +1369,7 @@ static bool dm_plane_format_mod_supported(struct drm_plane *plane,
 {
 	struct amdgpu_device *adev = drm_to_adev(plane->dev);
 	const struct drm_format_info *info = drm_format_info(format);
-	struct hw_asic_id asic_id = adev->dm.dc->ctx->asic_id;
+	int i;
 
 	enum dm_micro_swizzle microtile = modifier_gfx9_swizzle_mode(modifier) & 3;
 
@@ -1386,49 +1386,13 @@ static bool dm_plane_format_mod_supported(struct drm_plane *plane,
 		return true;
 	}
 
-	/* check if swizzle mode is supported by this version of DCN */
-	switch (asic_id.chip_family) {
-	case FAMILY_SI:
-	case FAMILY_CI:
-	case FAMILY_KV:
-	case FAMILY_CZ:
-	case FAMILY_VI:
-		/* asics before AI does not have modifier support */
-		return false;
-	case FAMILY_AI:
-	case FAMILY_RV:
-	case FAMILY_NV:
-	case FAMILY_VGH:
-	case FAMILY_YELLOW_CARP:
-	case AMDGPU_FAMILY_GC_10_3_6:
-	case AMDGPU_FAMILY_GC_10_3_7:
-		switch (AMD_FMT_MOD_GET(TILE, modifier)) {
-		case AMD_FMT_MOD_TILE_GFX9_64K_R_X:
-		case AMD_FMT_MOD_TILE_GFX9_64K_D_X:
-		case AMD_FMT_MOD_TILE_GFX9_64K_S_X:
-		case AMD_FMT_MOD_TILE_GFX9_64K_D:
-			return true;
-		default:
-			return false;
-		}
-		break;
-	case AMDGPU_FAMILY_GC_11_0_0:
-	case AMDGPU_FAMILY_GC_11_0_1:
-		switch (AMD_FMT_MOD_GET(TILE, modifier)) {
-		case AMD_FMT_MOD_TILE_GFX11_256K_R_X:
-		case AMD_FMT_MOD_TILE_GFX9_64K_R_X:
-		case AMD_FMT_MOD_TILE_GFX9_64K_D_X:
-		case AMD_FMT_MOD_TILE_GFX9_64K_S_X:
-		case AMD_FMT_MOD_TILE_GFX9_64K_D:
-			return true;
-		default:
-			return false;
-		}
-		break;
-	default:
-		ASSERT(0); /* Unknown asic */
-		break;
+	/* Check that the modifier is on the list of the plane's supported modifiers. */
+	for (i = 0; i < plane->modifier_count; i++) {
+		if (modifier == plane->modifiers[i])
+			break;
 	}
+	if (i == plane->modifier_count)
+		return false;
 
 	/*
 	 * For D swizzle the canonical modifier depends on the bpp, so check
@@ -1636,6 +1600,10 @@ int amdgpu_dm_plane_init(struct amdgpu_display_manager *dm,
 		drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
 						   supported_rotations);
 
+	if (dm->adev->ip_versions[DCE_HWIP][0] > IP_VERSION(3, 0, 1) &&
+	    plane->type != DRM_PLANE_TYPE_CURSOR)
+		drm_plane_enable_fb_damage_clips(plane);
+
 	drm_plane_helper_add(plane, &dm_plane_helper_funcs);
 
 #ifdef CONFIG_DRM_AMD_DC_HDR
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
index d3bc9dc21771..0f580ea37576 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
@@ -37,6 +37,7 @@
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_encoder.h>
 #include <drm/drm_atomic.h>
+#include "dcn10/dcn10_optc.h"
 
 #include "dc/inc/core_types.h"
 
@@ -662,6 +663,69 @@ TRACE_EVENT(dcn_fpu,
 	    )
 );
 
+TRACE_EVENT(dcn_optc_lock_unlock_state,
+	    TP_PROTO(const struct optc *optc_state, int instance, bool lock, const char *function, const int line),
+	    TP_ARGS(optc_state, instance, lock, function, line),
+
+	    TP_STRUCT__entry(
+			     __field(const char *, function)
+			     __field(int, instance)
+			     __field(bool, lock)
+			     __field(int, line)
+			     __field(int, opp_count)
+			     __field(int, max_h_total)
+			     __field(int, max_v_total)
+			     __field(int, min_h_blank)
+			     __field(int, min_h_sync_width)
+			     __field(int, min_v_sync_width)
+			     __field(int, min_v_blank)
+			     __field(int, min_v_blank_interlace)
+			     __field(int, vstartup_start)
+			     __field(int, vupdate_offset)
+			     __field(int, vupdate_width)
+			     __field(int, vready_offset)
+	    ),
+	    TP_fast_assign(
+			   __entry->function = function;
+			   __entry->instance = instance;
+			   __entry->lock = lock;
+			   __entry->line = line;
+			   __entry->opp_count = optc_state->opp_count;
+			   __entry->max_h_total = optc_state->max_h_total;
+			   __entry->max_v_total = optc_state->max_v_total;
+			   __entry->min_h_blank = optc_state->min_h_blank;
+			   __entry->min_h_sync_width = optc_state->min_h_sync_width;
+			   __entry->min_v_sync_width = optc_state->min_v_sync_width;
+			   __entry->min_v_blank = optc_state->min_v_blank;
+			   __entry->min_v_blank_interlace = optc_state->min_v_blank_interlace;
+			   __entry->vstartup_start = optc_state->vstartup_start;
+			   __entry->vupdate_offset = optc_state->vupdate_offset;
+			   __entry->vupdate_width = optc_state->vupdate_width;
+			   __entry->vready_offset = optc_state->vupdate_offset;
+	    ),
+	    TP_printk("%s: %s()+%d: optc_instance=%d opp_count=%d max_h_total=%d max_v_total=%d "
+		      "min_h_blank=%d min_h_sync_width=%d min_v_sync_width=%d min_v_blank=%d "
+		      "min_v_blank_interlace=%d vstartup_start=%d vupdate_offset=%d vupdate_width=%d "
+		      "vready_offset=%d",
+		      __entry->lock ? "Lock" : "Unlock",
+		      __entry->function,
+		      __entry->line,
+		      __entry->instance,
+		      __entry->opp_count,
+		      __entry->max_h_total,
+		      __entry->max_v_total,
+		      __entry->min_h_blank,
+		      __entry->min_h_sync_width,
+		      __entry->min_v_sync_width,
+		      __entry->min_v_blank,
+		      __entry->min_v_blank_interlace,
+		      __entry->vstartup_start,
+		      __entry->vupdate_offset,
+		      __entry->vupdate_width,
+		      __entry->vready_offset
+	    )
+);
+
 #endif /* _AMDGPU_DM_TRACE_H_ */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
index ab0c6d191038..1743ca0a3641 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/dc_fpu.c
@@ -31,6 +31,8 @@
 #elif defined(CONFIG_PPC64)
 #include <asm/switch_to.h>
 #include <asm/cputable.h>
+#elif defined(CONFIG_ARM64)
+#include <asm/neon.h>
 #endif
 
 /**
@@ -99,6 +101,8 @@ void dc_fpu_begin(const char *function_name, const int line)
 			preempt_disable();
 			enable_kernel_fp();
 		}
+#elif defined(CONFIG_ARM64)
+		kernel_neon_begin();
 #endif
 	}
 
@@ -136,6 +140,8 @@ void dc_fpu_end(const char *function_name, const int line)
 			disable_kernel_fp();
 			preempt_enable();
 		}
+#elif defined(CONFIG_ARM64)
+		kernel_neon_end();
 #endif
 	}
 
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
index 9b8ea6e9a2b9..a1a00f432168 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser.c
@@ -138,7 +138,9 @@ static uint8_t get_number_of_objects(struct bios_parser *bp, uint32_t offset)
 
 	uint32_t object_table_offset = bp->object_info_tbl_offset + offset;
 
-	table = GET_IMAGE(ATOM_OBJECT_TABLE, object_table_offset);
+	table = ((ATOM_OBJECT_TABLE *) bios_get_image(&bp->base,
+				object_table_offset,
+				struct_size(table, asObjects, 1)));
 
 	if (!table)
 		return 0;
@@ -166,8 +168,9 @@ static struct graphics_object_id bios_parser_get_connector_id(
 	uint32_t connector_table_offset = bp->object_info_tbl_offset
 		+ le16_to_cpu(bp->object_info_tbl.v1_1->usConnectorObjectTableOffset);
 
-	ATOM_OBJECT_TABLE *tbl =
-		GET_IMAGE(ATOM_OBJECT_TABLE, connector_table_offset);
+	ATOM_OBJECT_TABLE *tbl = ((ATOM_OBJECT_TABLE *) bios_get_image(&bp->base,
+				connector_table_offset,
+				struct_size(tbl, asObjects, 1)));
 
 	if (!tbl) {
 		dm_error("Can't get connector table from atom bios.\n");
@@ -662,8 +665,9 @@ static enum bp_result get_ss_info_v3_1(
 	if (!DATA_TABLES(ASIC_InternalSS_Info))
 		return BP_RESULT_UNSUPPORTED;
 
-	ss_table_header_include = GET_IMAGE(ATOM_ASIC_INTERNAL_SS_INFO_V3,
-		DATA_TABLES(ASIC_InternalSS_Info));
+	ss_table_header_include = ((ATOM_ASIC_INTERNAL_SS_INFO_V3 *) bios_get_image(&bp->base,
+				DATA_TABLES(ASIC_InternalSS_Info),
+				struct_size(ss_table_header_include, asSpreadSpectrum, 1)));
 	table_size =
 		(le16_to_cpu(ss_table_header_include->sHeader.usStructureSize)
 				- sizeof(ATOM_COMMON_TABLE_HEADER))
@@ -1029,8 +1033,10 @@ static enum bp_result get_ss_info_from_internal_ss_info_tbl_V2_1(
 	if (!DATA_TABLES(ASIC_InternalSS_Info))
 		return result;
 
-	header = GET_IMAGE(ATOM_ASIC_INTERNAL_SS_INFO_V2,
-		DATA_TABLES(ASIC_InternalSS_Info));
+	header = ((ATOM_ASIC_INTERNAL_SS_INFO_V2 *) bios_get_image(
+				&bp->base,
+				DATA_TABLES(ASIC_InternalSS_Info),
+				struct_size(header, asSpreadSpectrum, 1)));
 
 	memset(info, 0, sizeof(struct spread_spectrum_info));
 
@@ -1709,8 +1715,10 @@ static uint32_t get_ss_entry_number_from_internal_ss_info_tbl_v2_1(
 	if (!DATA_TABLES(ASIC_InternalSS_Info))
 		return 0;
 
-	header_include = GET_IMAGE(ATOM_ASIC_INTERNAL_SS_INFO_V2,
-			DATA_TABLES(ASIC_InternalSS_Info));
+	header_include = ((ATOM_ASIC_INTERNAL_SS_INFO_V2 *) bios_get_image(
+				&bp->base,
+				DATA_TABLES(ASIC_InternalSS_Info),
+				struct_size(header_include, asSpreadSpectrum, 1)));
 
 	size = (le16_to_cpu(header_include->sHeader.usStructureSize)
 			- sizeof(ATOM_COMMON_TABLE_HEADER))
@@ -1746,8 +1754,9 @@ static uint32_t get_ss_entry_number_from_internal_ss_info_tbl_V3_1(
 	if (!DATA_TABLES(ASIC_InternalSS_Info))
 		return number;
 
-	header_include = GET_IMAGE(ATOM_ASIC_INTERNAL_SS_INFO_V3,
-			DATA_TABLES(ASIC_InternalSS_Info));
+	header_include = ((ATOM_ASIC_INTERNAL_SS_INFO_V3 *) bios_get_image(&bp->base,
+				DATA_TABLES(ASIC_InternalSS_Info),
+				struct_size(header_include, asSpreadSpectrum, 1)));
 	size = (le16_to_cpu(header_include->sHeader.usStructureSize) -
 			sizeof(ATOM_COMMON_TABLE_HEADER)) /
 					sizeof(ATOM_ASIC_SS_ASSIGNMENT_V3);
@@ -1789,11 +1798,13 @@ static enum bp_result bios_parser_get_gpio_pin_info(
 	if (!DATA_TABLES(GPIO_Pin_LUT))
 		return BP_RESULT_BADBIOSTABLE;
 
-	header = GET_IMAGE(ATOM_GPIO_PIN_LUT, DATA_TABLES(GPIO_Pin_LUT));
+	header = ((ATOM_GPIO_PIN_LUT *) bios_get_image(&bp->base,
+				DATA_TABLES(GPIO_Pin_LUT),
+				struct_size(header, asGPIO_Pin, 1)));
 	if (!header)
 		return BP_RESULT_BADBIOSTABLE;
 
-	if (sizeof(ATOM_COMMON_TABLE_HEADER) + sizeof(ATOM_GPIO_PIN_LUT)
+	if (sizeof(ATOM_COMMON_TABLE_HEADER) + struct_size(header, asGPIO_Pin, 1)
 			> le16_to_cpu(header->sHeader.usStructureSize))
 		return BP_RESULT_BADBIOSTABLE;
 
@@ -1978,7 +1989,8 @@ static ATOM_OBJECT *get_bios_object(struct bios_parser *bp,
 
 	offset += bp->object_info_tbl_offset;
 
-	tbl = GET_IMAGE(ATOM_OBJECT_TABLE, offset);
+	tbl = ((ATOM_OBJECT_TABLE *) bios_get_image(&bp->base, offset,
+				struct_size(tbl, asObjects, 1)));
 	if (!tbl)
 		return NULL;
 
@@ -2600,8 +2612,7 @@ static enum bp_result update_slot_layout_info(
 
 	for (;;) {
 
-		record_header = (ATOM_COMMON_RECORD_HEADER *)
-			GET_IMAGE(ATOM_COMMON_RECORD_HEADER, record_offset);
+		record_header = GET_IMAGE(ATOM_COMMON_RECORD_HEADER, record_offset);
 		if (record_header == NULL) {
 			result = BP_RESULT_BADBIOSTABLE;
 			break;
@@ -2615,7 +2626,7 @@ static enum bp_result update_slot_layout_info(
 
 		if (record_header->ucRecordType ==
 			ATOM_BRACKET_LAYOUT_RECORD_TYPE &&
-			sizeof(ATOM_BRACKET_LAYOUT_RECORD)
+			struct_size(record, asConnInfo, 1)
 			<= record_header->ucRecordSize) {
 			record = (ATOM_BRACKET_LAYOUT_RECORD *)
 				(record_header);
@@ -2709,8 +2720,9 @@ static enum bp_result get_bracket_layout_record(
 
 	genericTableOffset = bp->object_info_tbl_offset +
 		bp->object_info_tbl.v1_3->usMiscObjectTableOffset;
-	object_table = (ATOM_OBJECT_TABLE *)
-		GET_IMAGE(ATOM_OBJECT_TABLE, genericTableOffset);
+	object_table = ((ATOM_OBJECT_TABLE *) bios_get_image(&bp->base,
+				genericTableOffset,
+				struct_size(object_table, asObjects, 1)));
 	if (!object_table)
 		return BP_RESULT_FAILURE;
 
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index ee0456b5e14e..074e70a5c458 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -462,6 +462,7 @@ static enum bp_result get_gpio_i2c_info(
 	uint32_t count = 0;
 	unsigned int table_index = 0;
 	bool find_valid = false;
+	struct atom_gpio_pin_assignment *pin;
 
 	if (!info)
 		return BP_RESULT_BADINPUT;
@@ -489,20 +490,17 @@ static enum bp_result get_gpio_i2c_info(
 			- sizeof(struct atom_common_table_header))
 				/ sizeof(struct atom_gpio_pin_assignment);
 
+	pin = (struct atom_gpio_pin_assignment *) header->gpio_pin;
+
 	for (table_index = 0; table_index < count; table_index++) {
-		if (((record->i2c_id & I2C_HW_CAP) == (
-		header->gpio_pin[table_index].gpio_id &
-						I2C_HW_CAP)) &&
-		((record->i2c_id & I2C_HW_ENGINE_ID_MASK)  ==
-		(header->gpio_pin[table_index].gpio_id &
-					I2C_HW_ENGINE_ID_MASK)) &&
-		((record->i2c_id & I2C_HW_LANE_MUX) ==
-		(header->gpio_pin[table_index].gpio_id &
-						I2C_HW_LANE_MUX))) {
+		if (((record->i2c_id & I2C_HW_CAP) 				== (pin->gpio_id & I2C_HW_CAP)) &&
+		    ((record->i2c_id & I2C_HW_ENGINE_ID_MASK)	== (pin->gpio_id & I2C_HW_ENGINE_ID_MASK)) &&
+		    ((record->i2c_id & I2C_HW_LANE_MUX) 		== (pin->gpio_id & I2C_HW_LANE_MUX))) {
 			/* still valid */
 			find_valid = true;
 			break;
 		}
+		pin = (struct atom_gpio_pin_assignment *)((uint8_t *)pin + sizeof(struct atom_gpio_pin_assignment));
 	}
 
 	/* If we don't find the entry that we are looking for then
@@ -2393,6 +2391,26 @@ static enum bp_result get_vram_info_v25(
 	return result;
 }
 
+static enum bp_result get_vram_info_v30(
+	struct bios_parser *bp,
+	struct dc_vram_info *info)
+{
+	struct atom_vram_info_header_v3_0 *info_v30;
+	enum bp_result result = BP_RESULT_OK;
+
+	info_v30 = GET_IMAGE(struct atom_vram_info_header_v3_0,
+						DATA_TABLES(vram_info));
+
+	if (info_v30 == NULL)
+		return BP_RESULT_BADBIOSTABLE;
+
+	info->num_chans = info_v30->channel_num;
+	info->dram_channel_width_bytes = (1 << info_v30->channel_width) / 8;
+
+	return result;
+}
+
+
 /*
  * get_integrated_info_v11
  *
@@ -3060,6 +3078,16 @@ static enum bp_result bios_parser_get_vram_info(
 			}
 			break;
 
+		case 3:
+			switch (revision.minor) {
+			case 0:
+				result = get_vram_info_v30(bp, info);
+				break;
+			default:
+				break;
+			}
+			break;
+
 		default:
 			return result;
 		}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h
index 3e5df27aa96f..1ce19d875358 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn21/rn_clk_mgr_vbios_smu.h
@@ -26,6 +26,8 @@
 #ifndef DAL_DC_RN_CLK_MGR_VBIOS_SMU_H_
 #define DAL_DC_RN_CLK_MGR_VBIOS_SMU_H_
 
+enum dcn_pwr_state;
+
 int rn_vbios_smu_get_smu_version(struct clk_mgr_internal *clk_mgr);
 int rn_vbios_smu_set_dispclk(struct clk_mgr_internal *clk_mgr, int requested_dispclk_khz);
 int rn_vbios_smu_set_dprefclk(struct clk_mgr_internal *clk_mgr);
@@ -33,7 +35,7 @@ int rn_vbios_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int reque
 int rn_vbios_smu_set_min_deep_sleep_dcfclk(struct clk_mgr_internal *clk_mgr, int requested_min_ds_dcfclk_khz);
 void rn_vbios_smu_set_phyclk(struct clk_mgr_internal *clk_mgr, int requested_phyclk_khz);
 int rn_vbios_smu_set_dppclk(struct clk_mgr_internal *clk_mgr, int requested_dpp_khz);
-void rn_vbios_smu_set_dcn_low_power_state(struct clk_mgr_internal *clk_mgr, int display_count);
+void rn_vbios_smu_set_dcn_low_power_state(struct clk_mgr_internal *clk_mgr, enum dcn_pwr_state);
 void rn_vbios_smu_enable_48mhz_tmdp_refclk_pwrdwn(struct clk_mgr_internal *clk_mgr, bool enable);
 void rn_vbios_smu_enable_pme_wa(struct clk_mgr_internal *clk_mgr);
 int rn_vbios_smu_is_periodic_retraining_disabled(struct clk_mgr_internal *clk_mgr);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h
index 8ea8ee57b39f..61bb1d86182e 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn30/dcn30_smu11_driver_if.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
 // This is a stripped-down version of the smu11_driver_if.h file for the relevant DAL interfaces.
 
 #define SMU11_DRIVER_IF_VERSION 0x40
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
index c1eaf571407a..1c0569b1dc8f 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_clk_mgr.c
@@ -609,8 +609,10 @@ static void dcn31_clk_mgr_helper_populate_bw_params(struct clk_mgr_internal *clk
 	}
 
 	bw_params->vram_type = bios_info->memory_type;
-	bw_params->num_channels = bios_info->ma_channel_number;
 
+	bw_params->dram_channel_width_bytes = bios_info->memory_type == 0x22 ? 8 : 4;
+	//bw_params->dram_channel_width_bytes = dc->ctx->asic_id.vram_width;
+	bw_params->num_channels = bios_info->ma_channel_number ? bios_info->ma_channel_number : 4;
 	for (i = 0; i < WM_SET_COUNT; i++) {
 		bw_params->wm_table.entries[i].wm_inst = i;
 
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
index 090b2c02aee1..0827c7df2855 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn31/dcn31_smu.c
@@ -333,8 +333,8 @@ void dcn31_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zst
 			(support == DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY))
 		support = DCN_ZSTATE_SUPPORT_DISALLOW;
 
-
-	if (support == DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY)
+	if (support == DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY ||
+	    support == DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY)
 		param = 1;
 	else
 		param = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
index 1131c6d73f6c..20a06c04e4a1 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_clk_mgr.c
@@ -363,32 +363,32 @@ static struct wm_table ddr5_wm_table = {
 			.wm_inst = WM_A,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 9,
-			.sr_enter_plus_exit_time_us = 11,
+			.sr_exit_time_us = 12.5,
+			.sr_enter_plus_exit_time_us = 14.5,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_B,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 9,
-			.sr_enter_plus_exit_time_us = 11,
+			.sr_exit_time_us = 12.5,
+			.sr_enter_plus_exit_time_us = 14.5,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_C,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 9,
-			.sr_enter_plus_exit_time_us = 11,
+			.sr_exit_time_us = 12.5,
+			.sr_enter_plus_exit_time_us = 14.5,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_D,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.72,
-			.sr_exit_time_us = 9,
-			.sr_enter_plus_exit_time_us = 11,
+			.sr_exit_time_us = 12.5,
+			.sr_enter_plus_exit_time_us = 14.5,
 			.valid = true,
 		},
 	}
@@ -400,32 +400,32 @@ static struct wm_table lpddr5_wm_table = {
 			.wm_inst = WM_A,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.65333,
-			.sr_exit_time_us = 11.5,
-			.sr_enter_plus_exit_time_us = 14.5,
+			.sr_exit_time_us = 16.5,
+			.sr_enter_plus_exit_time_us = 18.5,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_B,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.65333,
-			.sr_exit_time_us = 11.5,
-			.sr_enter_plus_exit_time_us = 14.5,
+			.sr_exit_time_us = 16.5,
+			.sr_enter_plus_exit_time_us = 18.5,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_C,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.65333,
-			.sr_exit_time_us = 11.5,
-			.sr_enter_plus_exit_time_us = 14.5,
+			.sr_exit_time_us = 16.5,
+			.sr_enter_plus_exit_time_us = 18.5,
 			.valid = true,
 		},
 		{
 			.wm_inst = WM_D,
 			.wm_type = WM_TYPE_PSTATE_CHG,
 			.pstate_latency_us = 11.65333,
-			.sr_exit_time_us = 11.5,
-			.sr_enter_plus_exit_time_us = 14.5,
+			.sr_exit_time_us = 16.5,
+			.sr_enter_plus_exit_time_us = 18.5,
 			.valid = true,
 		},
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c
index ef0795b14a1f..f47cfe6b42bd 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn314/dcn314_smu.c
@@ -123,9 +123,10 @@ static int dcn314_smu_send_msg_with_param(struct clk_mgr_internal *clk_mgr,
 	uint32_t result;
 
 	result = dcn314_smu_wait_for_response(clk_mgr, 10, 200000);
-	ASSERT(result == VBIOSSMC_Result_OK);
 
-	smu_print("SMU response after wait: %d\n", result);
+	if (result != VBIOSSMC_Result_OK)
+		smu_print("SMU Response was not OK. SMU response after wait received is: %d\n",
+				result);
 
 	if (result == VBIOSSMC_Status_BUSY)
 		return -1;
@@ -216,6 +217,12 @@ int dcn314_smu_set_hard_min_dcfclk(struct clk_mgr_internal *clk_mgr, int request
 			VBIOSSMC_MSG_SetHardMinDcfclkByFreq,
 			khz_to_mhz_ceil(requested_dcfclk_khz));
 
+#ifdef DBG
+	smu_print("actual_dcfclk_set_mhz %d is set to : %d\n",
+			actual_dcfclk_set_mhz,
+			actual_dcfclk_set_mhz * 1000);
+#endif
+
 	return actual_dcfclk_set_mhz * 1000;
 }
 
@@ -339,8 +346,6 @@ void dcn314_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zs
 	if (!clk_mgr->smu_present)
 		return;
 
-	// Arg[15:0] = 8/9/0 for Z8/Z9/disallow -> existing bits
-	// Arg[16] = Disallow Z9 -> new bit
 	switch (support) {
 
 	case DCN_ZSTATE_SUPPORT_ALLOW:
@@ -359,6 +364,16 @@ void dcn314_smu_set_zstate_support(struct clk_mgr_internal *clk_mgr, enum dcn_zs
 		param = (1 << 10);
 		break;
 
+	case DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY:
+		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+		param = (1 << 10) | (1 << 8);
+		break;
+
+	case DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY:
+		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
+		param = (1 << 8);
+		break;
+
 	default: //DCN_ZSTATE_SUPPORT_UNKNOWN
 		msg_id = VBIOSSMC_MSG_AllowZstatesEntry;
 		param = 0;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
index 893991a0eb97..07edd9777edf 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn315/dcn315_clk_mgr.c
@@ -458,19 +458,6 @@ static void dcn315_get_dpm_table_from_smu(struct clk_mgr_internal *clk_mgr,
 	dcn315_smu_transfer_dpm_table_smu_2_dram(clk_mgr);
 }
 
-static uint32_t find_max_clk_value(const uint32_t clocks[], uint32_t num_clocks)
-{
-	uint32_t max = 0;
-	int i;
-
-	for (i = 0; i < num_clocks; ++i) {
-		if (clocks[i] > max)
-			max = clocks[i];
-	}
-
-	return max;
-}
-
 static void dcn315_clk_mgr_helper_populate_bw_params(
 		struct clk_mgr_internal *clk_mgr,
 		struct integrated_info *bios_info,
@@ -478,29 +465,21 @@ static void dcn315_clk_mgr_helper_populate_bw_params(
 {
 	int i;
 	struct clk_bw_params *bw_params = clk_mgr->base.bw_params;
-	uint32_t max_pstate = 0, max_fclk = 0, min_pstate = 0;
+	uint32_t max_pstate = clock_table->NumDfPstatesEnabled - 1;
 	struct clk_limit_table_entry def_max = bw_params->clk_table.entries[bw_params->clk_table.num_entries - 1];
 
-	/* Find highest fclk pstate */
-	for (i = 0; i < clock_table->NumDfPstatesEnabled; i++) {
-		if (clock_table->DfPstateTable[i].FClk > max_fclk) {
-			max_fclk = clock_table->DfPstateTable[i].FClk;
-			max_pstate = i;
-		}
-	}
-
 	/* For 315 we want to base clock table on dcfclk, need at least one entry regardless of pmfw table */
 	for (i = 0; i < clock_table->NumDcfClkLevelsEnabled; i++) {
 		int j;
-		uint32_t min_fclk = clock_table->DfPstateTable[0].FClk;
 
-		for (j = 1; j < clock_table->NumDfPstatesEnabled; j++) {
-			if (clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i]
-					&& clock_table->DfPstateTable[j].FClk < min_fclk) {
-				min_fclk = clock_table->DfPstateTable[j].FClk;
-				min_pstate = j;
-			}
+		/* DF table is sorted with clocks decreasing */
+		for (j = clock_table->NumDfPstatesEnabled - 2; j >= 0; j--) {
+			if (clock_table->DfPstateTable[j].Voltage <= clock_table->SocVoltage[i])
+				max_pstate = j;
 		}
+		/* Max DCFCLK should match up with max pstate */
+		if (i == clock_table->NumDcfClkLevelsEnabled - 1)
+			max_pstate = 0;
 
 		/* First search defaults for the clocks we don't read using closest lower or equal default dcfclk */
 		for (j = bw_params->clk_table.num_entries - 1; j > 0; j--)
@@ -511,9 +490,9 @@ static void dcn315_clk_mgr_helper_populate_bw_params(
 		bw_params->clk_table.entries[i].dtbclk_mhz = bw_params->clk_table.entries[j].dtbclk_mhz;
 
 		/* Now update clocks we do read */
-		bw_params->clk_table.entries[i].fclk_mhz = min_fclk;
-		bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[min_pstate].MemClk;
-		bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[min_pstate].Voltage;
+		bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[max_pstate].FClk;
+		bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[max_pstate].MemClk;
+		bw_params->clk_table.entries[i].voltage = clock_table->SocVoltage[i];
 		bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[i];
 		bw_params->clk_table.entries[i].socclk_mhz = clock_table->SocClocks[i];
 		bw_params->clk_table.entries[i].dispclk_mhz = clock_table->DispClocks[i];
@@ -521,25 +500,16 @@ static void dcn315_clk_mgr_helper_populate_bw_params(
 		bw_params->clk_table.entries[i].wck_ratio = 1;
 	}
 
-	/* Make sure to include at least one entry and highest pstate */
-	if (max_pstate != min_pstate || i == 0) {
-		bw_params->clk_table.entries[i].fclk_mhz = max_fclk;
-		bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[max_pstate].MemClk;
-		bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[max_pstate].Voltage;
-		bw_params->clk_table.entries[i].dcfclk_mhz = find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS);
+	/* Make sure to include at least one entry */
+	if (i == 0) {
+		bw_params->clk_table.entries[i].fclk_mhz = clock_table->DfPstateTable[0].FClk;
+		bw_params->clk_table.entries[i].memclk_mhz = clock_table->DfPstateTable[0].MemClk;
+		bw_params->clk_table.entries[i].voltage = clock_table->DfPstateTable[0].Voltage;
+		bw_params->clk_table.entries[i].dcfclk_mhz = clock_table->DcfClocks[0];
 		bw_params->clk_table.entries[i].wck_ratio = 1;
 		i++;
 	}
-	bw_params->clk_table.num_entries = i--;
-
-	/* Make sure all highest clocks are included*/
-	bw_params->clk_table.entries[i].socclk_mhz = find_max_clk_value(clock_table->SocClocks, NUM_SOCCLK_DPM_LEVELS);
-	bw_params->clk_table.entries[i].dispclk_mhz = find_max_clk_value(clock_table->DispClocks, NUM_DISPCLK_DPM_LEVELS);
-	bw_params->clk_table.entries[i].dppclk_mhz = find_max_clk_value(clock_table->DppClocks, NUM_DPPCLK_DPM_LEVELS);
-	ASSERT(clock_table->DcfClocks[i] == find_max_clk_value(clock_table->DcfClocks, NUM_DCFCLK_DPM_LEVELS));
-	bw_params->clk_table.entries[i].phyclk_mhz = def_max.phyclk_mhz;
-	bw_params->clk_table.entries[i].phyclk_d18_mhz = def_max.phyclk_d18_mhz;
-	bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
+	bw_params->clk_table.num_entries = i;
 
 	/* Set any 0 clocks to max default setting. Not an issue for
 	 * power since we aren't doing switching in such case anyway
@@ -565,6 +535,11 @@ static void dcn315_clk_mgr_helper_populate_bw_params(
 		if (!bw_params->clk_table.entries[i].dtbclk_mhz)
 			bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
 	}
+
+	/* Make sure all highest default clocks are included*/
+	ASSERT(bw_params->clk_table.entries[i-1].phyclk_mhz == def_max.phyclk_mhz);
+	ASSERT(bw_params->clk_table.entries[i-1].phyclk_d18_mhz == def_max.phyclk_d18_mhz);
+	ASSERT(bw_params->clk_table.entries[i-1].dtbclk_mhz == def_max.dtbclk_mhz);
 	ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz);
 	bw_params->vram_type = bios_info->memory_type;
 	bw_params->num_channels = bios_info->ma_channel_number;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
index 187f5b27fdc8..3edc81e2d417 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn316/dcn316_clk_mgr.c
@@ -553,6 +553,7 @@ static void dcn316_clk_mgr_helper_populate_bw_params(
 
 	bw_params->vram_type = bios_info->memory_type;
 	bw_params->num_channels = bios_info->ma_channel_number;
+	bw_params->dram_channel_width_bytes = bios_info->memory_type == 0x22 ? 8 : 4;
 
 	for (i = 0; i < WM_SET_COUNT; i++) {
 		bw_params->wm_table.entries[i].wm_inst = i;
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
index 1c612ccf1944..200fcec19186 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_clk_mgr.c
@@ -157,6 +157,7 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base)
 	struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
 	unsigned int num_levels;
 	struct clk_limit_num_entries *num_entries_per_clk = &clk_mgr_base->bw_params->clk_table.num_entries_per_clk;
+	unsigned int i;
 
 	memset(&(clk_mgr_base->clks), 0, sizeof(struct dc_clocks));
 	clk_mgr_base->clks.p_state_change_support = true;
@@ -205,18 +206,17 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base)
 		clk_mgr->dpm_present = true;
 
 	if (clk_mgr_base->ctx->dc->debug.min_disp_clk_khz) {
-		unsigned int i;
-
 		for (i = 0; i < num_levels; i++)
 			if (clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz
 					< khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_disp_clk_khz))
 				clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz
 					= khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_disp_clk_khz);
 	}
+	for (i = 0; i < num_levels; i++)
+		if (clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz > 1950)
+			clk_mgr_base->bw_params->clk_table.entries[i].dispclk_mhz = 1950;
 
 	if (clk_mgr_base->ctx->dc->debug.min_dpp_clk_khz) {
-		unsigned int i;
-
 		for (i = 0; i < num_levels; i++)
 			if (clk_mgr_base->bw_params->clk_table.entries[i].dppclk_mhz
 					< khz_to_mhz_ceil(clk_mgr_base->ctx->dc->debug.min_dpp_clk_khz))
@@ -233,41 +233,6 @@ void dcn32_init_clocks(struct clk_mgr *clk_mgr_base)
 	DC_FP_END();
 }
 
-static void dcn32_update_clocks_update_dtb_dto(struct clk_mgr_internal *clk_mgr,
-			struct dc_state *context,
-			int ref_dtbclk_khz)
-{
-	struct dccg *dccg = clk_mgr->dccg;
-	uint32_t tg_mask = 0;
-	int i;
-
-	for (i = 0; i < clk_mgr->base.ctx->dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-		struct dtbclk_dto_params dto_params = {0};
-
-		/* use mask to program DTO once per tg */
-		if (pipe_ctx->stream_res.tg &&
-				!(tg_mask & (1 << pipe_ctx->stream_res.tg->inst))) {
-			tg_mask |= (1 << pipe_ctx->stream_res.tg->inst);
-
-			dto_params.otg_inst = pipe_ctx->stream_res.tg->inst;
-			dto_params.ref_dtbclk_khz = ref_dtbclk_khz;
-
-			if (is_dp_128b_132b_signal(pipe_ctx)) {
-				dto_params.pixclk_khz = pipe_ctx->stream->phy_pix_clk;
-
-				if (pipe_ctx->stream_res.audio != NULL)
-					dto_params.req_audio_dtbclk_khz = 24000;
-			}
-			if (dc_is_hdmi_signal(pipe_ctx->stream->signal))
-				dto_params.is_hdmi = true;
-
-			dccg->funcs->set_dtbclk_dto(clk_mgr->dccg, &dto_params);
-			//dccg->funcs->set_audio_dtbclk_dto(clk_mgr->dccg, &dto_params);
-		}
-	}
-}
-
 /* Since DPPCLK request to PMFW needs to be exact (due to DPP DTO programming),
  * update DPPCLK to be the exact frequency that will be set after the DPPCLK
  * divider is updated. This will prevent rounding issues that could cause DPP
@@ -438,7 +403,7 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
 	}
 
 	if (!new_clocks->dtbclk_en) {
-		new_clocks->ref_dtbclk_khz = 0;
+		new_clocks->ref_dtbclk_khz = clk_mgr_base->bw_params->clk_table.entries[0].dtbclk_mhz * 1000;
 	}
 
 	/* clock limits are received with MHz precision, divide by 1000 to prevent setting clocks at every call */
@@ -447,8 +412,6 @@ static void dcn32_update_clocks(struct clk_mgr *clk_mgr_base,
 		/* DCCG requires KHz precision for DTBCLK */
 		clk_mgr_base->clks.ref_dtbclk_khz =
 				dcn32_smu_set_hard_min_by_freq(clk_mgr, PPCLK_DTBCLK, khz_to_mhz_ceil(new_clocks->ref_dtbclk_khz));
-
-		dcn32_update_clocks_update_dtb_dto(clk_mgr, context, clk_mgr_base->clks.ref_dtbclk_khz);
 	}
 
 	if (dc->config.forced_clocks == false || (force_reset && safe_to_lower)) {
@@ -669,6 +632,9 @@ static void dcn32_get_memclk_states_from_smu(struct clk_mgr *clk_mgr_base)
 			&clk_mgr_base->bw_params->clk_table.entries[0].memclk_mhz,
 			&num_entries_per_clk->num_memclk_levels);
 
+	/* memclk must have at least one level */
+	num_entries_per_clk->num_memclk_levels = num_entries_per_clk->num_memclk_levels ? num_entries_per_clk->num_memclk_levels : 1;
+
 	dcn32_init_single_clock(clk_mgr, PPCLK_FCLK,
 			&clk_mgr_base->bw_params->clk_table.entries[0].fclk_mhz,
 			&num_entries_per_clk->num_fclk_levels);
diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h
index d30fbbdd1792..d3d5a8caccf8 100644
--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h
+++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn32/dcn32_smu13_driver_if.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
 // This is a stripped-down version of the smu13_driver_if.h file for the relevant DAL interfaces.
 
 #define SMU13_DRIVER_IF_VERSION  0x18
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 997ab031f816..0cb8d1f934d1 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -135,9 +135,7 @@ static const char DC_BUILD_ID[] = "production-build";
  * one or two (in the pipe-split case).
  */
 
-/*******************************************************************************
- * Private functions
- ******************************************************************************/
+/* Private functions */
 
 static inline void elevate_update_type(enum surface_update_type *original, enum surface_update_type new)
 {
@@ -401,9 +399,6 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
 {
 	int i;
 
-	if (memcmp(adjust, &stream->adjust, sizeof(struct dc_crtc_timing_adjust)) == 0)
-		return true;
-
 	stream->adjust.v_total_max = adjust->v_total_max;
 	stream->adjust.v_total_mid = adjust->v_total_mid;
 	stream->adjust.v_total_mid_frame_num = adjust->v_total_mid_frame_num;
@@ -424,18 +419,14 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
 }
 
 /**
- *****************************************************************************
- *  Function: dc_stream_get_last_vrr_vtotal
+ * dc_stream_get_last_used_drr_vtotal - dc_stream_get_last_vrr_vtotal
  *
- *  @brief
- *     Looks up the pipe context of dc_stream_state and gets the
- *     last VTOTAL used by DRR (Dynamic Refresh Rate)
+ * @dc: [in] dc reference
+ * @stream: [in] Initial dc stream state
+ * @adjust: [in] Updated parameters for vertical_total_min and
  *
- *  @param [in] dc: dc reference
- *  @param [in] stream: Initial dc stream state
- *  @param [in] adjust: Updated parameters for vertical_total_min and
- *  vertical_total_max
- *****************************************************************************
+ * Looks up the pipe context of dc_stream_state and gets the last VTOTAL used
+ * by DRR (Dynamic Refresh Rate)
  */
 bool dc_stream_get_last_used_drr_vtotal(struct dc *dc,
 		struct dc_stream_state *stream,
@@ -491,86 +482,79 @@ bool dc_stream_get_crtc_position(struct dc *dc,
 }
 
 #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
-bool dc_stream_forward_dmcu_crc_window(struct dc *dc, struct dc_stream_state *stream,
-			     struct crc_params *crc_window)
+static inline void
+dc_stream_forward_dmub_crc_window(struct dc_dmub_srv *dmub_srv,
+		struct rect *rect, struct otg_phy_mux *mux_mapping, bool is_stop)
 {
-	int i;
-	struct dmcu *dmcu = dc->res_pool->dmcu;
-	struct pipe_ctx *pipe;
-	struct crc_region tmp_win, *crc_win;
-	struct otg_phy_mux mapping_tmp, *mux_mapping;
-
-	/*crc window can't be null*/
-	if (!crc_window)
-		return false;
-
-	if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu))) {
-		crc_win = &tmp_win;
-		mux_mapping = &mapping_tmp;
-		/*set crc window*/
-		tmp_win.x_start = crc_window->windowa_x_start;
-		tmp_win.y_start = crc_window->windowa_y_start;
-		tmp_win.x_end = crc_window->windowa_x_end;
-		tmp_win.y_end = crc_window->windowa_y_end;
-
-		for (i = 0; i < MAX_PIPES; i++) {
-			pipe = &dc->current_state->res_ctx.pipe_ctx[i];
-			if (pipe->stream == stream && !pipe->top_pipe && !pipe->prev_odm_pipe)
-				break;
-		}
-
-		/* Stream not found */
-		if (i == MAX_PIPES)
-			return false;
-
+	union dmub_rb_cmd cmd = {0};
 
-		/*set mux routing info*/
-		mapping_tmp.phy_output_num = stream->link->link_enc_hw_inst;
-		mapping_tmp.otg_output_num = pipe->stream_res.tg->inst;
+	cmd.secure_display.roi_info.phy_id = mux_mapping->phy_output_num;
+	cmd.secure_display.roi_info.otg_id = mux_mapping->otg_output_num;
 
-		dmcu->funcs->forward_crc_window(dmcu, crc_win, mux_mapping);
+	if (is_stop) {
+		cmd.secure_display.header.type = DMUB_CMD__SECURE_DISPLAY;
+		cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_CRC_STOP_UPDATE;
 	} else {
-		DC_LOG_DC("dmcu is not initialized");
-		return false;
+		cmd.secure_display.header.type = DMUB_CMD__SECURE_DISPLAY;
+		cmd.secure_display.header.sub_type = DMUB_CMD__SECURE_DISPLAY_CRC_WIN_NOTIFY;
+		cmd.secure_display.roi_info.x_start = rect->x;
+		cmd.secure_display.roi_info.y_start = rect->y;
+		cmd.secure_display.roi_info.x_end = rect->x + rect->width;
+		cmd.secure_display.roi_info.y_end = rect->y + rect->height;
 	}
 
-	return true;
+	dc_dmub_srv_cmd_queue(dmub_srv, &cmd);
+	dc_dmub_srv_cmd_execute(dmub_srv);
 }
 
-bool dc_stream_stop_dmcu_crc_win_update(struct dc *dc, struct dc_stream_state *stream)
+static inline void
+dc_stream_forward_dmcu_crc_window(struct dmcu *dmcu,
+		struct rect *rect, struct otg_phy_mux *mux_mapping, bool is_stop)
 {
-	int i;
-	struct dmcu *dmcu = dc->res_pool->dmcu;
-	struct pipe_ctx *pipe;
-	struct otg_phy_mux mapping_tmp, *mux_mapping;
+	if (is_stop)
+		dmcu->funcs->stop_crc_win_update(dmcu, mux_mapping);
+	else
+		dmcu->funcs->forward_crc_window(dmcu, rect, mux_mapping);
+}
 
-	if ((dmcu != NULL && dmcu->funcs->is_dmcu_initialized(dmcu))) {
-		mux_mapping = &mapping_tmp;
+bool
+dc_stream_forward_crc_window(struct dc *dc,
+		struct rect *rect, struct dc_stream_state *stream, bool is_stop)
+{
+	struct dmcu *dmcu;
+	struct dc_dmub_srv *dmub_srv;
+	struct otg_phy_mux mux_mapping;
+	struct pipe_ctx *pipe;
+	int i;
 
-		for (i = 0; i < MAX_PIPES; i++) {
-			pipe = &dc->current_state->res_ctx.pipe_ctx[i];
-			if (pipe->stream == stream && !pipe->top_pipe && !pipe->prev_odm_pipe)
-				break;
-		}
+	for (i = 0; i < MAX_PIPES; i++) {
+		pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+		if (pipe->stream == stream && !pipe->top_pipe && !pipe->prev_odm_pipe)
+			break;
+	}
 
-		/* Stream not found */
-		if (i == MAX_PIPES)
-			return false;
+	/* Stream not found */
+	if (i == MAX_PIPES)
+		return false;
 
+	mux_mapping.phy_output_num = stream->link->link_enc_hw_inst;
+	mux_mapping.otg_output_num = pipe->stream_res.tg->inst;
 
-		/*set mux routing info*/
-		mapping_tmp.phy_output_num = stream->link->link_enc_hw_inst;
-		mapping_tmp.otg_output_num = pipe->stream_res.tg->inst;
+	dmcu = dc->res_pool->dmcu;
+	dmub_srv = dc->ctx->dmub_srv;
 
-		dmcu->funcs->stop_crc_win_update(dmcu, mux_mapping);
-	} else {
-		DC_LOG_DC("dmcu is not initialized");
+	/* forward to dmub */
+	if (dmub_srv)
+		dc_stream_forward_dmub_crc_window(dmub_srv, rect, &mux_mapping, is_stop);
+	/* forward to dmcu */
+	else if (dmcu && dmcu->funcs->is_dmcu_initialized(dmcu))
+		dc_stream_forward_dmcu_crc_window(dmcu, rect, &mux_mapping, is_stop);
+	else
 		return false;
-	}
 
 	return true;
 }
-#endif
+#endif /* CONFIG_DRM_AMD_SECURE_DISPLAY */
 
 /**
  * dc_stream_configure_crc() - Configure CRC capture for the given stream.
@@ -1070,6 +1054,8 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 	int i, j;
 	struct dc_state *dangling_context = dc_create_state(dc);
 	struct dc_state *current_ctx;
+	struct pipe_ctx *pipe;
+	struct timing_generator *tg;
 
 	if (dangling_context == NULL)
 		return;
@@ -1112,6 +1098,18 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 		}
 
 		if (should_disable && old_stream) {
+			pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+			tg = pipe->stream_res.tg;
+			/* When disabling plane for a phantom pipe, we must turn on the
+			 * phantom OTG so the disable programming gets the double buffer
+			 * update. Otherwise the pipe will be left in a partially disabled
+			 * state that can result in underflow or hang when enabling it
+			 * again for different use.
+			 */
+			if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				if (tg->funcs->enable_crtc)
+					tg->funcs->enable_crtc(tg);
+			}
 			dc_rem_all_planes_for_stream(dc, old_stream, dangling_context);
 			disable_all_writeback_pipes_for_stream(dc, old_stream, dangling_context);
 
@@ -1127,6 +1125,15 @@ static void disable_dangling_plane(struct dc *dc, struct dc_state *context)
 				dc->hwss.interdependent_update_lock(dc, dc->current_state, false);
 				dc->hwss.post_unlock_program_front_end(dc, dangling_context);
 			}
+			/* We need to put the phantom OTG back into it's default (disabled) state or we
+			 * can get corruption when transition from one SubVP config to a different one.
+			 * The OTG is set to disable on falling edge of VUPDATE so the plane disable
+			 * will still get it's double buffer update.
+			 */
+			if (old_stream->mall_stream_config.type == SUBVP_PHANTOM) {
+				if (tg->funcs->disable_phantom_crtc)
+					tg->funcs->disable_phantom_crtc(tg);
+			}
 		}
 	}
 
@@ -1219,9 +1226,7 @@ static void wait_for_no_pipes_pending(struct dc *dc, struct dc_state *context)
 	PERF_TRACE();
 }
 
-/*******************************************************************************
- * Public functions
- ******************************************************************************/
+/* Public functions */
 
 struct dc *dc_create(const struct dc_init_data *init_params)
 {
@@ -1488,17 +1493,19 @@ static void program_timing_sync(
 	}
 }
 
-static bool context_changed(
-		struct dc *dc,
-		struct dc_state *context)
+static bool streams_changed(struct dc *dc,
+			    struct dc_stream_state *streams[],
+			    uint8_t stream_count)
 {
 	uint8_t i;
 
-	if (context->stream_count != dc->current_state->stream_count)
+	if (stream_count != dc->current_state->stream_count)
 		return true;
 
 	for (i = 0; i < dc->current_state->stream_count; i++) {
-		if (dc->current_state->streams[i] != context->streams[i])
+		if (dc->current_state->streams[i] != streams[i])
+			return true;
+		if (!streams[i]->link->link_state_valid)
 			return true;
 	}
 
@@ -1549,6 +1556,9 @@ bool dc_validate_boot_timing(const struct dc *dc,
 	if (tg_inst >= dc->res_pool->timing_generator_count)
 		return false;
 
+	if (tg_inst != link->link_enc->preferred_engine)
+		return false;
+
 	tg = dc->res_pool->timing_generators[tg_inst];
 
 	if (!tg->funcs->get_hw_timing)
@@ -1722,8 +1732,13 @@ void dc_z10_save_init(struct dc *dc)
 		dc->hwss.z10_save_init(dc);
 }
 
-/*
- * Applies given context to HW and copy it into current context.
+/**
+ * dc_commit_state_no_check - Apply context to the hardware
+ *
+ * @dc: DC object with the current status to be updated
+ * @context: New state that will become the current status at the end of this function
+ *
+ * Applies given context to the hardware and copy it into current context.
  * It's up to the user to release the src context afterwards.
  */
 static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *context)
@@ -1760,6 +1775,12 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 		context->stream_count == 0)
 		dc->hwss.prepare_bandwidth(dc, context);
 
+	/* When SubVP is active, all HW programming must be done while
+	 * SubVP lock is acquired
+	 */
+	if (dc->hwss.subvp_pipe_control_lock)
+		dc->hwss.subvp_pipe_control_lock(dc, context, true, true, NULL, subvp_prev_use);
+
 	if (dc->debug.enable_double_buffered_dsc_pg_support)
 		dc->hwss.update_dsc_pg(dc, context, false);
 
@@ -1787,9 +1808,6 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 		dc->hwss.wait_for_mpcc_disconnect(dc, dc->res_pool, pipe);
 	}
 
-	if (dc->hwss.subvp_pipe_control_lock)
-		dc->hwss.subvp_pipe_control_lock(dc, context, true, true, NULL, subvp_prev_use);
-
 	result = dc->hwss.apply_ctx_to_hw(dc, context);
 
 	if (result != DC_OK) {
@@ -1888,12 +1906,108 @@ static enum dc_status dc_commit_state_no_check(struct dc *dc, struct dc_state *c
 	return result;
 }
 
+/**
+ * dc_commit_streams - Commit current stream state
+ *
+ * @dc: DC object with the commit state to be configured in the hardware
+ * @streams: Array with a list of stream state
+ * @stream_count: Total of streams
+ *
+ * Function responsible for commit streams change to the hardware.
+ *
+ * Return:
+ * Return DC_OK if everything work as expected, otherwise, return a dc_status
+ * code.
+ */
+enum dc_status dc_commit_streams(struct dc *dc,
+				 struct dc_stream_state *streams[],
+				 uint8_t stream_count)
+{
+	int i, j;
+	struct dc_state *context;
+	enum dc_status res = DC_OK;
+	struct dc_validation_set set[MAX_STREAMS] = {0};
+
+	if (dc->ctx->dce_environment == DCE_ENV_VIRTUAL_HW)
+		return res;
+
+	if (!streams_changed(dc, streams, stream_count))
+		return res;
+
+	DC_LOG_DC("%s: %d streams\n", __func__, stream_count);
+
+	for (i = 0; i < stream_count; i++) {
+		struct dc_stream_state *stream = streams[i];
+		struct dc_stream_status *status = dc_stream_get_status(stream);
+
+		dc_stream_log(dc, stream);
+
+		set[i].stream = stream;
+
+		if (status) {
+			set[i].plane_count = status->plane_count;
+			for (j = 0; j < status->plane_count; j++)
+				set[i].plane_states[j] = status->plane_states[j];
+		}
+	}
+
+	context = dc_create_state(dc);
+	if (!context)
+		goto context_alloc_fail;
+
+	dc_resource_state_copy_construct_current(dc, context);
+
+	res = dc_validate_with_context(dc, set, stream_count, context, false);
+	if (res != DC_OK) {
+		BREAK_TO_DEBUGGER();
+		goto fail;
+	}
+
+	res = dc_commit_state_no_check(dc, context);
+
+	for (i = 0; i < stream_count; i++) {
+		for (j = 0; j < context->stream_count; j++) {
+			if (streams[i]->stream_id == context->streams[j]->stream_id)
+				streams[i]->out.otg_offset = context->stream_status[j].primary_otg_inst;
+
+			if (dc_is_embedded_signal(streams[i]->signal)) {
+				struct dc_stream_status *status = dc_stream_get_status_from_state(context, streams[i]);
+
+				if (dc->hwss.is_abm_supported)
+					status->is_abm_supported = dc->hwss.is_abm_supported(dc, context, streams[i]);
+				else
+					status->is_abm_supported = true;
+			}
+		}
+	}
+
+fail:
+	dc_release_state(context);
+
+context_alloc_fail:
+
+	DC_LOG_DC("%s Finished.\n", __func__);
+
+	return res;
+}
+
+/* TODO: When the transition to the new commit sequence is done, remove this
+ * function in favor of dc_commit_streams. */
 bool dc_commit_state(struct dc *dc, struct dc_state *context)
 {
 	enum dc_status result = DC_ERROR_UNEXPECTED;
 	int i;
 
-	if (!context_changed(dc, context))
+	/* TODO: Since change commit sequence can have a huge impact,
+	 * we decided to only enable it for DCN3x. However, as soon as
+	 * we get more confident about this change we'll need to enable
+	 * the new sequence for all ASICs. */
+	if (dc->ctx->dce_version >= DCN_VERSION_3_2) {
+		result = dc_commit_streams(dc, context->streams, context->stream_count);
+		return result == DC_OK;
+	}
+
+	if (!streams_changed(dc, context->streams, context->stream_count))
 		return DC_OK;
 
 	DC_LOG_DC("%s: %d streams\n",
@@ -2950,7 +3064,7 @@ static bool update_planes_and_stream_state(struct dc *dc,
 		 * Ensures that we have enough pipes for newly added MPO planes
 		 */
 		if (dc->res_pool->funcs->remove_phantom_pipes)
-			dc->res_pool->funcs->remove_phantom_pipes(dc, context);
+			dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);
 
 		/*remove old surfaces from context */
 		if (!dc_rem_all_planes_for_stream(dc, stream, context)) {
@@ -2987,6 +3101,19 @@ static bool update_planes_and_stream_state(struct dc *dc,
 
 	if (update_type == UPDATE_TYPE_FULL) {
 		if (!dc->res_pool->funcs->validate_bandwidth(dc, context, false)) {
+			/* For phantom pipes we remove and create a new set of phantom pipes
+			 * for each full update (because we don't know if we'll need phantom
+			 * pipes until after the first round of validation). However, if validation
+			 * fails we need to keep the existing phantom pipes (because we don't update
+			 * the dc->current_state).
+			 *
+			 * The phantom stream/plane refcount is decremented for validation because
+			 * we assume it'll be removed (the free comes when the dc_state is freed),
+			 * but if validation fails we have to increment back the refcount so it's
+			 * consistent.
+			 */
+			if (dc->res_pool->funcs->retain_phantom_pipes)
+				dc->res_pool->funcs->retain_phantom_pipes(dc, dc->current_state);
 			BREAK_TO_DEBUGGER();
 			goto fail;
 		}
@@ -3297,22 +3424,6 @@ static void commit_planes_for_stream(struct dc *dc,
 		dc->hwss.pipe_control_lock(dc, top_pipe_to_program, true);
 	}
 
-	if (update_type != UPDATE_TYPE_FAST) {
-		for (i = 0; i < dc->res_pool->pipe_count; i++) {
-			struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
-
-			if ((new_pipe->stream && new_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) ||
-					subvp_prev_use) {
-				// If old context or new context has phantom pipes, apply
-				// the phantom timings now. We can't change the phantom
-				// pipe configuration safely without driver acquiring
-				// the DMCUB lock first.
-				dc->hwss.apply_ctx_to_hw(dc, context);
-				break;
-			}
-		}
-	}
-
 	dc_dmub_update_dirty_rect(dc, surface_count, stream, srf_updates, context);
 
 	if (update_type != UPDATE_TYPE_FAST) {
@@ -3370,6 +3481,24 @@ static void commit_planes_for_stream(struct dc *dc,
 		return;
 	}
 
+	if (update_type != UPDATE_TYPE_FAST) {
+		for (j = 0; j < dc->res_pool->pipe_count; j++) {
+			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[j];
+
+			if (dc->debug.visual_confirm == VISUAL_CONFIRM_SUBVP &&
+				pipe_ctx->stream && pipe_ctx->plane_state) {
+				/* Only update visual confirm for SUBVP here.
+				 * The bar appears on all pipes, so we need to update the bar on all displays,
+				 * so the information doesn't get stale.
+				 */
+				struct mpcc_blnd_cfg blnd_cfg = { 0 };
+
+				dc->hwss.update_visual_confirm_color(dc, pipe_ctx, &blnd_cfg.black_color,
+						pipe_ctx->plane_res.hubp->inst);
+			}
+		}
+	}
+
 	if (!IS_DIAG_DC(dc->ctx->dce_environment)) {
 		for (i = 0; i < surface_count; i++) {
 			struct dc_plane_state *plane_state = srf_updates[i].surface;
@@ -3487,7 +3616,6 @@ static void commit_planes_for_stream(struct dc *dc,
 					dc->hwss.update_plane_addr(dc, pipe_ctx);
 			}
 		}
-
 	}
 
 	if (should_lock_all_pipes && dc->hwss.interdependent_update_lock) {
@@ -3524,6 +3652,44 @@ static void commit_planes_for_stream(struct dc *dc,
 					top_pipe_to_program->stream_res.tg);
 		}
 
+	/* For phantom pipe OTG enable, it has to be done after any previous pipe
+	 * that was in use has already been programmed at gotten its double buffer
+	 * update for "disable".
+	 */
+	if (update_type != UPDATE_TYPE_FAST) {
+		for (i = 0; i < dc->res_pool->pipe_count; i++) {
+			struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+			struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+			/* If an active, non-phantom pipe is being transitioned into a phantom
+			 * pipe, wait for the double buffer update to complete first before we do
+			 * ANY phantom pipe programming.
+			 */
+			if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM &&
+					old_pipe->stream && old_pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+				old_pipe->stream_res.tg->funcs->wait_for_state(
+						old_pipe->stream_res.tg,
+						CRTC_STATE_VBLANK);
+				old_pipe->stream_res.tg->funcs->wait_for_state(
+						old_pipe->stream_res.tg,
+						CRTC_STATE_VACTIVE);
+			}
+		}
+		for (i = 0; i < dc->res_pool->pipe_count; i++) {
+			struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
+
+			if ((new_pipe->stream && new_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) ||
+					subvp_prev_use) {
+				// If old context or new context has phantom pipes, apply
+				// the phantom timings now. We can't change the phantom
+				// pipe configuration safely without driver acquiring
+				// the DMCUB lock first.
+				dc->hwss.apply_ctx_to_hw(dc, context);
+				break;
+			}
+		}
+	}
+
 	if (update_type != UPDATE_TYPE_FAST)
 		dc->hwss.post_unlock_program_front_end(dc, context);
 	if (update_type != UPDATE_TYPE_FAST)
@@ -3563,10 +3729,24 @@ static void commit_planes_for_stream(struct dc *dc,
 	}
 }
 
-/* Determines if the incoming context requires a applying transition state with unnecessary
- * pipe splitting and ODM disabled, due to hardware limitations. In a case where
- * the OPP associated with an MPCC might change due to plane additions, this function
+/**
+ * could_mpcc_tree_change_for_active_pipes - Check if an OPP associated with MPCC might change
+ *
+ * @dc: Used to get the current state status
+ * @stream: Target stream, which we want to remove the attached planes
+ * @surface_count: Number of surface update
+ * @is_plane_addition: [in] Fill out with true if it is a plane addition case
+ *
+ * DCN32x and newer support a feature named Dynamic ODM which can conflict with
+ * the MPO if used simultaneously in some specific configurations (e.g.,
+ * 4k@144). This function checks if the incoming context requires applying a
+ * transition state with unnecessary pipe splitting and ODM disabled to
+ * circumvent our hardware limitations to prevent this edge case. If the OPP
+ * associated with an MPCC might change due to plane additions, this function
  * returns true.
+ *
+ * Return:
+ * Return true if OPP and MPCC might change, otherwise, return false.
  */
 static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc,
 		struct dc_stream_state *stream,
@@ -3576,6 +3756,7 @@ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc,
 
 	struct dc_stream_status *cur_stream_status = stream_get_status(dc->current_state, stream);
 	bool force_minimal_pipe_splitting = false;
+	bool subvp_active = false;
 	uint32_t i;
 
 	*is_plane_addition = false;
@@ -3608,39 +3789,55 @@ static bool could_mpcc_tree_change_for_active_pipes(struct dc *dc,
 		}
 	}
 
-	/* For SubVP pipe split case when adding MPO video
-	 * we need to add a minimal transition. In this case
-	 * there will be 2 streams (1 main stream, 1 phantom
-	 * stream).
-	 */
-	if (cur_stream_status &&
-			dc->current_state->stream_count == 2 &&
-			stream->mall_stream_config.type == SUBVP_MAIN) {
-		bool is_pipe_split = false;
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
-		for (i = 0; i < dc->res_pool->pipe_count; i++) {
-			if (dc->current_state->res_ctx.pipe_ctx[i].stream == stream &&
-					(dc->current_state->res_ctx.pipe_ctx[i].bottom_pipe ||
-					dc->current_state->res_ctx.pipe_ctx[i].next_odm_pipe)) {
-				is_pipe_split = true;
-				break;
-			}
+		if (pipe->stream && pipe->stream->mall_stream_config.type != SUBVP_NONE) {
+			subvp_active = true;
+			break;
 		}
+	}
 
+	/* For SubVP when adding or removing planes we need to add a minimal transition
+	 * (even when disabling all planes). Whenever disabling a phantom pipe, we
+	 * must use the minimal transition path to disable the pipe correctly.
+	 *
+	 * We want to use the minimal transition whenever subvp is active, not only if
+	 * a plane is being added / removed from a subvp stream (MPO plane can be added
+	 * to a DRR pipe of SubVP + DRR config, in which case we still want to run through
+	 * a min transition to disable subvp.
+	 */
+	if (cur_stream_status && subvp_active) {
 		/* determine if minimal transition is required due to SubVP*/
-		if (surface_count > 0 && is_pipe_split) {
-			if (cur_stream_status->plane_count > surface_count) {
-				force_minimal_pipe_splitting = true;
-			} else if (cur_stream_status->plane_count < surface_count) {
-				force_minimal_pipe_splitting = true;
-				*is_plane_addition = true;
-			}
+		if (cur_stream_status->plane_count > surface_count) {
+			force_minimal_pipe_splitting = true;
+		} else if (cur_stream_status->plane_count < surface_count) {
+			force_minimal_pipe_splitting = true;
+			*is_plane_addition = true;
 		}
 	}
 
 	return force_minimal_pipe_splitting;
 }
 
+/**
+ * commit_minimal_transition_state - Create a transition pipe split state
+ *
+ * @dc: Used to get the current state status
+ * @transition_base_context: New transition state
+ *
+ * In some specific configurations, such as pipe split on multi-display with
+ * MPO and/or Dynamic ODM, removing a plane may cause unsupported pipe
+ * programming when moving to new planes. To mitigate those types of problems,
+ * this function adds a transition state that minimizes pipe usage before
+ * programming the new configuration. When adding a new plane, the current
+ * state requires the least pipes, so it is applied without splitting. When
+ * removing a plane, the new state requires the least pipes, so it is applied
+ * without splitting.
+ *
+ * Return:
+ * Return false if something is wrong in the transition state.
+ */
 static bool commit_minimal_transition_state(struct dc *dc,
 		struct dc_state *transition_base_context)
 {
@@ -3650,9 +3847,48 @@ static bool commit_minimal_transition_state(struct dc *dc,
 	bool temp_subvp_policy;
 	enum dc_status ret = DC_ERROR_UNEXPECTED;
 	unsigned int i, j;
+	unsigned int pipe_in_use = 0;
+	bool subvp_in_use = false;
 
 	if (!transition_context)
 		return false;
+	/* Setup:
+	 * Store the current ODM and MPC config in some temp variables to be
+	 * restored after we commit the transition state.
+	 */
+
+	/* check current pipes in use*/
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &transition_base_context->res_ctx.pipe_ctx[i];
+
+		if (pipe->plane_state)
+			pipe_in_use++;
+	}
+
+	/* If SubVP is enabled and we are adding or removing planes from any main subvp
+	 * pipe, we must use the minimal transition.
+	 */
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
+
+		if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+			subvp_in_use = true;
+			break;
+		}
+	}
+
+	/* When the OS add a new surface if we have been used all of pipes with odm combine
+	 * and mpc split feature, it need use commit_minimal_transition_state to transition safely.
+	 * After OS exit MPO, it will back to use odm and mpc split with all of pipes, we need
+	 * call it again. Otherwise return true to skip.
+	 *
+	 * Reduce the scenarios to use dc_commit_state_no_check in the stage of flip. Especially
+	 * enter/exit MPO when DCN still have enough resources.
+	 */
+	if (pipe_in_use != dc->res_pool->pipe_count && !subvp_in_use) {
+		dc_release_state(transition_context);
+		return true;
+	}
 
 	if (!dc->config.is_vmin_only_asic) {
 		tmp_mpc_policy = dc->debug.pipe_split_policy;
@@ -3667,7 +3903,7 @@ static bool commit_minimal_transition_state(struct dc *dc,
 
 	dc_resource_state_copy_construct(transition_base_context, transition_context);
 
-	//commit minimal state
+	/* commit minimal state */
 	if (dc->res_pool->funcs->validate_bandwidth(dc, transition_context, false)) {
 		for (i = 0; i < transition_context->stream_count; i++) {
 			struct dc_stream_status *stream_status = &transition_context->stream_status[i];
@@ -3685,10 +3921,12 @@ static bool commit_minimal_transition_state(struct dc *dc,
 		ret = dc_commit_state_no_check(dc, transition_context);
 	}
 
-	/*always release as dc_commit_state_no_check retains in good case*/
+	/* always release as dc_commit_state_no_check retains in good case */
 	dc_release_state(transition_context);
 
-	/*restore previous pipe split and odm policy*/
+	/* TearDown:
+	 * Restore original configuration for ODM and MPO.
+	 */
 	if (!dc->config.is_vmin_only_asic)
 		dc->debug.pipe_split_policy = tmp_mpc_policy;
 
@@ -3696,12 +3934,12 @@ static bool commit_minimal_transition_state(struct dc *dc,
 	dc->debug.force_disable_subvp = temp_subvp_policy;
 
 	if (ret != DC_OK) {
-		/*this should never happen*/
+		/* this should never happen */
 		BREAK_TO_DEBUGGER();
 		return false;
 	}
 
-	/*force full surface update*/
+	/* force full surface update */
 	for (i = 0; i < dc->current_state->stream_count; i++) {
 		for (j = 0; j < dc->current_state->stream_status[i].plane_count; j++) {
 			dc->current_state->stream_status[i].plane_states[j]->update_flags.raw = 0xFFFFFFFF;
@@ -3719,6 +3957,7 @@ bool dc_update_planes_and_stream(struct dc *dc,
 	struct dc_state *context;
 	enum surface_update_type update_type;
 	int i;
+	struct mall_temp_config mall_temp_config;
 
 	/* In cases where MPO and split or ODM are used transitions can
 	 * cause underflow. Apply stream configuration with minimal pipe
@@ -3750,11 +3989,29 @@ bool dc_update_planes_and_stream(struct dc *dc,
 
 	/* on plane removal, minimal state is the new one */
 	if (force_minimal_pipe_splitting && !is_plane_addition) {
+		/* Since all phantom pipes are removed in full validation,
+		 * we have to save and restore the subvp/mall config when
+		 * we do a minimal transition since the flags marking the
+		 * pipe as subvp/phantom will be cleared (dc copy constructor
+		 * creates a shallow copy).
+		 */
+		if (dc->res_pool->funcs->save_mall_state)
+			dc->res_pool->funcs->save_mall_state(dc, context, &mall_temp_config);
 		if (!commit_minimal_transition_state(dc, context)) {
 			dc_release_state(context);
 			return false;
 		}
-
+		if (dc->res_pool->funcs->restore_mall_state)
+			dc->res_pool->funcs->restore_mall_state(dc, context, &mall_temp_config);
+
+		/* If we do a minimal transition with plane removal and the context
+		 * has subvp we also have to retain back the phantom stream / planes
+		 * since the refcount is decremented as part of the min transition
+		 * (we commit a state with no subvp, so the phantom streams / planes
+		 * had to be removed).
+		 */
+		if (dc->res_pool->funcs->retain_phantom_pipes)
+			dc->res_pool->funcs->retain_phantom_pipes(dc, context);
 		update_type = UPDATE_TYPE_FULL;
 	}
 
@@ -3806,6 +4063,18 @@ void dc_commit_updates_for_stream(struct dc *dc,
 	struct dc_context *dc_ctx = dc->ctx;
 	int i, j;
 
+	/* TODO: Since change commit sequence can have a huge impact,
+	 * we decided to only enable it for DCN3x. However, as soon as
+	 * we get more confident about this change we'll need to enable
+	 * the new sequence for all ASICs.
+	 */
+	if (dc->ctx->dce_version >= DCN_VERSION_3_2) {
+		dc_update_planes_and_stream(dc, srf_updates,
+					    surface_count, stream,
+					    stream_update);
+		return;
+	}
+
 	stream_status = dc_stream_get_status(stream);
 	context = dc->current_state;
 
@@ -4387,21 +4656,17 @@ void dc_mclk_switch_using_fw_based_vblank_stretch_shut_down(struct dc *dc)
 		dc->current_state->bw_ctx.bw.dcn.clk.fw_based_mclk_switching_shut_down = true;
 }
 
-/*
- *****************************************************************************
- * Function: dc_is_dmub_outbox_supported -
+/**
+ * dc_is_dmub_outbox_supported - Check if DMUB firmware support outbox notification
  *
- * @brief
- *      Checks whether DMUB FW supports outbox notifications, if supported
- *		DM should register outbox interrupt prior to actually enabling interrupts
- *		via dc_enable_dmub_outbox
+ * @dc: [in] dc structure
  *
- *  @param
- *		[in] dc: dc structure
+ * Checks whether DMUB FW supports outbox notifications, if supported DM
+ * should register outbox interrupt prior to actually enabling interrupts
+ * via dc_enable_dmub_outbox
  *
- *  @return
- *		True if DMUB FW supports outbox notifications, False otherwise
- *****************************************************************************
+ * Return:
+ * True if DMUB FW supports outbox notifications, False otherwise
  */
 bool dc_is_dmub_outbox_supported(struct dc *dc)
 {
@@ -4419,21 +4684,17 @@ bool dc_is_dmub_outbox_supported(struct dc *dc)
 	return dc->debug.enable_dmub_aux_for_legacy_ddc;
 }
 
-/*
- *****************************************************************************
- *  Function: dc_enable_dmub_notifications
+/**
+ * dc_enable_dmub_notifications - Check if dmub fw supports outbox
  *
- *  @brief
- *		Calls dc_is_dmub_outbox_supported to check if dmub fw supports outbox
- *		notifications. All DMs shall switch to dc_is_dmub_outbox_supported.
- *		This API shall be removed after switching.
+ * @dc: [in] dc structure
  *
- *  @param
- *		[in] dc: dc structure
+ * Calls dc_is_dmub_outbox_supported to check if dmub fw supports outbox
+ * notifications. All DMs shall switch to dc_is_dmub_outbox_supported.  This
+ * API shall be removed after switching.
  *
- *  @return
- *		True if DMUB FW supports outbox notifications, False otherwise
- *****************************************************************************
+ * Return:
+ * True if DMUB FW supports outbox notifications, False otherwise
  */
 bool dc_enable_dmub_notifications(struct dc *dc)
 {
@@ -4441,18 +4702,11 @@ bool dc_enable_dmub_notifications(struct dc *dc)
 }
 
 /**
- *****************************************************************************
- *  Function: dc_enable_dmub_outbox
+ * dc_enable_dmub_outbox - Enables DMUB unsolicited notification
  *
- *  @brief
- *		Enables DMUB unsolicited notifications to x86 via outbox
+ * dc: [in] dc structure
  *
- *  @param
- *		[in] dc: dc structure
- *
- *  @return
- *		None
- *****************************************************************************
+ * Enables DMUB unsolicited notifications to x86 via outbox.
  */
 void dc_enable_dmub_outbox(struct dc *dc)
 {
@@ -4553,21 +4807,17 @@ uint8_t get_link_index_from_dpia_port_index(const struct dc *dc,
 }
 
 /**
- *****************************************************************************
- *  Function: dc_process_dmub_set_config_async
+ * dc_process_dmub_set_config_async - Submits set_config command
  *
- *  @brief
- *		Submits set_config command to dmub via inbox message
+ * @dc: [in] dc structure
+ * @link_index: [in] link_index: link index
+ * @payload: [in] aux payload
+ * @notify: [out] set_config immediate reply
  *
- *  @param
- *		[in] dc: dc structure
- *		[in] link_index: link index
- *		[in] payload: aux payload
- *		[out] notify: set_config immediate reply
+ * Submits set_config command to dmub via inbox message.
  *
- *  @return
- *		True if successful, False if failure
- *****************************************************************************
+ * Return:
+ * True if successful, False if failure
  */
 bool dc_process_dmub_set_config_async(struct dc *dc,
 				uint32_t link_index,
@@ -4603,21 +4853,17 @@ bool dc_process_dmub_set_config_async(struct dc *dc,
 }
 
 /**
- *****************************************************************************
- *  Function: dc_process_dmub_set_mst_slots
+ * dc_process_dmub_set_mst_slots - Submits MST solt allocation
  *
- *  @brief
- *		Submits mst slot allocation command to dmub via inbox message
+ * @dc: [in] dc structure
+ * @link_index: [in] link index
+ * @mst_alloc_slots: [in] mst slots to be allotted
+ * @mst_slots_in_use: [out] mst slots in use returned in failure case
  *
- *  @param
- *		[in] dc: dc structure
- *		[in] link_index: link index
- *		[in] mst_alloc_slots: mst slots to be allotted
- *		[out] mst_slots_in_use: mst slots in use returned in failure case
+ * Submits mst slot allocation command to dmub via inbox message
  *
- *	@return
- *		DC_OK if successful, DC_ERROR if failure
- *****************************************************************************
+ * Return:
+ * DC_OK if successful, DC_ERROR if failure
  */
 enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
 				uint32_t link_index,
@@ -4657,19 +4903,12 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
 }
 
 /**
- *****************************************************************************
- *  Function: dc_process_dmub_dpia_hpd_int_enable
+ * dc_process_dmub_dpia_hpd_int_enable - Submits DPIA DPD interruption
  *
- *  @brief
- *		Submits dpia hpd int enable command to dmub via inbox message
+ * @dc [in]: dc structure
+ * @hpd_int_enable [in]: 1 for hpd int enable, 0 to disable
  *
- *  @param
- *		[in] dc: dc structure
- *		[in] hpd_int_enable: 1 for hpd int enable, 0 to disable
- *
- *	@return
- *		None
- *****************************************************************************
+ * Submits dpia hpd int enable command to dmub via inbox message
  */
 void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc,
 				uint32_t hpd_int_enable)
@@ -4698,16 +4937,13 @@ void dc_disable_accelerated_mode(struct dc *dc)
 
 
 /**
- *****************************************************************************
- *  dc_notify_vsync_int_state() - notifies vsync enable/disable state
+ *  dc_notify_vsync_int_state - notifies vsync enable/disable state
  *  @dc: dc structure
- *	@stream: stream where vsync int state changed
- *	@enable: whether vsync is enabled or disabled
- *
- *  Called when vsync is enabled/disabled
- *	Will notify DMUB to start/stop ABM interrupts after steady state is reached
+ *  @stream: stream where vsync int state changed
+ *  @enable: whether vsync is enabled or disabled
  *
- *****************************************************************************
+ *  Called when vsync is enabled/disabled Will notify DMUB to start/stop ABM
+ *  interrupts after steady state is reached.
  */
 void dc_notify_vsync_int_state(struct dc *dc, struct dc_stream_state *stream, bool enable)
 {
@@ -4749,14 +4985,18 @@ void dc_notify_vsync_int_state(struct dc *dc, struct dc_stream_state *stream, bo
 	if (pipe->stream_res.abm && pipe->stream_res.abm->funcs->set_abm_pause)
 		pipe->stream_res.abm->funcs->set_abm_pause(pipe->stream_res.abm, !enable, i, pipe->stream_res.tg->inst);
 }
-/*
- * dc_extended_blank_supported: Decide whether extended blank is supported
+
+/**
+ * dc_extended_blank_supported 0 Decide whether extended blank is supported
+ *
+ * @dc: [in] Current DC state
  *
- * Extended blank is a freesync optimization feature to be enabled in the future.
- * During the extra vblank period gained from freesync, we have the ability to enter z9/z10.
+ * Extended blank is a freesync optimization feature to be enabled in the
+ * future.  During the extra vblank period gained from freesync, we have the
+ * ability to enter z9/z10.
  *
- * @param [in] dc: Current DC state
- * @return: Indicate whether extended blank is supported (true or false)
+ * Return:
+ * Indicate whether extended blank is supported (true or false)
  */
 bool dc_extended_blank_supported(struct dc *dc)
 {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
index 7c2e3b8dc26a..471078fc3900 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_hw_sequencer.c
@@ -366,6 +366,7 @@ void get_hdr_visual_confirm_color(
 		struct tg_color *color)
 {
 	uint32_t color_value = MAX_TG_COLOR_VALUE;
+	bool is_sdr = false;
 
 	/* Determine the overscan color based on the top-most (desktop) plane's context */
 	struct pipe_ctx *top_pipe_ctx  = pipe_ctx;
@@ -382,7 +383,8 @@ void get_hdr_visual_confirm_color(
 			/* FreeSync 2 ARGB2101010 - set border color to pink */
 			color->color_r_cr = color_value;
 			color->color_b_cb = color_value;
-		}
+		} else
+			is_sdr = true;
 		break;
 	case PIXEL_FORMAT_FP16:
 		if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_PQ) {
@@ -391,14 +393,19 @@ void get_hdr_visual_confirm_color(
 		} else if (top_pipe_ctx->stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) {
 			/* FreeSync 2 HDR - set border color to green */
 			color->color_g_y = color_value;
-		}
+		} else
+			is_sdr = true;
 		break;
 	default:
+		is_sdr = true;
+		break;
+	}
+
+	if (is_sdr) {
 		/* SDR - set border color to Gray */
 		color->color_r_cr = color_value/2;
 		color->color_b_cb = color_value/2;
 		color->color_g_y = color_value/2;
-		break;
 	}
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index d7b1ace6328a..342e906ae26e 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -3378,7 +3378,7 @@ bool dc_link_setup_psr(struct dc_link *link,
 		case FAMILY_YELLOW_CARP:
 		case AMDGPU_FAMILY_GC_10_3_6:
 		case AMDGPU_FAMILY_GC_11_0_1:
-			if (dc->debug.disable_z10)
+			if (dc->debug.disable_z10 || dc->debug.psr_skip_crtc_disable)
 				psr_context->psr_level.bits.SKIP_CRTC_DISABLE = true;
 			break;
 		default:
@@ -4229,6 +4229,7 @@ static void fpga_dp_hpo_enable_link_and_stream(struct dc_state *state, struct pi
 		link_hwss->ext.set_throttled_vcp_size(pipe_ctx, avg_time_slots_per_mtp);
 
 	dc->hwss.unblank_stream(pipe_ctx, &stream->link->cur_link_settings);
+	dc->hwss.enable_audio_stream(pipe_ctx);
 }
 
 void core_link_enable_stream(
@@ -4308,10 +4309,7 @@ void core_link_enable_stream(
 			/* Still enable stream features & audio on seamless boot for DP external displays */
 			if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT) {
 				enable_stream_features(pipe_ctx);
-				if (pipe_ctx->stream_res.audio != NULL) {
-					pipe_ctx->stream_res.stream_enc->funcs->dp_audio_enable(pipe_ctx->stream_res.stream_enc);
-					dc->hwss.enable_audio_stream(pipe_ctx);
-				}
+				dc->hwss.enable_audio_stream(pipe_ctx);
 			}
 
 #if defined(CONFIG_DRM_AMD_DC_HDCP)
@@ -4665,6 +4663,10 @@ void dc_link_set_preferred_training_settings(struct dc *dc,
 		link->preferred_link_setting.link_rate = LINK_RATE_UNKNOWN;
 	}
 
+	if (link->connector_signal == SIGNAL_TYPE_DISPLAY_PORT &&
+			link->type == dc_connection_mst_branch)
+		dm_helpers_dp_mst_update_branch_bandwidth(dc->ctx, link);
+
 	/* Retrain now, or wait until next stream update to apply */
 	if (skip_immediate_retrain == false)
 		dc_link_set_preferred_link_settings(dc, &link->preferred_link_setting, link);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
index 651231387043..ce8d6a54ca54 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_ddc.c
@@ -82,6 +82,7 @@ struct dp_hdmi_dongle_signature_data {
 #define HDMI_SCDC_STATUS_FLAGS 0x40
 #define HDMI_SCDC_ERR_DETECT 0x50
 #define HDMI_SCDC_TEST_CONFIG 0xC0
+#define HDMI_SCDC_DEVICE_ID 0xD3
 
 union hdmi_scdc_update_read_data {
 	uint8_t byte[2];
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 1254d38f1778..dedd1246ce58 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -1912,7 +1912,7 @@ enum dc_status dpcd_configure_lttpr_mode(struct dc_link *link, struct link_train
 	return status;
 }
 
-static void dpcd_exit_training_mode(struct dc_link *link)
+static void dpcd_exit_training_mode(struct dc_link *link, enum dp_link_encoding encoding)
 {
 	uint8_t sink_status = 0;
 	uint8_t i;
@@ -1920,12 +1920,14 @@ static void dpcd_exit_training_mode(struct dc_link *link)
 	/* clear training pattern set */
 	dpcd_set_training_pattern(link, DP_TRAINING_PATTERN_VIDEOIDLE);
 
-	/* poll for intra-hop disable */
-	for (i = 0; i < 10; i++) {
-		if ((core_link_read_dpcd(link, DP_SINK_STATUS, &sink_status, 1) == DC_OK) &&
-				(sink_status & DP_INTRA_HOP_AUX_REPLY_INDICATION) == 0)
-			break;
-		udelay(1000);
+	if (encoding == DP_128b_132b_ENCODING) {
+		/* poll for intra-hop disable */
+		for (i = 0; i < 10; i++) {
+			if ((core_link_read_dpcd(link, DP_SINK_STATUS, &sink_status, 1) == DC_OK) &&
+					(sink_status & DP_INTRA_HOP_AUX_REPLY_INDICATION) == 0)
+				break;
+			udelay(1000);
+		}
 	}
 }
 
@@ -2649,7 +2651,7 @@ enum link_training_result dc_link_dp_perform_link_training(
 			&lt_settings);
 
 	/* reset previous training states */
-	dpcd_exit_training_mode(link);
+	dpcd_exit_training_mode(link, encoding);
 
 	/* configure link prior to entering training mode */
 	dpcd_configure_lttpr_mode(link, &lt_settings);
@@ -2670,7 +2672,7 @@ enum link_training_result dc_link_dp_perform_link_training(
 		ASSERT(0);
 
 	/* exit training mode */
-	dpcd_exit_training_mode(link);
+	dpcd_exit_training_mode(link, encoding);
 
 	/* switch to video idle */
 	if ((status == LINK_TRAINING_SUCCESS) || !skip_video_pattern)
@@ -2771,8 +2773,11 @@ bool perform_link_training_with_retries(
 					/* Update verified link settings to current one
 					 * Because DPIA LT might fallback to lower link setting.
 					 */
-					link->verified_link_cap.link_rate = link->cur_link_settings.link_rate;
-					link->verified_link_cap.lane_count = link->cur_link_settings.lane_count;
+					if (stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST) {
+						link->verified_link_cap.link_rate = link->cur_link_settings.link_rate;
+						link->verified_link_cap.lane_count = link->cur_link_settings.lane_count;
+						dm_helpers_dp_mst_update_branch_bandwidth(link->ctx, link);
+					}
 				}
 			} else {
 				status = dc_link_dp_perform_link_training(link,
@@ -3020,7 +3025,7 @@ static enum dc_link_rate get_lttpr_max_link_rate(struct dc_link *link)
 
 static enum dc_link_rate get_cable_max_link_rate(struct dc_link *link)
 {
-	enum dc_link_rate cable_max_link_rate = LINK_RATE_HIGH3;
+	enum dc_link_rate cable_max_link_rate = LINK_RATE_UNKNOWN;
 
 	if (link->dpcd_caps.cable_id.bits.UHBR10_20_CAPABILITY & DP_UHBR20)
 		cable_max_link_rate = LINK_RATE_UHBR20;
@@ -3083,15 +3088,29 @@ struct dc_link_settings dp_get_max_link_cap(struct dc_link *link)
 		max_link_cap.link_spread =
 				link->reported_link_cap.link_spread;
 
-	/* Lower link settings based on cable attributes */
+	/* Lower link settings based on cable attributes
+	 * Cable ID is a DP2 feature to identify max certified link rate that
+	 * a cable can carry. The cable identification method requires both
+	 * cable and display hardware support. Since the specs comes late, it is
+	 * anticipated that the first round of DP2 cables and displays may not
+	 * be fully compatible to reliably return cable ID data. Therefore the
+	 * decision of our cable id policy is that if the cable can return non
+	 * zero cable id data, we will take cable's link rate capability into
+	 * account. However if we get zero data, the cable link rate capability
+	 * is considered inconclusive. In this case, we will not take cable's
+	 * capability into account to avoid of over limiting hardware capability
+	 * from users. The max overall link rate capability is still determined
+	 * after actual dp pre-training. Cable id is considered as an auxiliary
+	 * method of determining max link bandwidth capability.
+	 */
 	cable_max_link_rate = get_cable_max_link_rate(link);
 
 	if (!link->dc->debug.ignore_cable_id &&
+			cable_max_link_rate != LINK_RATE_UNKNOWN &&
 			cable_max_link_rate < max_link_cap.link_rate)
 		max_link_cap.link_rate = cable_max_link_rate;
 
-	/*
-	 * account for lttpr repeaters cap
+	/* account for lttpr repeaters cap
 	 * notes: repeaters do not snoop in the DPRX Capabilities addresses (3.6.3).
 	 */
 	if (dp_is_lttpr_present(link)) {
@@ -4540,9 +4559,19 @@ void dc_link_dp_handle_link_loss(struct dc_link *link)
 
 	for (i = 0; i < MAX_PIPES; i++) {
 		pipe_ctx = &link->dc->current_state->res_ctx.pipe_ctx[i];
-		if (pipe_ctx && pipe_ctx->stream && !pipe_ctx->stream->dpms_off &&
-				pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe)
+		if (pipe_ctx && pipe_ctx->stream && !pipe_ctx->stream->dpms_off
+				&& pipe_ctx->stream->link == link && !pipe_ctx->prev_odm_pipe) {
+			// Always use max settings here for DP 1.4a LL Compliance CTS
+			if (link->is_automated) {
+				pipe_ctx->link_config.dp_link_settings.lane_count =
+						link->verified_link_cap.lane_count;
+				pipe_ctx->link_config.dp_link_settings.link_rate =
+						link->verified_link_cap.link_rate;
+				pipe_ctx->link_config.dp_link_settings.link_spread =
+						link->verified_link_cap.link_spread;
+			}
 			core_link_enable_stream(link->dc->current_state, pipe_ctx);
+		}
 	}
 }
 
@@ -4583,6 +4612,8 @@ bool dc_link_handle_hpd_rx_irq(struct dc_link *link, union hpd_irq_data *out_hpd
 	}
 
 	if (hpd_irq_dpcd_data.bytes.device_service_irq.bits.AUTOMATED_TEST) {
+		// Workaround for DP 1.4a LL Compliance CTS as USB4 has to share encoders unlike DP and USBC
+		link->is_automated = true;
 		device_service_clear.bits.AUTOMATED_TEST = 1;
 		core_link_write_dpcd(
 			link,
@@ -5031,7 +5062,7 @@ static bool dpcd_read_sink_ext_caps(struct dc_link *link)
 	return true;
 }
 
-bool dp_retrieve_lttpr_cap(struct dc_link *link)
+enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link)
 {
 	uint8_t lttpr_dpcd_data[8];
 	enum dc_status status = DC_ERROR_UNEXPECTED;
@@ -5099,7 +5130,7 @@ bool dp_retrieve_lttpr_cap(struct dc_link *link)
 		CONN_DATA_DETECT(link, lttpr_dpcd_data, sizeof(lttpr_dpcd_data), "LTTPR Caps: ");
 
 	DC_LOG_DC("is_lttpr_present = %d\n", is_lttpr_present);
-	return is_lttpr_present;
+	return status;
 }
 
 bool dp_is_lttpr_present(struct dc_link *link)
@@ -5227,76 +5258,45 @@ static void retrieve_cable_id(struct dc_link *link)
 				&link->dpcd_caps.cable_id, &usbc_cable_id);
 }
 
-/* DPRX may take some time to respond to AUX messages after HPD asserted.
- * If AUX read unsuccessful, try to wake unresponsive DPRX by toggling DPCD SET_POWER (0x600).
- */
-static enum dc_status wa_try_to_wake_dprx(struct dc_link *link, uint64_t timeout_ms)
+static enum dc_status wake_up_aux_channel(struct dc_link *link)
 {
 	enum dc_status status = DC_ERROR_UNEXPECTED;
-	uint8_t dpcd_data = 0;
-	uint64_t start_ts = 0;
-	uint64_t current_ts = 0;
-	uint64_t time_taken_ms = 0;
-	enum dc_connection_type type = dc_connection_none;
-	bool lttpr_present;
-	bool vbios_lttpr_interop = link->dc->caps.vbios_lttpr_aware;
+	uint32_t aux_channel_retry_cnt = 0;
+	uint8_t dpcd_power_state = '\0';
 
-	lttpr_present = dp_is_lttpr_present(link) ||
-			(!vbios_lttpr_interop || !link->dc->caps.extended_aux_timeout_support);
-	DC_LOG_DC("lttpr_present = %d.\n", lttpr_present ? 1 : 0);
+	while (status != DC_OK && aux_channel_retry_cnt < 10) {
+		status = core_link_read_dpcd(link, DP_SET_POWER,
+				&dpcd_power_state, sizeof(dpcd_power_state));
 
-	/* Issue an AUX read to test DPRX responsiveness. If LTTPR is supported the first read is expected to
-	 * be to determine LTTPR capabilities. Otherwise trying to read power state should be an innocuous AUX read.
-	 */
-	if (lttpr_present)
-		status = core_link_read_dpcd(
-				link,
-				DP_LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV,
-				&dpcd_data,
-				sizeof(dpcd_data));
-	else
-		status = core_link_read_dpcd(
-				link,
-				DP_SET_POWER,
-				&dpcd_data,
-				sizeof(dpcd_data));
+		/* Delay 1 ms if AUX CH is in power down state. Based on spec
+		 * section 2.3.1.2, if AUX CH may be powered down due to
+		 * write to DPCD 600h = 2. Sink AUX CH is monitoring differential
+		 * signal and may need up to 1 ms before being able to reply.
+		 */
+		if (status != DC_OK || dpcd_power_state == DP_SET_POWER_D3) {
+			udelay(1000);
+			aux_channel_retry_cnt++;
+		}
+	}
 
 	if (status != DC_OK) {
-		DC_LOG_WARNING("%s: Read DPCD LTTPR_CAP failed - try to toggle DPCD SET_POWER for %lld ms.",
-				__func__,
-				timeout_ms);
-		start_ts = dm_get_timestamp(link->ctx);
-
-		do {
-			if (!dc_link_detect_sink(link, &type) || type == dc_connection_none)
-				break;
-
-			dpcd_data = DP_SET_POWER_D3;
-			status = core_link_write_dpcd(
-					link,
-					DP_SET_POWER,
-					&dpcd_data,
-					sizeof(dpcd_data));
-
-			dpcd_data = DP_SET_POWER_D0;
-			status = core_link_write_dpcd(
-					link,
-					DP_SET_POWER,
-					&dpcd_data,
-					sizeof(dpcd_data));
-
-			current_ts = dm_get_timestamp(link->ctx);
-			time_taken_ms = div_u64(dm_get_elapse_time_in_ns(link->ctx, current_ts, start_ts), 1000000);
-		} while (status != DC_OK && time_taken_ms < timeout_ms);
+		dpcd_power_state = DP_SET_POWER_D0;
+		status = core_link_write_dpcd(
+				link,
+				DP_SET_POWER,
+				&dpcd_power_state,
+				sizeof(dpcd_power_state));
 
-		DC_LOG_WARNING("%s: DPCD SET_POWER %s after %lld ms%s",
-				__func__,
-				(status == DC_OK) ? "succeeded" : "failed",
-				time_taken_ms,
-				(type == dc_connection_none) ? ". Unplugged." : ".");
+		dpcd_power_state = DP_SET_POWER_D3;
+		status = core_link_write_dpcd(
+				link,
+				DP_SET_POWER,
+				&dpcd_power_state,
+				sizeof(dpcd_power_state));
+		return DC_ERROR_UNEXPECTED;
 	}
 
-	return status;
+	return DC_OK;
 }
 
 static bool retrieve_link_cap(struct dc_link *link)
@@ -5308,7 +5308,6 @@ static bool retrieve_link_cap(struct dc_link *link)
 	/*Only need to read 1 byte starting from DP_DPRX_FEATURE_ENUMERATION_LIST.
 	 */
 	uint8_t dpcd_dprx_data = '\0';
-	uint8_t dpcd_power_state = '\0';
 
 	struct dp_device_vendor_id sink_id;
 	union down_stream_port_count down_strm_port_count;
@@ -5316,11 +5315,9 @@ static bool retrieve_link_cap(struct dc_link *link)
 	union dp_downstream_port_present ds_port = { 0 };
 	enum dc_status status = DC_ERROR_UNEXPECTED;
 	uint32_t read_dpcd_retry_cnt = 3;
-	uint32_t aux_channel_retry_cnt = 0;
 	int i;
 	struct dp_sink_hw_fw_revision dp_hw_fw_revision;
 	const uint32_t post_oui_delay = 30; // 30ms
-	bool is_lttpr_present = false;
 
 	memset(dpcd_data, '\0', sizeof(dpcd_data));
 	memset(&down_strm_port_count,
@@ -5335,51 +5332,17 @@ static bool retrieve_link_cap(struct dc_link *link)
 	dc_link_aux_try_to_configure_timeout(link->ddc,
 			LINK_AUX_DEFAULT_LTTPR_TIMEOUT_PERIOD);
 
-	/* Try to ensure AUX channel active before proceeding. */
-	if (link->dc->debug.aux_wake_wa.bits.enable_wa) {
-		uint64_t timeout_ms = link->dc->debug.aux_wake_wa.bits.timeout_ms;
+	status = dp_retrieve_lttpr_cap(link);
 
-		if (link->dc->debug.aux_wake_wa.bits.use_default_timeout)
-			timeout_ms = LINK_AUX_WAKE_TIMEOUT_MS;
-		status = wa_try_to_wake_dprx(link, timeout_ms);
-	}
-
-	while (status != DC_OK && aux_channel_retry_cnt < 10) {
-		status = core_link_read_dpcd(link, DP_SET_POWER,
-				&dpcd_power_state, sizeof(dpcd_power_state));
-
-		/* Delay 1 ms if AUX CH is in power down state. Based on spec
-		 * section 2.3.1.2, if AUX CH may be powered down due to
-		 * write to DPCD 600h = 2. Sink AUX CH is monitoring differential
-		 * signal and may need up to 1 ms before being able to reply.
-		 */
-		if (status != DC_OK || dpcd_power_state == DP_SET_POWER_D3) {
-			udelay(1000);
-			aux_channel_retry_cnt++;
-		}
-	}
-
-	/* If aux channel is not active, return false and trigger another detect*/
 	if (status != DC_OK) {
-		dpcd_power_state = DP_SET_POWER_D0;
-		status = core_link_write_dpcd(
-				link,
-				DP_SET_POWER,
-				&dpcd_power_state,
-				sizeof(dpcd_power_state));
-
-		dpcd_power_state = DP_SET_POWER_D3;
-		status = core_link_write_dpcd(
-				link,
-				DP_SET_POWER,
-				&dpcd_power_state,
-				sizeof(dpcd_power_state));
-		return false;
+		status = wake_up_aux_channel(link);
+		if (status == DC_OK)
+			dp_retrieve_lttpr_cap(link);
+		else
+			return false;
 	}
 
-	is_lttpr_present = dp_retrieve_lttpr_cap(link);
-
-	if (is_lttpr_present)
+	if (dp_is_lttpr_present(link))
 		configure_lttpr_mode_transparent(link);
 
 	/* Read DP tunneling information. */
@@ -5406,7 +5369,7 @@ static bool retrieve_link_cap(struct dc_link *link)
 		return false;
 	}
 
-	if (!is_lttpr_present)
+	if (!dp_is_lttpr_present(link))
 		dc_link_aux_try_to_configure_timeout(link->ddc, LINK_AUX_DEFAULT_TIMEOUT_PERIOD);
 
 	{
@@ -7294,6 +7257,7 @@ void dp_retrain_link_dp_test(struct dc_link *link,
 	struct pipe_ctx *pipes =
 			&link->dc->current_state->res_ctx.pipe_ctx[0];
 	unsigned int i;
+	bool do_fallback = false;
 
 
 	for (i = 0; i < MAX_PIPES; i++) {
@@ -7326,32 +7290,23 @@ void dp_retrain_link_dp_test(struct dc_link *link,
 			memset(&link->cur_link_settings, 0,
 				sizeof(link->cur_link_settings));
 
+			if (link->ep_type == DISPLAY_ENDPOINT_USB4_DPIA)
+				do_fallback = true;
+
 			perform_link_training_with_retries(
 					link_setting,
 					skip_video_pattern,
 					LINK_TRAINING_ATTEMPTS,
 					&pipes[i],
 					SIGNAL_TYPE_DISPLAY_PORT,
-					false);
+					do_fallback);
 
 			link->dc->hwss.enable_stream(&pipes[i]);
 
 			link->dc->hwss.unblank_stream(&pipes[i],
 					link_setting);
 
-			if (pipes[i].stream_res.audio) {
-				/* notify audio driver for
-				 * audio modes of monitor */
-				pipes[i].stream_res.audio->funcs->az_enable(
-						pipes[i].stream_res.audio);
-
-				/* un-mute audio */
-				/* TODO: audio should be per stream rather than
-				 * per link */
-				pipes[i].stream_res.stream_enc->funcs->
-				audio_mute_control(
-					pipes[i].stream_res.stream_enc, false);
-			}
+			link->dc->hwss.enable_audio_stream(&pipes[i]);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
index 74e36b34d3f7..d130d58ac08e 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dpia.c
@@ -791,10 +791,14 @@ static enum link_training_result dpia_training_eq_transparent(
 		}
 
 		if (dp_is_ch_eq_done(lane_count, dpcd_lane_status) &&
-		    dp_is_symbol_locked(link->cur_link_settings.lane_count, dpcd_lane_status) &&
-		    dp_is_interlane_aligned(dpcd_lane_status_updated)) {
-			result =  LINK_TRAINING_SUCCESS;
-			break;
+				dp_is_symbol_locked(link->cur_link_settings.lane_count, dpcd_lane_status)) {
+			/* Take into consideration corner case for DP 1.4a LL Compliance CTS as USB4
+			 * has to share encoders unlike DP and USBC
+			 */
+			if (dp_is_interlane_aligned(dpcd_lane_status_updated) || (link->is_automated && retries_eq)) {
+				result =  LINK_TRAINING_SUCCESS;
+				break;
+			}
 		}
 
 		/* Update VS/PE. */
@@ -1008,7 +1012,8 @@ enum link_training_result dc_link_dpia_perform_link_training(
 	 */
 	if (result == LINK_TRAINING_SUCCESS) {
 		msleep(5);
-		result = dp_check_link_loss_status(link, &lt_settings);
+		if (!link->is_automated)
+			result = dp_check_link_loss_status(link, &lt_settings);
 	} else if (result == LINK_TRAINING_ABORT) {
 		dpia_training_abort(link, &lt_settings, repeater_id);
 	} else {
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index fd8db482e56f..da164685547d 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -1768,6 +1768,17 @@ bool dc_remove_plane_from_context(
 	return true;
 }
 
+/**
+ * dc_rem_all_planes_for_stream - Remove planes attached to the target stream.
+ *
+ * @dc: Current dc state.
+ * @stream: Target stream, which we want to remove the attached plans.
+ * @context: New context.
+ *
+ * Return:
+ * Return true if DC was able to remove all planes from the target
+ * stream, otherwise, return false.
+ */
 bool dc_rem_all_planes_for_stream(
 		const struct dc *dc,
 		struct dc_stream_state *stream,
@@ -2562,9 +2573,12 @@ enum dc_status resource_map_pool_resources(
 
 /**
  * dc_resource_state_copy_construct_current() - Creates a new dc_state from existing state
- * Is a shallow copy.  Increments refcounts on existing streams and planes.
+ *
  * @dc: copy out of dc->current_state
  * @dst_ctx: copy into this
+ *
+ * This function makes a shallow copy of the current DC state and increments
+ * refcounts on existing streams and planes.
  */
 void dc_resource_state_copy_construct_current(
 		const struct dc *dc,
@@ -2593,15 +2607,241 @@ bool dc_resource_is_dsc_encoding_supported(const struct dc *dc)
 	return dc->res_pool->res_cap->num_dsc > 0;
 }
 
+static bool planes_changed_for_existing_stream(struct dc_state *context,
+					       struct dc_stream_state *stream,
+					       const struct dc_validation_set set[],
+					       int set_count)
+{
+	int i, j;
+	struct dc_stream_status *stream_status = NULL;
+
+	for (i = 0; i < context->stream_count; i++) {
+		if (context->streams[i] == stream) {
+			stream_status = &context->stream_status[i];
+			break;
+		}
+	}
+
+	if (!stream_status)
+		ASSERT(0);
+
+	for (i = 0; i < set_count; i++)
+		if (set[i].stream == stream)
+			break;
+
+	if (i == set_count)
+		ASSERT(0);
+
+	if (set[i].plane_count != stream_status->plane_count)
+		return true;
+
+	for (j = 0; j < set[i].plane_count; j++)
+		if (set[i].plane_states[j] != stream_status->plane_states[j])
+			return true;
+
+	return false;
+}
 
 /**
- * dc_validate_global_state() - Determine if HW can support a given state
- * Checks HW resource availability and bandwidth requirement.
+ * dc_validate_with_context - Validate and update the potential new stream in the context object
+ *
+ * @dc: Used to get the current state status
+ * @set: An array of dc_validation_set with all the current streams reference
+ * @set_count: Total of streams
+ * @context: New context
+ * @fast_validate: Enable or disable fast validation
+ *
+ * This function updates the potential new stream in the context object. It
+ * creates multiple lists for the add, remove, and unchanged streams. In
+ * particular, if the unchanged streams have a plane that changed, it is
+ * necessary to remove all planes from the unchanged streams. In summary, this
+ * function is responsible for validating the new context.
+ *
+ * Return:
+ * In case of success, return DC_OK (1), otherwise, return a DC error.
+ */
+enum dc_status dc_validate_with_context(struct dc *dc,
+					const struct dc_validation_set set[],
+					int set_count,
+					struct dc_state *context,
+					bool fast_validate)
+{
+	struct dc_stream_state *unchanged_streams[MAX_PIPES] = { 0 };
+	struct dc_stream_state *del_streams[MAX_PIPES] = { 0 };
+	struct dc_stream_state *add_streams[MAX_PIPES] = { 0 };
+	int old_stream_count = context->stream_count;
+	enum dc_status res = DC_ERROR_UNEXPECTED;
+	int unchanged_streams_count = 0;
+	int del_streams_count = 0;
+	int add_streams_count = 0;
+	bool found = false;
+	int i, j, k;
+
+	DC_LOGGER_INIT(dc->ctx->logger);
+
+	/* First build a list of streams to be remove from current context */
+	for (i = 0; i < old_stream_count; i++) {
+		struct dc_stream_state *stream = context->streams[i];
+
+		for (j = 0; j < set_count; j++) {
+			if (stream == set[j].stream) {
+				found = true;
+				break;
+			}
+		}
+
+		if (!found)
+			del_streams[del_streams_count++] = stream;
+
+		found = false;
+	}
+
+	/* Second, build a list of new streams */
+	for (i = 0; i < set_count; i++) {
+		struct dc_stream_state *stream = set[i].stream;
+
+		for (j = 0; j < old_stream_count; j++) {
+			if (stream == context->streams[j]) {
+				found = true;
+				break;
+			}
+		}
+
+		if (!found)
+			add_streams[add_streams_count++] = stream;
+
+		found = false;
+	}
+
+	/* Build a list of unchanged streams which is necessary for handling
+	 * planes change such as added, removed, and updated.
+	 */
+	for (i = 0; i < set_count; i++) {
+		/* Check if stream is part of the delete list */
+		for (j = 0; j < del_streams_count; j++) {
+			if (set[i].stream == del_streams[j]) {
+				found = true;
+				break;
+			}
+		}
+
+		if (!found) {
+			/* Check if stream is part of the add list */
+			for (j = 0; j < add_streams_count; j++) {
+				if (set[i].stream == add_streams[j]) {
+					found = true;
+					break;
+				}
+			}
+		}
+
+		if (!found)
+			unchanged_streams[unchanged_streams_count++] = set[i].stream;
+
+		found = false;
+	}
+
+	/* Remove all planes for unchanged streams if planes changed */
+	for (i = 0; i < unchanged_streams_count; i++) {
+		if (planes_changed_for_existing_stream(context,
+						       unchanged_streams[i],
+						       set,
+						       set_count)) {
+			if (!dc_rem_all_planes_for_stream(dc,
+							  unchanged_streams[i],
+							  context)) {
+				res = DC_FAIL_DETACH_SURFACES;
+				goto fail;
+			}
+		}
+	}
+
+	/* Remove all planes for removed streams and then remove the streams */
+	for (i = 0; i < del_streams_count; i++) {
+		/* Need to cpy the dwb data from the old stream in order to efc to work */
+		if (del_streams[i]->num_wb_info > 0) {
+			for (j = 0; j < add_streams_count; j++) {
+				if (del_streams[i]->sink == add_streams[j]->sink) {
+					add_streams[j]->num_wb_info = del_streams[i]->num_wb_info;
+					for (k = 0; k < del_streams[i]->num_wb_info; k++)
+						add_streams[j]->writeback_info[k] = del_streams[i]->writeback_info[k];
+				}
+			}
+		}
+
+		if (!dc_rem_all_planes_for_stream(dc, del_streams[i], context)) {
+			res = DC_FAIL_DETACH_SURFACES;
+			goto fail;
+		}
+
+		res = dc_remove_stream_from_ctx(dc, context, del_streams[i]);
+		if (res != DC_OK)
+			goto fail;
+	}
+
+	/* Swap seamless boot stream to pipe 0 (if needed) to ensure pipe_ctx
+	 * matches. This may change in the future if seamless_boot_stream can be
+	 * multiple.
+	 */
+	for (i = 0; i < add_streams_count; i++) {
+		mark_seamless_boot_stream(dc, add_streams[i]);
+		if (add_streams[i]->apply_seamless_boot_optimization && i != 0) {
+			struct dc_stream_state *temp = add_streams[0];
+
+			add_streams[0] = add_streams[i];
+			add_streams[i] = temp;
+			break;
+		}
+	}
+
+	/* Add new streams and then add all planes for the new stream */
+	for (i = 0; i < add_streams_count; i++) {
+		calculate_phy_pix_clks(add_streams[i]);
+		res = dc_add_stream_to_ctx(dc, context, add_streams[i]);
+		if (res != DC_OK)
+			goto fail;
+
+		if (!add_all_planes_for_stream(dc, add_streams[i], set, set_count, context)) {
+			res = DC_FAIL_ATTACH_SURFACES;
+			goto fail;
+		}
+	}
+
+	/* Add all planes for unchanged streams if planes changed */
+	for (i = 0; i < unchanged_streams_count; i++) {
+		if (planes_changed_for_existing_stream(context,
+						       unchanged_streams[i],
+						       set,
+						       set_count)) {
+			if (!add_all_planes_for_stream(dc, unchanged_streams[i], set, set_count, context)) {
+				res = DC_FAIL_ATTACH_SURFACES;
+				goto fail;
+			}
+		}
+	}
+
+	res = dc_validate_global_state(dc, context, fast_validate);
+
+fail:
+	if (res != DC_OK)
+		DC_LOG_WARNING("%s:resource validation failed, dc_status:%d\n",
+			       __func__,
+			       res);
+
+	return res;
+}
+
+/**
+ * dc_validate_global_state() - Determine if hardware can support a given state
+ *
  * @dc: dc struct for this driver
  * @new_ctx: state to be validated
  * @fast_validate: set to true if only yes/no to support matters
  *
- * Return: DC_OK if the result can be programmed.  Otherwise, an error code.
+ * Checks hardware resource availability and bandwidth requirement.
+ *
+ * Return:
+ * DC_OK if the result can be programmed. Otherwise, an error code.
  */
 enum dc_status dc_validate_global_state(
 		struct dc *dc,
@@ -2789,6 +3029,12 @@ static void set_avi_info_frame(
 		hdmi_info.bits.C0_C1   = COLORIMETRY_EXTENDED;
 	}
 
+	if (pixel_encoding && color_space == COLOR_SPACE_2020_YCBCR &&
+			stream->out_transfer_func->tf == TRANSFER_FUNCTION_GAMMA22) {
+		hdmi_info.bits.EC0_EC2 = 0;
+		hdmi_info.bits.C0_C1 = COLORIMETRY_ITU709;
+	}
+
 	/* TODO: un-hardcode aspect ratio */
 	aspect = stream->timing.aspect_ratio;
 
@@ -3734,4 +3980,4 @@ bool dc_resource_acquire_secondary_pipe_for_mpc_odm(
 	}
 
 	return true;
-}
-\ No newline at end of file
+}
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 38d71b5c1f2d..20e534f73513 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -332,9 +332,21 @@ bool dc_stream_set_cursor_attributes(
 
 	dc = stream->ctx->dc;
 
-	if (dc->debug.allow_sw_cursor_fallback && attributes->height * attributes->width * 4 > 16384)
-		if (stream->mall_stream_config.type == SUBVP_MAIN)
+	/* SubVP is not compatible with HW cursor larger than 64 x 64 x 4.
+	 * Therefore, if cursor is greater than 64 x 64 x 4, fallback to SW cursor in the following case:
+	 * 1. For single display cases, if resolution is >= 5K and refresh rate < 120hz
+	 * 2. For multi display cases, if resolution is >= 4K and refresh rate < 120hz
+	 *
+	 * [< 120hz is a requirement for SubVP configs]
+	 */
+	if (dc->debug.allow_sw_cursor_fallback && attributes->height * attributes->width * 4 > 16384) {
+		if (dc->current_state->stream_count == 1 && stream->timing.v_addressable >= 2880 &&
+				((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120)
 			return false;
+		else if (dc->current_state->stream_count > 1 && stream->timing.v_addressable >= 2160 &&
+				((stream->timing.pix_clk_100hz * 100) / stream->timing.v_total / stream->timing.h_total) < 120)
+			return false;
+	}
 
 	stream->cursor_attributes = *attributes;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index bfc5474c0f4c..85ebeaa2de18 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -47,7 +47,7 @@ struct aux_payload;
 struct set_config_cmd_payload;
 struct dmub_notification;
 
-#define DC_VER "3.2.207"
+#define DC_VER "3.2.215"
 
 #define MAX_SURFACES 3
 #define MAX_PLANES 6
@@ -56,9 +56,7 @@ struct dmub_notification;
 #define MIN_VIEWPORT_SIZE 12
 #define MAX_NUM_EDP 2
 
-/*******************************************************************************
- * Display Core Interfaces
- ******************************************************************************/
+/* Display Core Interfaces */
 struct dc_versions {
 	const char *dc_ver;
 	struct dmcu_version dmcu_version;
@@ -263,11 +261,13 @@ struct dc_caps {
 	uint32_t cache_line_size;
 	uint32_t cache_num_ways;
 	uint16_t subvp_fw_processing_delay_us;
+	uint8_t subvp_drr_max_vblank_margin_us;
 	uint16_t subvp_prefetch_end_to_mall_start_us;
 	uint8_t subvp_swath_height_margin_lines; // subvp start line must be aligned to 2 x swath height
 	uint16_t subvp_pstate_allow_width_us;
 	uint16_t subvp_vertical_int_margin_us;
 	bool seamless_odm;
+	uint8_t subvp_drr_vblank_start_margin_us;
 };
 
 struct dc_bug_wa {
@@ -395,6 +395,7 @@ struct dc_config {
 	bool disable_dmcu;
 	bool enable_4to1MPC;
 	bool enable_windowed_mpo_odm;
+	bool forceHBR2CP2520; // Used for switching between test patterns TPS4 and CP2520
 	uint32_t allow_edp_hotplug_detection;
 	bool clamp_min_dcfclk;
 	uint64_t vblank_alignment_dto_params;
@@ -408,6 +409,7 @@ struct dc_config {
 	bool use_default_clock_table;
 	bool force_bios_enable_lttpr;
 	uint8_t force_bios_fixed_vs;
+	int sdpif_request_limit_words_per_umc;
 
 };
 
@@ -457,15 +459,15 @@ enum pipe_split_policy {
 	MPC_SPLIT_DYNAMIC = 0,
 
 	/**
-	 * @MPC_SPLIT_DYNAMIC: Avoid pipe split, which means that DC will not
+	 * @MPC_SPLIT_AVOID: Avoid pipe split, which means that DC will not
 	 * try any sort of split optimization.
 	 */
 	MPC_SPLIT_AVOID = 1,
 
 	/**
-	 * @MPC_SPLIT_DYNAMIC: With this option, DC will only try to optimize
-	 * the pipe utilization when using a single display; if the user
-	 * connects to a second display, DC will avoid pipe split.
+	 * @MPC_SPLIT_AVOID_MULT_DISP: With this option, DC will only try to
+	 * optimize the pipe utilization when using a single display; if the
+	 * user connects to a second display, DC will avoid pipe split.
 	 */
 	MPC_SPLIT_AVOID_MULT_DISP = 2,
 };
@@ -491,12 +493,17 @@ enum dcn_pwr_state {
 enum dcn_zstate_support_state {
 	DCN_ZSTATE_SUPPORT_UNKNOWN,
 	DCN_ZSTATE_SUPPORT_ALLOW,
+	DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY,
+	DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY,
 	DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY,
 	DCN_ZSTATE_SUPPORT_DISALLOW,
 };
-/*
- * For any clocks that may differ per pipe
- * only the max is stored in this structure
+
+/**
+ * struct dc_clocks - DC pipe clocks
+ *
+ * For any clocks that may differ per pipe only the max is stored in this
+ * structure
  */
 struct dc_clocks {
 	int dispclk_khz;
@@ -523,6 +530,16 @@ struct dc_clocks {
 	bool prev_p_state_change_support;
 	bool fclk_prev_p_state_change_support;
 	int num_ways;
+
+	/*
+	 * @fw_based_mclk_switching
+	 *
+	 * DC has a mechanism that leverage the variable refresh rate to switch
+	 * memory clock in cases that we have a large latency to achieve the
+	 * memory clock change and a short vblank window. DC has some
+	 * requirements to enable this feature, and this field describes if the
+	 * system support or not such a feature.
+	 */
 	bool fw_based_mclk_switching;
 	bool fw_based_mclk_switching_shut_down;
 	int prev_num_ways;
@@ -764,7 +781,6 @@ struct dc_debug_options {
 	bool disable_mem_low_power;
 	bool pstate_enabled;
 	bool disable_dmcu;
-	bool disable_psr;
 	bool force_abm_enable;
 	bool disable_stereo_support;
 	bool vsr_support;
@@ -828,6 +844,7 @@ struct dc_debug_options {
 	int crb_alloc_policy_min_disp_count;
 	bool disable_z10;
 	bool enable_z9_disable_interface;
+	bool psr_skip_crtc_disable;
 	union dpia_debug_options dpia_debug;
 	bool disable_fixed_vs_aux_timeout_wa;
 	bool force_disable_subvp;
@@ -836,6 +853,7 @@ struct dc_debug_options {
 	unsigned int force_subvp_num_ways;
 	unsigned int force_mall_ss_num_ways;
 	bool alloc_extra_way_for_cursor;
+	uint32_t subvp_extra_lines;
 	bool force_usr_allow;
 	/* uses value at boot and disables switch */
 	bool disable_dtb_ref_clk_switch;
@@ -852,6 +870,8 @@ struct dc_debug_options {
 	bool enable_double_buffered_dsc_pg_support;
 	bool enable_dp_dig_pixel_rate_div_policy;
 	enum lttpr_mode lttpr_mode_override;
+	unsigned int dsc_delay_factor_wa_x1000;
+	unsigned int min_prefetch_in_strobe_ns;
 };
 
 struct gpu_info_soc_bounding_box_v1_0;
@@ -988,9 +1008,7 @@ void dc_init_callbacks(struct dc *dc,
 void dc_deinit_callbacks(struct dc *dc);
 void dc_destroy(struct dc **dc);
 
-/*******************************************************************************
- * Surface Interfaces
- ******************************************************************************/
+/* Surface Interfaces */
 
 enum {
 	TRANSFER_FUNC_POINTS = 1025
@@ -1269,12 +1287,23 @@ void dc_post_update_surfaces_to_stream(
 
 #include "dc_stream.h"
 
-/*
- * Structure to store surface/stream associations for validation
+/**
+ * struct dc_validation_set - Struct to store surface/stream associations for validation
  */
 struct dc_validation_set {
+	/**
+	 * @stream: Stream state properties
+	 */
 	struct dc_stream_state *stream;
+
+	/**
+	 * @plane_state: Surface state
+	 */
 	struct dc_plane_state *plane_states[MAX_SURFACES];
+
+	/**
+	 * @plane_count: Total of active planes
+	 */
 	uint8_t plane_count;
 };
 
@@ -1286,6 +1315,12 @@ enum dc_status dc_validate_plane(struct dc *dc, const struct dc_plane_state *pla
 
 void get_clock_requirements_for_state(struct dc_state *state, struct AsicStateEx *info);
 
+enum dc_status dc_validate_with_context(struct dc *dc,
+					const struct dc_validation_set set[],
+					int set_count,
+					struct dc_state *context,
+					bool fast_validate);
+
 bool dc_set_generic_gpio_for_stereo(bool enable,
 		struct gpio_service *gpio_service);
 
@@ -1321,15 +1356,12 @@ void dc_resource_state_destruct(struct dc_state *context);
 
 bool dc_resource_is_dsc_encoding_supported(const struct dc *dc);
 
-/*
- * TODO update to make it about validation sets
- * Set up streams and links associated to drive sinks
- * The streams parameter is an absolute set of all active streams.
- *
- * After this call:
- *   Phy, Encoder, Timing Generator are programmed and enabled.
- *   New streams are enabled with blank stream; no memory read.
- */
+enum dc_status dc_commit_streams(struct dc *dc,
+				 struct dc_stream_state *streams[],
+				 uint8_t stream_count);
+
+/* TODO: When the transition to the new commit sequence is done, remove this
+ * function in favor of dc_commit_streams. */
 bool dc_commit_state(struct dc *dc, struct dc_state *context);
 
 struct dc_state *dc_create_state(struct dc *dc);
@@ -1337,9 +1369,7 @@ struct dc_state *dc_copy_state(struct dc_state *src_ctx);
 void dc_retain_state(struct dc_state *context);
 void dc_release_state(struct dc_state *context);
 
-/*******************************************************************************
- * Link Interfaces
- ******************************************************************************/
+/* Link Interfaces */
 
 struct dpcd_caps {
 	union dpcd_rev dpcd_rev;
@@ -1441,9 +1471,7 @@ struct hdcp_caps {
 
 uint32_t dc_get_opp_for_plane(struct dc *dc, struct dc_plane_state *plane);
 
-/*******************************************************************************
- * Sink Interfaces - A sink corresponds to a display output device
- ******************************************************************************/
+/* Sink Interfaces - A sink corresponds to a display output device */
 
 struct dc_container_id {
 	// 128bit GUID in binary form
@@ -1526,9 +1554,7 @@ struct dc_cursor {
 };
 
 
-/*******************************************************************************
- * Interrupt interfaces
- ******************************************************************************/
+/* Interrupt interfaces */
 enum dc_irq_source dc_interrupt_to_irq_source(
 		struct dc *dc,
 		uint32_t src_id,
@@ -1540,9 +1566,7 @@ enum dc_irq_source dc_get_hpd_irq_source_at_index(
 
 void dc_notify_vsync_int_state(struct dc *dc, struct dc_stream_state *stream, bool enable);
 
-/*******************************************************************************
- * Power Interfaces
- ******************************************************************************/
+/* Power Interfaces */
 
 void dc_set_power_state(
 		struct dc *dc,
@@ -1615,14 +1639,10 @@ enum dc_status dc_process_dmub_set_mst_slots(const struct dc *dc,
 void dc_process_dmub_dpia_hpd_int_enable(const struct dc *dc,
 				uint32_t hpd_int_enable);
 
-/*******************************************************************************
- * DSC Interfaces
- ******************************************************************************/
+/* DSC Interfaces */
 #include "dc_dsc.h"
 
-/*******************************************************************************
- * Disable acc mode Interfaces
- ******************************************************************************/
+/* Disable acc mode Interfaces */
 void dc_disable_accelerated_mode(struct dc *dc);
 
 #endif /* DC_INTERFACE_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
index 0541e87e4f38..6ccf477d1c4d 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_dmub_srv.c
@@ -423,25 +423,20 @@ void dc_dmub_srv_get_visual_confirm_color_cmd(struct dc *dc, struct pipe_ctx *pi
 
 #ifdef CONFIG_DRM_AMD_DC_DCN
 /**
- * ***********************************************************************************************
- * populate_subvp_cmd_drr_info: Helper to populate DRR pipe info for the DMCUB subvp command
+ * populate_subvp_cmd_drr_info - Helper to populate DRR pipe info for the DMCUB subvp command
  *
- * Populate the DMCUB SubVP command with DRR pipe info. All the information required for calculating
- * the SubVP + DRR microschedule is populated here.
+ * @dc: [in] current dc state
+ * @subvp_pipe: [in] pipe_ctx for the SubVP pipe
+ * @vblank_pipe: [in] pipe_ctx for the DRR pipe
+ * @pipe_data: [in] Pipe data which stores the VBLANK/DRR info
+ *
+ * Populate the DMCUB SubVP command with DRR pipe info. All the information
+ * required for calculating the SubVP + DRR microschedule is populated here.
  *
  * High level algorithm:
  * 1. Get timing for SubVP pipe, phantom pipe, and DRR pipe
  * 2. Calculate the min and max vtotal which supports SubVP + DRR microschedule
  * 3. Populate the drr_info with the min and max supported vtotal values
- *
- * @param [in] dc: current dc state
- * @param [in] subvp_pipe: pipe_ctx for the SubVP pipe
- * @param [in] vblank_pipe: pipe_ctx for the DRR pipe
- * @param [in] pipe_data: Pipe data which stores the VBLANK/DRR info
- *
- * @return: void
- *
- * ***********************************************************************************************
  */
 static void populate_subvp_cmd_drr_info(struct dc *dc,
 		struct pipe_ctx *subvp_pipe,
@@ -482,33 +477,38 @@ static void populate_subvp_cmd_drr_info(struct dc *dc,
 			(((uint64_t)main_timing->pix_clk_100hz * 100)));
 	drr_active_us = div64_u64(((uint64_t)drr_timing->v_addressable * drr_timing->h_total * 1000000),
 			(((uint64_t)drr_timing->pix_clk_100hz * 100)));
-	max_drr_vblank_us = div64_u64((subvp_active_us - prefetch_us - drr_active_us), 2) + drr_active_us;
-	max_drr_mallregion_us = subvp_active_us - prefetch_us - mall_region_us;
+	max_drr_vblank_us = div64_u64((subvp_active_us - prefetch_us -
+			dc->caps.subvp_fw_processing_delay_us - drr_active_us), 2) + drr_active_us;
+	max_drr_mallregion_us = subvp_active_us - prefetch_us - mall_region_us - dc->caps.subvp_fw_processing_delay_us;
 	max_drr_supported_us = max_drr_vblank_us > max_drr_mallregion_us ? max_drr_vblank_us : max_drr_mallregion_us;
 	max_vtotal_supported = div64_u64(((uint64_t)drr_timing->pix_clk_100hz * 100 * max_drr_supported_us),
 			(((uint64_t)drr_timing->h_total * 1000000)));
 
+	/* When calculating the max vtotal supported for SubVP + DRR cases, add
+	 * margin due to possible rounding errors (being off by 1 line in the
+	 * FW calculation can incorrectly push the P-State switch to wait 1 frame
+	 * longer).
+	 */
+	max_vtotal_supported = max_vtotal_supported - dc->caps.subvp_drr_max_vblank_margin_us;
+
 	pipe_data->pipe_config.vblank_data.drr_info.min_vtotal_supported = min_vtotal_supported;
 	pipe_data->pipe_config.vblank_data.drr_info.max_vtotal_supported = max_vtotal_supported;
+	pipe_data->pipe_config.vblank_data.drr_info.drr_vblank_start_margin = dc->caps.subvp_drr_vblank_start_margin_us;
 }
 
 /**
- * ***********************************************************************************************
- * populate_subvp_cmd_vblank_pipe_info: Helper to populate VBLANK pipe info for the DMUB subvp command
- *
- * Populate the DMCUB SubVP command with VBLANK pipe info. All the information required to calculate
- * the microschedule for SubVP + VBLANK case is stored in the pipe_data (subvp_data and vblank_data).
- * Also check if the VBLANK pipe is a DRR display -- if it is make a call to populate drr_info.
- *
- * @param [in] dc: current dc state
- * @param [in] context: new dc state
- * @param [in] cmd: DMUB cmd to be populated with SubVP info
- * @param [in] vblank_pipe: pipe_ctx for the VBLANK pipe
- * @param [in] cmd_pipe_index: index for the pipe array in DMCUB SubVP cmd
+ * populate_subvp_cmd_vblank_pipe_info - Helper to populate VBLANK pipe info for the DMUB subvp command
  *
- * @return: void
+ * @dc: [in] current dc state
+ * @context: [in] new dc state
+ * @cmd: [in] DMUB cmd to be populated with SubVP info
+ * @vblank_pipe: [in] pipe_ctx for the VBLANK pipe
+ * @cmd_pipe_index: [in] index for the pipe array in DMCUB SubVP cmd
  *
- * ***********************************************************************************************
+ * Populate the DMCUB SubVP command with VBLANK pipe info. All the information
+ * required to calculate the microschedule for SubVP + VBLANK case is stored in
+ * the pipe_data (subvp_data and vblank_data).  Also check if the VBLANK pipe
+ * is a DRR display -- if it is make a call to populate drr_info.
  */
 static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc,
 		struct dc_state *context,
@@ -551,22 +551,18 @@ static void populate_subvp_cmd_vblank_pipe_info(struct dc *dc,
 }
 
 /**
- * ***********************************************************************************************
- * update_subvp_prefetch_end_to_mall_start: Helper for SubVP + SubVP case
+ * update_subvp_prefetch_end_to_mall_start - Helper for SubVP + SubVP case
  *
- * For SubVP + SubVP, we use a single vertical interrupt to start the microschedule for both
- * SubVP pipes. In order for this to work correctly, the MALL REGION of both SubVP pipes must
- * start at the same time. This function lengthens the prefetch end to mall start delay of the
- * SubVP pipe that has the shorter prefetch so that both MALL REGION's will start at the same time.
+ * @dc: [in] current dc state
+ * @context: [in] new dc state
+ * @cmd: [in] DMUB cmd to be populated with SubVP info
+ * @subvp_pipes: [in] Array of SubVP pipes (should always be length 2)
  *
- * @param [in] dc: current dc state
- * @param [in] context: new dc state
- * @param [in] cmd: DMUB cmd to be populated with SubVP info
- * @param [in] subvp_pipes: Array of SubVP pipes (should always be length 2)
- *
- * @return: void
- *
- * ***********************************************************************************************
+ * For SubVP + SubVP, we use a single vertical interrupt to start the
+ * microschedule for both SubVP pipes. In order for this to work correctly, the
+ * MALL REGION of both SubVP pipes must start at the same time. This function
+ * lengthens the prefetch end to mall start delay of the SubVP pipe that has
+ * the shorter prefetch so that both MALL REGION's will start at the same time.
  */
 static void update_subvp_prefetch_end_to_mall_start(struct dc *dc,
 		struct dc_state *context,
@@ -608,22 +604,17 @@ static void update_subvp_prefetch_end_to_mall_start(struct dc *dc,
 }
 
 /**
- * ***************************************************************************************
- * setup_subvp_dmub_command: Helper to populate the SubVP pipe info for the DMUB subvp command
- *
- * Populate the DMCUB SubVP command with SubVP pipe info. All the information required to
- * calculate the microschedule for the SubVP pipe is stored in the pipe_data of the DMCUB
- * SubVP command.
- *
- * @param [in] dc: current dc state
- * @param [in] context: new dc state
- * @param [in] cmd: DMUB cmd to be populated with SubVP info
- * @param [in] subvp_pipe: pipe_ctx for the SubVP pipe
- * @param [in] cmd_pipe_index: index for the pipe array in DMCUB SubVP cmd
+ * populate_subvp_cmd_pipe_info - Helper to populate the SubVP pipe info for the DMUB subvp command
  *
- * @return: void
+ * @dc: [in] current dc state
+ * @context: [in] new dc state
+ * @cmd: [in] DMUB cmd to be populated with SubVP info
+ * @subvp_pipe: [in] pipe_ctx for the SubVP pipe
+ * @cmd_pipe_index: [in] index for the pipe array in DMCUB SubVP cmd
  *
- * ***************************************************************************************
+ * Populate the DMCUB SubVP command with SubVP pipe info. All the information
+ * required to calculate the microschedule for the SubVP pipe is stored in the
+ * pipe_data of the DMCUB SubVP command.
  */
 static void populate_subvp_cmd_pipe_info(struct dc *dc,
 		struct dc_state *context,
@@ -703,19 +694,14 @@ static void populate_subvp_cmd_pipe_info(struct dc *dc,
 }
 
 /**
- * ***************************************************************************************
- * dc_dmub_setup_subvp_dmub_command: Populate the DMCUB SubVP command
+ * dc_dmub_setup_subvp_dmub_command - Populate the DMCUB SubVP command
  *
- * This function loops through each pipe and populates the DMUB
- * SubVP CMD info based on the pipe (e.g. SubVP, VBLANK).
+ * @dc: [in] current dc state
+ * @context: [in] new dc state
+ * @cmd: [in] DMUB cmd to be populated with SubVP info
  *
- * @param [in] dc: current dc state
- * @param [in] context: new dc state
- * @param [in] cmd: DMUB cmd to be populated with SubVP info
- *
- * @return: void
- *
- * ***************************************************************************************
+ * This function loops through each pipe and populates the DMUB SubVP CMD info
+ * based on the pipe (e.g. SubVP, VBLANK).
  */
 void dc_dmub_setup_subvp_dmub_command(struct dc *dc,
 		struct dc_state *context,
@@ -882,11 +868,59 @@ void dc_dmub_srv_log_diagnostic_data(struct dc_dmub_srv *dc_dmub_srv)
 		diag_data.is_cw6_enabled);
 }
 
+static bool dc_can_pipe_disable_cursor(struct pipe_ctx *pipe_ctx)
+{
+	struct pipe_ctx *test_pipe, *split_pipe;
+	const struct scaler_data *scl_data = &pipe_ctx->plane_res.scl_data;
+	struct rect r1 = scl_data->recout, r2, r2_half;
+	int r1_r = r1.x + r1.width, r1_b = r1.y + r1.height, r2_r, r2_b;
+	int cur_layer = pipe_ctx->plane_state->layer_index;
+
+	/**
+	 * Disable the cursor if there's another pipe above this with a
+	 * plane that contains this pipe's viewport to prevent double cursor
+	 * and incorrect scaling artifacts.
+	 */
+	for (test_pipe = pipe_ctx->top_pipe; test_pipe;
+	     test_pipe = test_pipe->top_pipe) {
+		// Skip invisible layer and pipe-split plane on same layer
+		if (!test_pipe->plane_state->visible || test_pipe->plane_state->layer_index == cur_layer)
+			continue;
+
+		r2 = test_pipe->plane_res.scl_data.recout;
+		r2_r = r2.x + r2.width;
+		r2_b = r2.y + r2.height;
+		split_pipe = test_pipe;
+
+		/**
+		 * There is another half plane on same layer because of
+		 * pipe-split, merge together per same height.
+		 */
+		for (split_pipe = pipe_ctx->top_pipe; split_pipe;
+		     split_pipe = split_pipe->top_pipe)
+			if (split_pipe->plane_state->layer_index == test_pipe->plane_state->layer_index) {
+				r2_half = split_pipe->plane_res.scl_data.recout;
+				r2.x = (r2_half.x < r2.x) ? r2_half.x : r2.x;
+				r2.width = r2.width + r2_half.width;
+				r2_r = r2.x + r2.width;
+				break;
+			}
+
+		if (r1.x >= r2.x && r1.y >= r2.y && r1_r <= r2_r && r1_b <= r2_b)
+			return true;
+	}
+
+	return false;
+}
+
 static bool dc_dmub_should_update_cursor_data(struct pipe_ctx *pipe_ctx)
 {
 	if (pipe_ctx->plane_state != NULL) {
 		if (pipe_ctx->plane_state->address.type == PLN_ADDR_TYPE_VIDEO_PROGRESSIVE)
 			return false;
+
+		if (dc_can_pipe_disable_cursor(pipe_ctx))
+			return false;
 	}
 
 	if ((pipe_ctx->stream->link->psr_settings.psr_version == DC_PSR_VERSION_SU_1 ||
@@ -962,19 +996,14 @@ static void dc_build_cursor_attribute_update_payload1(
 }
 
 /**
- * ***************************************************************************************
- * dc_send_update_cursor_info_to_dmu: Populate the DMCUB Cursor update info command
- *
- * This function would store the cursor related information and pass it into dmub
+ * dc_send_update_cursor_info_to_dmu - Populate the DMCUB Cursor update info command
  *
- * @param [in] pCtx: pipe context
- * @param [in] pipe_idx: pipe index
+ * @pCtx: [in] pipe context
+ * @pipe_idx: [in] pipe index
  *
- * @return: void
- *
- * ***************************************************************************************
+ * This function would store the cursor related information and pass it into
+ * dmub
  */
-
 void dc_send_update_cursor_info_to_dmu(
 		struct pipe_ctx *pCtx, uint8_t pipe_idx)
 {
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index caf0c7af2d0b..2e18bcf6b11a 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -117,7 +117,7 @@ struct psr_settings {
  * Add a struct dc_panel_config under dc_link
  */
 struct dc_panel_config {
-	// extra panel power sequence parameters
+	/* extra panel power sequence parameters */
 	struct pps {
 		unsigned int extra_t3_ms;
 		unsigned int extra_t7_ms;
@@ -127,13 +127,21 @@ struct dc_panel_config {
 		unsigned int extra_t12_ms;
 		unsigned int extra_post_OUI_ms;
 	} pps;
-	// ABM
+	/* PSR */
+	struct psr {
+		bool disable_psr;
+		bool disallow_psrsu;
+		bool rc_disable;
+		bool rc_allow_static_screen;
+		bool rc_allow_fullscreen_VPB;
+	} psr;
+	/* ABM */
 	struct varib {
 		unsigned int varibright_feature_enable;
 		unsigned int def_varibright_level;
 		unsigned int abm_config_setting;
 	} varib;
-	// edp DSC
+	/* edp DSC */
 	struct dsc {
 		bool disable_dsc_edp;
 		unsigned int force_dsc_edp_policy;
@@ -143,6 +151,20 @@ struct dc_panel_config {
 		bool optimize_edp_link_rate; /* eDP ILR */
 	} ilr;
 };
+
+/*
+ *  USB4 DPIA BW ALLOCATION STRUCTS
+ */
+struct dc_dpia_bw_alloc {
+	int sink_verified_bw;  // The Verified BW that sink can allocated and use that has been verified already
+	int sink_allocated_bw; // The Actual Allocated BW that sink currently allocated
+	int padding_bw;        // The Padding "Un-used" BW allocated by CM for padding reasons
+	int sink_max_bw;       // The Max BW that sink can require/support
+	int estimated_bw;      // The estimated available BW for this DPIA
+	int bw_granularity;    // BW Granularity
+	bool bw_alloc_enabled; // The BW Alloc Mode Support is turned ON for all 3:  DP-Tx & Dpia & CM
+};
+
 /*
  * A link contains one or more sinks and their connected status.
  * The currently active signal type (HDMI, DP-SST, DP-MST) is also reported.
@@ -158,6 +180,14 @@ struct dc_link {
 	enum dc_irq_source irq_source_hpd_rx;/* aka DP Short Pulse  */
 	bool is_hpd_filter_disabled;
 	bool dp_ss_off;
+
+	/**
+	 * @link_state_valid:
+	 *
+	 * If there is no link and local sink, this variable should be set to
+	 * false. Otherwise, it should be set to true; usually, the function
+	 * core_link_enable_stream sets this field to true.
+	 */
 	bool link_state_valid;
 	bool aux_access_disabled;
 	bool sync_lt_in_progress;
@@ -168,6 +198,7 @@ struct dc_link {
 	bool is_dig_mapping_flexible;
 	bool hpd_status; /* HPD status of link without physical HPD pin. */
 	bool is_hpd_pending; /* Indicates a new received hpd */
+	bool is_automated; /* Indicates automated testing */
 
 	bool edp_sink_present;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index 9e6025c98db9..dfd3df1d2f7e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -41,6 +41,10 @@ struct timing_sync_info {
 struct dc_stream_status {
 	int primary_otg_inst;
 	int stream_enc_inst;
+
+	/**
+	 * @plane_count: Total of planes attached to a single stream
+	 */
 	int plane_count;
 	int audio_inst;
 	struct timing_sync_info timing_sync_info;
@@ -140,7 +144,7 @@ struct test_pattern {
 	unsigned int cust_pattern_size;
 };
 
-#define SUBVP_DRR_MARGIN_US 500 // 500us for DRR margin (SubVP + DRR)
+#define SUBVP_DRR_MARGIN_US 600 // 600us for DRR margin (SubVP + DRR)
 
 enum mall_stream_type {
 	SUBVP_NONE, // subvp not in use
@@ -156,6 +160,17 @@ struct mall_stream_config {
 	struct dc_stream_state *paired_stream;	// master / slave stream
 };
 
+/* Temp struct used to save and restore MALL config
+ * during validation.
+ *
+ * TODO: Move MALL config into dc_state instead of stream struct
+ * to avoid needing to save/restore.
+ */
+struct mall_temp_config {
+	struct mall_stream_config mall_stream_config[MAX_PIPES];
+	bool is_phantom_plane[MAX_PIPES];
+};
+
 struct dc_stream_state {
 	// sink is deprecated, new code should not reference
 	// this pointer
@@ -197,7 +212,18 @@ struct dc_stream_state {
 	bool use_vsc_sdp_for_colorimetry;
 	bool ignore_msa_timing_param;
 
+	/**
+	 * @allow_freesync:
+	 *
+	 * It say if Freesync is enabled or not.
+	 */
 	bool allow_freesync;
+
+	/**
+	 * @vrr_active_variable:
+	 *
+	 * It describes if VRR is in use.
+	 */
 	bool vrr_active_variable;
 	bool freesync_on_desktop;
 
@@ -517,10 +543,10 @@ bool dc_stream_get_crtc_position(struct dc *dc,
 				 unsigned int *nom_v_pos);
 
 #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
-bool dc_stream_forward_dmcu_crc_window(struct dc *dc, struct dc_stream_state *stream,
-			     struct crc_params *crc_window);
-bool dc_stream_stop_dmcu_crc_win_update(struct dc *dc,
-				 struct dc_stream_state *stream);
+bool dc_stream_forward_crc_window(struct dc *dc,
+		struct rect *rect,
+		struct dc_stream_state *stream,
+		bool is_stop);
 #endif
 
 bool dc_stream_configure_crc(struct dc *dc,
diff --git a/drivers/gpu/drm/amd/display/dc/dc_trace.h b/drivers/gpu/drm/amd/display/dc/dc_trace.h
index c711797e5c9e..bbec308a3a5e 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_trace.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_trace.h
@@ -40,3 +40,5 @@
 
 #define TRACE_DCN_FPU(begin, function, line, ref_count) \
 	trace_dcn_fpu(begin, function, line, ref_count)
+#define TRACE_OPTC_LOCK_UNLOCK_STATE(optc, inst, lock) \
+	trace_dcn_optc_lock_unlock_state(optc, inst, lock, __func__, __LINE__)
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index ad9041472cca..dc78e2404b48 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -993,4 +993,11 @@ struct display_endpoint_id {
 	enum display_endpoint_type ep_type;
 };
 
+#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
+struct otg_phy_mux {
+	uint8_t phy_output_num;
+	uint8_t otg_output_num;
+};
+#endif
+
 #endif /* DC_TYPES_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
index fbb19e253f50..d3cc5ec46956 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_dmcu.c
@@ -927,19 +927,20 @@ static bool dcn10_recv_edid_cea_ack(struct dmcu *dmcu, int *offset)
 
 #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
 static void dcn10_forward_crc_window(struct dmcu *dmcu,
-					struct crc_region *crc_win,
+					struct rect *rect,
 					struct otg_phy_mux *mux_mapping)
 {
 	struct dce_dmcu *dmcu_dce = TO_DCE_DMCU(dmcu);
 	unsigned int dmcu_max_retry_on_wait_reg_ready = 801;
 	unsigned int dmcu_wait_reg_ready_interval = 100;
 	unsigned int crc_start = 0, crc_end = 0, otg_phy_mux = 0;
+	int x_start, y_start, x_end, y_end;
 
 	/* If microcontroller is not running, do nothing */
 	if (dmcu->dmcu_state != DMCU_RUNNING)
 		return;
 
-	if (!crc_win)
+	if (!rect)
 		return;
 
 	/* waitDMCUReadyForCmd */
@@ -947,9 +948,14 @@ static void dcn10_forward_crc_window(struct dmcu *dmcu,
 				dmcu_wait_reg_ready_interval,
 				dmcu_max_retry_on_wait_reg_ready);
 
+	x_start = rect->x;
+	y_start = rect->y;
+	x_end = x_start + rect->width;
+	y_end = y_start + rect->height;
+
 	/* build up nitification data */
-	crc_start = (((unsigned int) crc_win->x_start) << 16) | crc_win->y_start;
-	crc_end = (((unsigned int) crc_win->x_end) << 16) | crc_win->y_end;
+	crc_start = (((unsigned int) x_start) << 16) | y_start;
+	crc_end = (((unsigned int) x_end) << 16) | y_end;
 	otg_phy_mux =
 		(((unsigned int) mux_mapping->otg_output_num) << 16) | mux_mapping->phy_output_num;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
index bec5e9f787fc..2d3201b77d6a 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dmub_psr.c
@@ -399,7 +399,11 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub,
 		link->psr_settings.force_ffu_mode = 0;
 	copy_settings_data->force_ffu_mode = link->psr_settings.force_ffu_mode;
 
-	if (link->fec_state == dc_link_fec_enabled &&
+	if (((link->dpcd_caps.fec_cap.bits.FEC_CAPABLE &&
+		!link->dc->debug.disable_fec) &&
+		(link->dpcd_caps.dsc_caps.dsc_basic_caps.fields.dsc_support.DSC_SUPPORT &&
+		!link->panel_config.dsc.disable_dsc_edp &&
+		link->dc->caps.edp_dsc_support)) &&
 		link->dpcd_caps.sink_dev_id == DP_DEVICE_ID_38EC11 &&
 		(!memcmp(link->dpcd_caps.sink_dev_id_str, DP_SINK_DEVICE_STR_ID_1,
 			sizeof(DP_SINK_DEVICE_STR_ID_1)) ||
@@ -409,6 +413,11 @@ static bool dmub_psr_copy_settings(struct dmub_psr *dmub,
 	else
 		copy_settings_data->debug.bitfields.force_wakeup_by_tps3 = 0;
 
+	//WA for PSR1 on specific TCON, require frame delay for frame re-lock
+	copy_settings_data->relock_delay_frame_cnt = 0;
+	if (link->dpcd_caps.sink_dev_id == DP_BRANCH_DEVICE_ID_001CF8)
+		copy_settings_data->relock_delay_frame_cnt = 2;
+
 	dc_dmub_srv_cmd_queue(dc->dmub_srv, &cmd);
 	dc_dmub_srv_cmd_execute(dc->dmub_srv);
 	dc_dmub_srv_wait_idle(dc->dmub_srv);
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index d260eaa1509e..913a1fe6b3da 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -688,16 +688,6 @@ void dce110_enable_stream(struct pipe_ctx *pipe_ctx)
 		early_control = lane_count;
 
 	tg->funcs->set_early_control(tg, early_control);
-
-	/* enable audio only within mode set */
-	if (pipe_ctx->stream_res.audio != NULL) {
-		if (dc_is_dp_signal(pipe_ctx->stream->signal))
-			pipe_ctx->stream_res.stream_enc->funcs->dp_audio_enable(pipe_ctx->stream_res.stream_enc);
-	}
-
-
-
-
 }
 
 static enum bp_result link_transmitter_control(
@@ -1081,12 +1071,14 @@ void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx)
 	struct dc *dc;
 	struct clk_mgr *clk_mgr;
 	unsigned int i, num_audio = 1;
+	const struct link_hwss *link_hwss;
 
 	if (!pipe_ctx->stream)
 		return;
 
 	dc = pipe_ctx->stream->ctx->dc;
 	clk_mgr = dc->clk_mgr;
+	link_hwss = get_link_hwss(pipe_ctx->stream->link, &pipe_ctx->link_res);
 
 	if (pipe_ctx->stream_res.audio && pipe_ctx->stream_res.audio->enabled == true)
 		return;
@@ -1103,56 +1095,35 @@ void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx)
 		if (num_audio >= 1 && clk_mgr->funcs->enable_pme_wa)
 			/*this is the first audio. apply the PME w/a in order to wake AZ from D3*/
 			clk_mgr->funcs->enable_pme_wa(clk_mgr);
-		/* un-mute audio */
-		/* TODO: audio should be per stream rather than per link */
-		if (is_dp_128b_132b_signal(pipe_ctx))
-			pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->audio_mute_control(
-					pipe_ctx->stream_res.hpo_dp_stream_enc, false);
-		else
-			pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control(
-					pipe_ctx->stream_res.stream_enc, false);
+
+		link_hwss->enable_audio_packet(pipe_ctx);
+
 		if (pipe_ctx->stream_res.audio)
 			pipe_ctx->stream_res.audio->enabled = true;
 	}
-
-	if (dc_is_dp_signal(pipe_ctx->stream->signal))
-		dp_source_sequence_trace(pipe_ctx->stream->link, DPCD_SOURCE_SEQ_AFTER_ENABLE_AUDIO_STREAM);
 }
 
 void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx)
 {
 	struct dc *dc;
 	struct clk_mgr *clk_mgr;
+	const struct link_hwss *link_hwss;
 
 	if (!pipe_ctx || !pipe_ctx->stream)
 		return;
 
 	dc = pipe_ctx->stream->ctx->dc;
 	clk_mgr = dc->clk_mgr;
+	link_hwss = get_link_hwss(pipe_ctx->stream->link, &pipe_ctx->link_res);
 
 	if (pipe_ctx->stream_res.audio && pipe_ctx->stream_res.audio->enabled == false)
 		return;
 
-	if (is_dp_128b_132b_signal(pipe_ctx))
-		pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->audio_mute_control(
-				pipe_ctx->stream_res.hpo_dp_stream_enc, true);
-	else
-		pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control(
-				pipe_ctx->stream_res.stream_enc, true);
+	link_hwss->disable_audio_packet(pipe_ctx);
+
 	if (pipe_ctx->stream_res.audio) {
 		pipe_ctx->stream_res.audio->enabled = false;
 
-		if (dc_is_dp_signal(pipe_ctx->stream->signal))
-			if (is_dp_128b_132b_signal(pipe_ctx))
-				pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_audio_disable(
-						pipe_ctx->stream_res.hpo_dp_stream_enc);
-			else
-				pipe_ctx->stream_res.stream_enc->funcs->dp_audio_disable(
-						pipe_ctx->stream_res.stream_enc);
-		else
-			pipe_ctx->stream_res.stream_enc->funcs->hdmi_audio_disable(
-					pipe_ctx->stream_res.stream_enc);
-
 		if (clk_mgr->funcs->enable_pme_wa)
 			/*this is the first audio. apply the PME w/a in order to wake AZ from D3*/
 			clk_mgr->funcs->enable_pme_wa(clk_mgr);
@@ -1163,9 +1134,6 @@ void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx)
 		 * stream->stream_engine_id);
 		 */
 	}
-
-	if (dc_is_dp_signal(pipe_ctx->stream->signal))
-		dp_source_sequence_trace(pipe_ctx->stream->link, DPCD_SOURCE_SEQ_AFTER_DISABLE_AUDIO_STREAM);
 }
 
 void dce110_disable_stream(struct pipe_ctx *pipe_ctx)
@@ -1487,6 +1455,9 @@ static enum dc_status apply_single_controller_ctx_to_hw(
 	unsigned int event_triggers = 0;
 	struct pipe_ctx *odm_pipe = pipe_ctx->next_odm_pipe;
 	struct dce_hwseq *hws = dc->hwseq;
+	const struct link_hwss *link_hwss = get_link_hwss(
+			link, &pipe_ctx->link_res);
+
 
 	if (hws->funcs.disable_stream_gating) {
 		hws->funcs.disable_stream_gating(dc, pipe_ctx);
@@ -1497,23 +1468,8 @@ static enum dc_status apply_single_controller_ctx_to_hw(
 
 		build_audio_output(context, pipe_ctx, &audio_output);
 
-		if (dc_is_dp_signal(pipe_ctx->stream->signal))
-			if (is_dp_128b_132b_signal(pipe_ctx))
-				pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_audio_setup(
-						pipe_ctx->stream_res.hpo_dp_stream_enc,
-						pipe_ctx->stream_res.audio->inst,
-						&pipe_ctx->stream->audio_info);
-			else
-				pipe_ctx->stream_res.stream_enc->funcs->dp_audio_setup(
-						pipe_ctx->stream_res.stream_enc,
-						pipe_ctx->stream_res.audio->inst,
-						&pipe_ctx->stream->audio_info);
-		else
-			pipe_ctx->stream_res.stream_enc->funcs->hdmi_audio_setup(
-					pipe_ctx->stream_res.stream_enc,
-					pipe_ctx->stream_res.audio->inst,
-					&pipe_ctx->stream->audio_info,
-					&audio_output.crtc_info);
+		link_hwss->setup_audio_output(pipe_ctx, &audio_output,
+				pipe_ctx->stream_res.audio->inst);
 
 		pipe_ctx->stream_res.audio->funcs->az_configure(
 				pipe_ctx->stream_res.audio,
@@ -1605,8 +1561,13 @@ static enum dc_status apply_single_controller_ctx_to_hw(
 
 	pipe_ctx->plane_res.scl_data.lb_params.alpha_en = pipe_ctx->bottom_pipe != NULL;
 
-	pipe_ctx->stream->link->psr_settings.psr_feature_enabled = false;
-
+	/* Phantom and main stream share the same link (because the stream
+	 * is constructed with the same sink). Make sure not to override
+	 * and link programming on the main.
+	 */
+	if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) {
+		pipe_ctx->stream->link->psr_settings.psr_feature_enabled = false;
+	}
 	return DC_OK;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
index 1b70b78e2fa1..af631085e88c 100644
--- a/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce120/dce120_resource.c
@@ -359,7 +359,8 @@ static const struct dce_audio_registers audio_regs[] = {
 	audio_regs(2),
 	audio_regs(3),
 	audio_regs(4),
-	audio_regs(5)
+	audio_regs(5),
+	audio_regs(6),
 };
 
 #define DCE120_AUD_COMMON_MASK_SH_LIST(mask_sh)\
diff --git a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c
index fc6aa098bda0..8db9f7514466 100644
--- a/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce60/dce60_resource.c
@@ -1128,6 +1128,7 @@ struct resource_pool *dce60_create_resource_pool(
 	if (dce60_construct(num_virtual_links, dc, pool))
 		return &pool->base;
 
+	kfree(pool);
 	BREAK_TO_DEBUGGER();
 	return NULL;
 }
@@ -1325,6 +1326,7 @@ struct resource_pool *dce61_create_resource_pool(
 	if (dce61_construct(num_virtual_links, dc, pool))
 		return &pool->base;
 
+	kfree(pool);
 	BREAK_TO_DEBUGGER();
 	return NULL;
 }
@@ -1518,6 +1520,7 @@ struct resource_pool *dce64_create_resource_pool(
 	if (dce64_construct(num_virtual_links, dc, pool))
 		return &pool->base;
 
+	kfree(pool);
 	BREAK_TO_DEBUGGER();
 	return NULL;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
index b28025960050..5825e6f412bd 100644
--- a/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dce80/dce80_resource.c
@@ -1137,6 +1137,7 @@ struct resource_pool *dce80_create_resource_pool(
 	if (dce80_construct(num_virtual_links, dc, pool))
 		return &pool->base;
 
+	kfree(pool);
 	BREAK_TO_DEBUGGER();
 	return NULL;
 }
@@ -1336,6 +1337,7 @@ struct resource_pool *dce81_create_resource_pool(
 	if (dce81_construct(num_virtual_links, dc, pool))
 		return &pool->base;
 
+	kfree(pool);
 	BREAK_TO_DEBUGGER();
 	return NULL;
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
index b9765b3899e1..ef52e6b6eccf 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
@@ -436,34 +436,48 @@ void dpp1_set_cursor_position(
 		uint32_t height)
 {
 	struct dcn10_dpp *dpp = TO_DCN10_DPP(dpp_base);
-	int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x;
-	int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y;
+	int x_pos = pos->x - param->viewport.x;
+	int y_pos = pos->y - param->viewport.y;
+	int x_hotspot = pos->x_hotspot;
+	int y_hotspot = pos->y_hotspot;
+	int src_x_offset = x_pos - pos->x_hotspot;
+	int src_y_offset = y_pos - pos->y_hotspot;
+	int cursor_height = (int)height;
+	int cursor_width = (int)width;
 	uint32_t cur_en = pos->enable ? 1 : 0;
 
-	// Cursor width/height and hotspots need to be rotated for offset calculation
+	// Transform cursor width / height and hotspots for offset calculations
 	if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) {
-		swap(width, height);
+		swap(cursor_height, cursor_width);
+		swap(x_hotspot, y_hotspot);
+
 		if (param->rotation == ROTATION_ANGLE_90) {
-			src_x_offset = pos->x - pos->y_hotspot - param->viewport.x;
-			src_y_offset = pos->y - pos->x_hotspot - param->viewport.y;
+			// hotspot = (-y, x)
+			src_x_offset = x_pos - (cursor_width - x_hotspot);
+			src_y_offset = y_pos - y_hotspot;
+		} else if (param->rotation == ROTATION_ANGLE_270) {
+			// hotspot = (y, -x)
+			src_x_offset = x_pos - x_hotspot;
+			src_y_offset = y_pos - (cursor_height - y_hotspot);
 		}
 	} else if (param->rotation == ROTATION_ANGLE_180) {
+		// hotspot = (-x, -y)
 		if (!param->mirror)
-			src_x_offset = pos->x - param->viewport.x;
+			src_x_offset = x_pos - (cursor_width - x_hotspot);
 
-		src_y_offset = pos->y - param->viewport.y;
+		src_y_offset = y_pos - (cursor_height - y_hotspot);
 	}
 
 	if (src_x_offset >= (int)param->viewport.width)
 		cur_en = 0;  /* not visible beyond right edge*/
 
-	if (src_x_offset + (int)width <= 0)
+	if (src_x_offset + cursor_width <= 0)
 		cur_en = 0;  /* not visible beyond left edge*/
 
 	if (src_y_offset >= (int)param->viewport.height)
 		cur_en = 0;  /* not visible beyond bottom edge*/
 
-	if (src_y_offset + (int)height <= 0)
+	if (src_y_offset + cursor_height <= 0)
 		cur_en = 0;  /* not visible beyond top edge*/
 
 	REG_UPDATE(CURSOR0_CONTROL,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
index 0f746bb4e500..d51f1ce02874 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.c
@@ -55,7 +55,7 @@ void hubbub1_wm_read_state(struct hubbub *hubbub,
 		s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A);
 		s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A);
 	}
-	s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A);
+	s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A);
 
 	s = &wm->sets[1];
 	s->wm_set = 1;
@@ -65,7 +65,7 @@ void hubbub1_wm_read_state(struct hubbub *hubbub,
 		s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B);
 		s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B);
 	}
-	s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B);
+	s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B);
 
 	s = &wm->sets[2];
 	s->wm_set = 2;
@@ -75,7 +75,7 @@ void hubbub1_wm_read_state(struct hubbub *hubbub,
 		s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C);
 		s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C);
 	}
-	s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C);
+	s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C);
 
 	s = &wm->sets[3];
 	s->wm_set = 3;
@@ -85,7 +85,7 @@ void hubbub1_wm_read_state(struct hubbub *hubbub,
 		s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D);
 		s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D);
 	}
-	s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D);
+	s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D);
 }
 
 void hubbub1_allow_self_refresh_control(struct hubbub *hubbub, bool allow)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
index e48fd044f572..ba1c0621f0f8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubbub.h
@@ -171,6 +171,7 @@ struct dcn_hubbub_registers {
 	uint32_t DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_B;
 	uint32_t DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_C;
 	uint32_t DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D;
+	uint32_t SDPIF_REQUEST_RATE_LIMIT;
 };
 
 #define HUBBUB_REG_FIELD_LIST_DCN32(type) \
@@ -360,7 +361,8 @@ struct dcn_hubbub_registers {
 		type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_C;\
 		type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_C;\
 		type DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_Z8_D;\
-		type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D
+		type DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_Z8_D;\
+		type SDPIF_REQUEST_RATE_LIMIT
 
 
 struct dcn_hubbub_shift {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
index 52e201e9b091..a142a00bc432 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
@@ -1179,10 +1179,12 @@ void hubp1_cursor_set_position(
 		const struct dc_cursor_mi_param *param)
 {
 	struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
-	int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x;
-	int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y;
+	int x_pos = pos->x - param->viewport.x;
+	int y_pos = pos->y - param->viewport.y;
 	int x_hotspot = pos->x_hotspot;
 	int y_hotspot = pos->y_hotspot;
+	int src_x_offset = x_pos - pos->x_hotspot;
+	int src_y_offset = y_pos - pos->y_hotspot;
 	int cursor_height = (int)hubp->curs_attr.height;
 	int cursor_width = (int)hubp->curs_attr.width;
 	uint32_t dst_x_offset;
@@ -1200,18 +1202,26 @@ void hubp1_cursor_set_position(
 	if (hubp->curs_attr.address.quad_part == 0)
 		return;
 
-	// Rotated cursor width/height and hotspots tweaks for offset calculation
+	// Transform cursor width / height and hotspots for offset calculations
 	if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) {
 		swap(cursor_height, cursor_width);
+		swap(x_hotspot, y_hotspot);
+
 		if (param->rotation == ROTATION_ANGLE_90) {
-			src_x_offset = pos->x - pos->y_hotspot - param->viewport.x;
-			src_y_offset = pos->y - pos->x_hotspot - param->viewport.y;
+			// hotspot = (-y, x)
+			src_x_offset = x_pos - (cursor_width - x_hotspot);
+			src_y_offset = y_pos - y_hotspot;
+		} else if (param->rotation == ROTATION_ANGLE_270) {
+			// hotspot = (y, -x)
+			src_x_offset = x_pos - x_hotspot;
+			src_y_offset = y_pos - (cursor_height - y_hotspot);
 		}
 	} else if (param->rotation == ROTATION_ANGLE_180) {
+		// hotspot = (-x, -y)
 		if (!param->mirror)
-			src_x_offset = pos->x - param->viewport.x;
+			src_x_offset = x_pos - (cursor_width - x_hotspot);
 
-		src_y_offset = pos->y - param->viewport.y;
+		src_y_offset = y_pos - (cursor_height - y_hotspot);
 	}
 
 	dst_x_offset = (src_x_offset >= 0) ? src_x_offset : 0;
@@ -1248,8 +1258,8 @@ void hubp1_cursor_set_position(
 			CURSOR_Y_POSITION, pos->y);
 
 	REG_SET_2(CURSOR_HOT_SPOT, 0,
-			CURSOR_HOT_SPOT_X, x_hotspot,
-			CURSOR_HOT_SPOT_Y, y_hotspot);
+			CURSOR_HOT_SPOT_X, pos->x_hotspot,
+			CURSOR_HOT_SPOT_Y, pos->y_hotspot);
 
 	REG_SET(CURSOR_DST_OFFSET, 0,
 			CURSOR_DST_X_OFFSET, dst_x_offset);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 11e4c4e46947..fe2023f18b7d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -97,10 +97,12 @@ void dcn10_lock_all_pipes(struct dc *dc,
 	bool lock)
 {
 	struct pipe_ctx *pipe_ctx;
+	struct pipe_ctx *old_pipe_ctx;
 	struct timing_generator *tg;
 	int i;
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
 		pipe_ctx = &context->res_ctx.pipe_ctx[i];
 		tg = pipe_ctx->stream_res.tg;
 
@@ -110,7 +112,7 @@ void dcn10_lock_all_pipes(struct dc *dc,
 		 */
 		if (pipe_ctx->top_pipe ||
 		    !pipe_ctx->stream ||
-		    !pipe_ctx->plane_state ||
+		    (!pipe_ctx->plane_state && !old_pipe_ctx->plane_state) ||
 		    !tg->funcs->is_tg_enabled(tg))
 			continue;
 
@@ -157,7 +159,7 @@ static void dcn10_log_hubbub_state(struct dc *dc,
 		DTN_INFO_MICRO_SEC(s->pte_meta_urgent);
 		DTN_INFO_MICRO_SEC(s->sr_enter);
 		DTN_INFO_MICRO_SEC(s->sr_exit);
-		DTN_INFO_MICRO_SEC(s->dram_clk_chanage);
+		DTN_INFO_MICRO_SEC(s->dram_clk_change);
 		DTN_INFO("\n");
 	}
 
@@ -867,6 +869,32 @@ static void false_optc_underflow_wa(
 		tg->funcs->clear_optc_underflow(tg);
 }
 
+static int calculate_vready_offset_for_group(struct pipe_ctx *pipe)
+{
+	struct pipe_ctx *other_pipe;
+	int vready_offset = pipe->pipe_dlg_param.vready_offset;
+
+	/* Always use the largest vready_offset of all connected pipes */
+	for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) {
+		if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
+			vready_offset = other_pipe->pipe_dlg_param.vready_offset;
+	}
+	for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) {
+		if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
+			vready_offset = other_pipe->pipe_dlg_param.vready_offset;
+	}
+	for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) {
+		if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
+			vready_offset = other_pipe->pipe_dlg_param.vready_offset;
+	}
+	for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) {
+		if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
+			vready_offset = other_pipe->pipe_dlg_param.vready_offset;
+	}
+
+	return vready_offset;
+}
+
 enum dc_status dcn10_enable_stream_timing(
 		struct pipe_ctx *pipe_ctx,
 		struct dc_state *context,
@@ -910,7 +938,7 @@ enum dc_status dcn10_enable_stream_timing(
 	pipe_ctx->stream_res.tg->funcs->program_timing(
 			pipe_ctx->stream_res.tg,
 			&stream->timing,
-			pipe_ctx->pipe_dlg_param.vready_offset,
+			calculate_vready_offset_for_group(pipe_ctx),
 			pipe_ctx->pipe_dlg_param.vstartup_start,
 			pipe_ctx->pipe_dlg_param.vupdate_offset,
 			pipe_ctx->pipe_dlg_param.vupdate_width,
@@ -2188,6 +2216,12 @@ void dcn10_enable_vblanks_synchronization(
 		opp = grouped_pipes[i]->stream_res.opp;
 		tg = grouped_pipes[i]->stream_res.tg;
 		tg->funcs->get_otg_active_size(tg, &width, &height);
+
+		if (!tg->funcs->is_tg_enabled(tg)) {
+			DC_SYNC_INFO("Skipping timing sync on disabled OTG\n");
+			return;
+		}
+
 		if (opp->funcs->opp_program_dpg_dimensions)
 			opp->funcs->opp_program_dpg_dimensions(opp, width, 2*(height) + 1);
 	}
@@ -2250,6 +2284,12 @@ void dcn10_enable_timing_synchronization(
 		opp = grouped_pipes[i]->stream_res.opp;
 		tg = grouped_pipes[i]->stream_res.tg;
 		tg->funcs->get_otg_active_size(tg, &width, &height);
+
+		if (!tg->funcs->is_tg_enabled(tg)) {
+			DC_SYNC_INFO("Skipping timing sync on disabled OTG\n");
+			return;
+		}
+
 		if (opp->funcs->opp_program_dpg_dimensions)
 			opp->funcs->opp_program_dpg_dimensions(opp, width, 2*(height) + 1);
 	}
@@ -2900,7 +2940,7 @@ void dcn10_program_pipe(
 
 		pipe_ctx->stream_res.tg->funcs->program_global_sync(
 				pipe_ctx->stream_res.tg,
-				pipe_ctx->pipe_dlg_param.vready_offset,
+				calculate_vready_offset_for_group(pipe_ctx),
 				pipe_ctx->pipe_dlg_param.vstartup_start,
 				pipe_ctx->pipe_dlg_param.vupdate_offset,
 				pipe_ctx->pipe_dlg_param.vupdate_width);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
index e8b6065fffad..a0f8e31d2adc 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
@@ -83,7 +83,7 @@ static unsigned int dcn10_get_hubbub_state(struct dc *dc, char *pBuf, unsigned i
 	memset(&wm, 0, sizeof(struct dcn_hubbub_wm));
 	dc->res_pool->hubbub->funcs->wm_read_state(dc->res_pool->hubbub, &wm);
 
-	chars_printed = snprintf_count(pBuf, remaining_buffer, "wm_set_index,data_urgent,pte_meta_urgent,sr_enter,sr_exit,dram_clk_chanage\n");
+	chars_printed = snprintf_count(pBuf, remaining_buffer, "wm_set_index,data_urgent,pte_meta_urgent,sr_enter,sr_exit,dram_clk_change\n");
 	remaining_buffer -= chars_printed;
 	pBuf += chars_printed;
 
@@ -98,7 +98,7 @@ static unsigned int dcn10_get_hubbub_state(struct dc *dc, char *pBuf, unsigned i
 			(s->pte_meta_urgent * frac) / ref_clk_mhz / frac, (s->pte_meta_urgent * frac) / ref_clk_mhz % frac,
 			(s->sr_enter * frac) / ref_clk_mhz / frac, (s->sr_enter * frac) / ref_clk_mhz % frac,
 			(s->sr_exit * frac) / ref_clk_mhz / frac, (s->sr_exit * frac) / ref_clk_mhz % frac,
-			(s->dram_clk_chanage * frac) / ref_clk_mhz / frac, (s->dram_clk_chanage * frac) / ref_clk_mhz % frac);
+			(s->dram_clk_change * frac) / ref_clk_mhz / frac, (s->dram_clk_change * frac) / ref_clk_mhz % frac);
 		remaining_buffer -= chars_printed;
 		pBuf += chars_printed;
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
index 33d780218790..c9e53dc49c92 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_optc.c
@@ -27,6 +27,7 @@
 #include "reg_helper.h"
 #include "dcn10_optc.h"
 #include "dc.h"
+#include "dc_trace.h"
 
 #define REG(reg)\
 	optc1->tg_regs->reg
@@ -657,6 +658,8 @@ void optc1_lock(struct timing_generator *optc)
 		REG_WAIT(OTG_MASTER_UPDATE_LOCK,
 				UPDATE_LOCK_STATUS, 1,
 				1, 10);
+
+	TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true);
 }
 
 void optc1_unlock(struct timing_generator *optc)
@@ -665,6 +668,8 @@ void optc1_unlock(struct timing_generator *optc)
 
 	REG_SET(OTG_MASTER_UPDATE_LOCK, 0,
 			OTG_MASTER_UPDATE_LOCK, 0);
+
+	TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, false);
 }
 
 void optc1_get_position(struct timing_generator *optc,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 56d30baf12df..6bfac8088ab0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -1295,47 +1295,6 @@ static uint32_t read_pipe_fuses(struct dc_context *ctx)
 	return value;
 }
 
-/*
- * Some architectures don't support soft-float (e.g. aarch64), on those
- * this function has to be called with hardfloat enabled, make sure not
- * to inline it so whatever fp stuff is done stays inside
- */
-static noinline void dcn10_resource_construct_fp(
-	struct dc *dc)
-{
-	if (dc->ctx->dce_version == DCN_VERSION_1_01) {
-		struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
-		struct dcn_ip_params *dcn_ip = dc->dcn_ip;
-		struct display_mode_lib *dml = &dc->dml;
-
-		dml->ip.max_num_dpp = 3;
-		/* TODO how to handle 23.84? */
-		dcn_soc->dram_clock_change_latency = 23;
-		dcn_ip->max_num_dpp = 3;
-	}
-	if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-		dc->dcn_soc->urgent_latency = 3;
-		dc->debug.disable_dmcu = true;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
-	}
-
-
-	dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
-	ASSERT(dc->dcn_soc->number_of_channels < 3);
-	if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
-		dc->dcn_soc->number_of_channels = 2;
-
-	if (dc->dcn_soc->number_of_channels == 1) {
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
-		dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
-		if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
-			dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
-		}
-	}
-}
-
 static bool verify_clock_values(struct dm_pp_clock_levels_with_voltage *clks)
 {
 	int i;
@@ -1510,8 +1469,9 @@ static bool dcn10_resource_construct(
 	memcpy(dc->dcn_ip, &dcn10_ip_defaults, sizeof(dcn10_ip_defaults));
 	memcpy(dc->dcn_soc, &dcn10_soc_defaults, sizeof(dcn10_soc_defaults));
 
-	/* Other architectures we build for build this with soft-float */
+	DC_FP_START();
 	dcn10_resource_construct_fp(dc);
+	DC_FP_END();
 
 	if (!dc->config.is_vmin_only_asic)
 		if (ASICREV_IS_RAVEN2(dc->ctx->asic_id.hw_internal_rev))
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.h
index a85ed228dfc2..a9dd9ae23ec9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_dwb.h
@@ -27,204 +27,177 @@
 #define TO_DCN20_DWBC(dwbc_base) \
 	container_of(dwbc_base, struct dcn20_dwbc, base)
 
-/* DCN */
-#define BASE_INNER(seg) \
-	DCE_BASE__INST0_SEG ## seg
-
-#define BASE(seg) \
-	BASE_INNER(seg)
-
-#define SR(reg_name)\
-		.reg_name = BASE(mm ## reg_name ## _BASE_IDX) +  \
-					mm ## reg_name
-
-#define SRI(reg_name, block, id)\
-	.reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
-					mm ## block ## id ## _ ## reg_name
-
-#define SRI2(reg_name, block, id)\
-	.reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \
-					mm ## reg_name
-
-#define SRII(reg_name, block, id)\
-	.reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
-					mm ## block ## id ## _ ## reg_name
-
-#define SF(reg_name, field_name, post_fix)\
-	.field_name = reg_name ## __ ## field_name ## post_fix
-
-
 #define DWBC_COMMON_REG_LIST_DCN2_0(inst) \
-	SRI2(WB_ENABLE, CNV, inst),\
-	SRI2(WB_EC_CONFIG, CNV, inst),\
-	SRI2(CNV_MODE, CNV, inst),\
-	SRI2(CNV_WINDOW_START, CNV, inst),\
-	SRI2(CNV_WINDOW_SIZE, CNV, inst),\
-	SRI2(CNV_UPDATE, CNV, inst),\
-	SRI2(CNV_SOURCE_SIZE, CNV, inst),\
-	SRI2(CNV_TEST_CNTL, CNV, inst),\
-	SRI2(CNV_TEST_CRC_RED, CNV, inst),\
-	SRI2(CNV_TEST_CRC_GREEN, CNV, inst),\
-	SRI2(CNV_TEST_CRC_BLUE, CNV, inst),\
-	SRI2(WBSCL_COEF_RAM_SELECT, WBSCL, inst),\
-	SRI2(WBSCL_COEF_RAM_TAP_DATA, WBSCL, inst),\
-	SRI2(WBSCL_MODE, WBSCL, inst),\
-	SRI2(WBSCL_TAP_CONTROL, WBSCL, inst),\
-	SRI2(WBSCL_DEST_SIZE, WBSCL, inst),\
-	SRI2(WBSCL_HORZ_FILTER_SCALE_RATIO, WBSCL, inst),\
-	SRI2(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL, inst),\
-	SRI2(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL, inst),\
-	SRI2(WBSCL_VERT_FILTER_SCALE_RATIO, WBSCL, inst),\
-	SRI2(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL, inst),\
-	SRI2(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL, inst),\
-	SRI2(WBSCL_ROUND_OFFSET, WBSCL, inst),\
-	SRI2(WBSCL_OVERFLOW_STATUS, WBSCL, inst),\
-	SRI2(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL, inst),\
-	SRI2(WBSCL_TEST_CNTL, WBSCL, inst),\
-	SRI2(WBSCL_TEST_CRC_RED, WBSCL, inst),\
-	SRI2(WBSCL_TEST_CRC_GREEN, WBSCL, inst),\
-	SRI2(WBSCL_TEST_CRC_BLUE, WBSCL, inst),\
-	SRI2(WBSCL_BACKPRESSURE_CNT_EN, WBSCL, inst),\
-	SRI2(WB_MCIF_BACKPRESSURE_CNT, WBSCL, inst),\
-	SRI2(WBSCL_CLAMP_Y_RGB, WBSCL, inst),\
-	SRI2(WBSCL_CLAMP_CBCR, WBSCL, inst),\
-	SRI2(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL, inst),\
-	SRI2(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL, inst),\
-	SRI2(WBSCL_DEBUG, WBSCL, inst),\
-	SRI2(WBSCL_TEST_DEBUG_INDEX, WBSCL, inst),\
-	SRI2(WBSCL_TEST_DEBUG_DATA, WBSCL, inst),\
-	SRI2(WB_DEBUG_CTRL, CNV, inst),\
-	SRI2(WB_DBG_MODE, CNV, inst),\
-	SRI2(WB_HW_DEBUG, CNV, inst),\
-	SRI2(CNV_TEST_DEBUG_INDEX, CNV, inst),\
-	SRI2(CNV_TEST_DEBUG_DATA, CNV, inst),\
-	SRI2(WB_SOFT_RESET, CNV, inst),\
-	SRI2(WB_WARM_UP_MODE_CTL1, CNV, inst),\
-	SRI2(WB_WARM_UP_MODE_CTL2, CNV, inst)
+	SRI2_DWB(WB_ENABLE, CNV, inst),\
+	SRI2_DWB(WB_EC_CONFIG, CNV, inst),\
+	SRI2_DWB(CNV_MODE, CNV, inst),\
+	SRI2_DWB(CNV_WINDOW_START, CNV, inst),\
+	SRI2_DWB(CNV_WINDOW_SIZE, CNV, inst),\
+	SRI2_DWB(CNV_UPDATE, CNV, inst),\
+	SRI2_DWB(CNV_SOURCE_SIZE, CNV, inst),\
+	SRI2_DWB(CNV_TEST_CNTL, CNV, inst),\
+	SRI2_DWB(CNV_TEST_CRC_RED, CNV, inst),\
+	SRI2_DWB(CNV_TEST_CRC_GREEN, CNV, inst),\
+	SRI2_DWB(CNV_TEST_CRC_BLUE, CNV, inst),\
+	SRI2_DWB(WBSCL_COEF_RAM_SELECT, WBSCL, inst),\
+	SRI2_DWB(WBSCL_COEF_RAM_TAP_DATA, WBSCL, inst),\
+	SRI2_DWB(WBSCL_MODE, WBSCL, inst),\
+	SRI2_DWB(WBSCL_TAP_CONTROL, WBSCL, inst),\
+	SRI2_DWB(WBSCL_DEST_SIZE, WBSCL, inst),\
+	SRI2_DWB(WBSCL_HORZ_FILTER_SCALE_RATIO, WBSCL, inst),\
+	SRI2_DWB(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL, inst),\
+	SRI2_DWB(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL, inst),\
+	SRI2_DWB(WBSCL_VERT_FILTER_SCALE_RATIO, WBSCL, inst),\
+	SRI2_DWB(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL, inst),\
+	SRI2_DWB(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL, inst),\
+	SRI2_DWB(WBSCL_ROUND_OFFSET, WBSCL, inst),\
+	SRI2_DWB(WBSCL_OVERFLOW_STATUS, WBSCL, inst),\
+	SRI2_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL, inst),\
+	SRI2_DWB(WBSCL_TEST_CNTL, WBSCL, inst),\
+	SRI2_DWB(WBSCL_TEST_CRC_RED, WBSCL, inst),\
+	SRI2_DWB(WBSCL_TEST_CRC_GREEN, WBSCL, inst),\
+	SRI2_DWB(WBSCL_TEST_CRC_BLUE, WBSCL, inst),\
+	SRI2_DWB(WBSCL_BACKPRESSURE_CNT_EN, WBSCL, inst),\
+	SRI2_DWB(WB_MCIF_BACKPRESSURE_CNT, WBSCL, inst),\
+	SRI2_DWB(WBSCL_CLAMP_Y_RGB, WBSCL, inst),\
+	SRI2_DWB(WBSCL_CLAMP_CBCR, WBSCL, inst),\
+	SRI2_DWB(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL, inst),\
+	SRI2_DWB(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL, inst),\
+	SRI2_DWB(WBSCL_DEBUG, WBSCL, inst),\
+	SRI2_DWB(WBSCL_TEST_DEBUG_INDEX, WBSCL, inst),\
+	SRI2_DWB(WBSCL_TEST_DEBUG_DATA, WBSCL, inst),\
+	SRI2_DWB(WB_DEBUG_CTRL, CNV, inst),\
+	SRI2_DWB(WB_DBG_MODE, CNV, inst),\
+	SRI2_DWB(WB_HW_DEBUG, CNV, inst),\
+	SRI2_DWB(CNV_TEST_DEBUG_INDEX, CNV, inst),\
+	SRI2_DWB(CNV_TEST_DEBUG_DATA, CNV, inst),\
+	SRI2_DWB(WB_SOFT_RESET, CNV, inst),\
+	SRI2_DWB(WB_WARM_UP_MODE_CTL1, CNV, inst),\
+	SRI2_DWB(WB_WARM_UP_MODE_CTL2, CNV, inst)
 
 #define DWBC_COMMON_MASK_SH_LIST_DCN2_0(mask_sh) \
-	SF(WB_ENABLE, WB_ENABLE, mask_sh),\
-	SF(WB_EC_CONFIG, DISPCLK_R_WB_GATE_DIS, mask_sh),\
-	SF(WB_EC_CONFIG, DISPCLK_G_WB_GATE_DIS, mask_sh),\
-	SF(WB_EC_CONFIG, DISPCLK_G_WBSCL_GATE_DIS, mask_sh),\
-	SF(WB_EC_CONFIG, WB_TEST_CLK_SEL, mask_sh),\
-	SF(WB_EC_CONFIG, WB_LB_LS_DIS, mask_sh),\
-	SF(WB_EC_CONFIG, WB_LB_SD_DIS, mask_sh),\
-	SF(WB_EC_CONFIG, WB_LUT_LS_DIS, mask_sh),\
-	SF(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_MODE_SEL, mask_sh),\
-	SF(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_DIS, mask_sh),\
-	SF(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_FORCE, mask_sh),\
-	SF(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_STATE, mask_sh),\
-	SF(WB_EC_CONFIG, WB_RAM_PW_SAVE_MODE, mask_sh),\
-	SF(WB_EC_CONFIG, WBSCL_LUT_MEM_PWR_STATE, mask_sh),\
-	SF(CNV_MODE, CNV_OUT_BPC, mask_sh),\
-	SF(CNV_MODE, CNV_FRAME_CAPTURE_RATE, mask_sh),\
-	SF(CNV_MODE, CNV_WINDOW_CROP_EN, mask_sh),\
-	SF(CNV_MODE, CNV_STEREO_TYPE, mask_sh),\
-	SF(CNV_MODE, CNV_INTERLACED_MODE, mask_sh),\
-	SF(CNV_MODE, CNV_EYE_SELECTION, mask_sh),\
-	SF(CNV_MODE, CNV_STEREO_POLARITY, mask_sh),\
-	SF(CNV_MODE, CNV_INTERLACED_FIELD_ORDER, mask_sh),\
-	SF(CNV_MODE, CNV_STEREO_SPLIT, mask_sh),\
-	SF(CNV_MODE, CNV_NEW_CONTENT, mask_sh),\
-	SF(CNV_MODE, CNV_FRAME_CAPTURE_EN_CURRENT, mask_sh),\
-	SF(CNV_MODE, CNV_FRAME_CAPTURE_EN, mask_sh),\
-	SF(CNV_WINDOW_START, CNV_WINDOW_START_X, mask_sh),\
-	SF(CNV_WINDOW_START, CNV_WINDOW_START_Y, mask_sh),\
-	SF(CNV_WINDOW_SIZE, CNV_WINDOW_WIDTH, mask_sh),\
-	SF(CNV_WINDOW_SIZE, CNV_WINDOW_HEIGHT, mask_sh),\
-	SF(CNV_UPDATE, CNV_UPDATE_PENDING, mask_sh),\
-	SF(CNV_UPDATE, CNV_UPDATE_TAKEN, mask_sh),\
-	SF(CNV_UPDATE, CNV_UPDATE_LOCK, mask_sh),\
-	SF(CNV_SOURCE_SIZE, CNV_SOURCE_WIDTH, mask_sh),\
-	SF(CNV_SOURCE_SIZE, CNV_SOURCE_HEIGHT, mask_sh),\
-	SF(CNV_TEST_CNTL, CNV_TEST_CRC_EN, mask_sh),\
-	SF(CNV_TEST_CNTL, CNV_TEST_CRC_CONT_EN, mask_sh),\
-	SF(CNV_TEST_CRC_RED, CNV_TEST_CRC_RED_MASK, mask_sh),\
-	SF(CNV_TEST_CRC_RED, CNV_TEST_CRC_SIG_RED, mask_sh),\
-	SF(CNV_TEST_CRC_GREEN, CNV_TEST_CRC_GREEN_MASK, mask_sh),\
-	SF(CNV_TEST_CRC_GREEN, CNV_TEST_CRC_SIG_GREEN, mask_sh),\
-	SF(CNV_TEST_CRC_BLUE, CNV_TEST_CRC_BLUE_MASK, mask_sh),\
-	SF(CNV_TEST_CRC_BLUE, CNV_TEST_CRC_SIG_BLUE, mask_sh),\
-	SF(WB_DEBUG_CTRL, WB_DEBUG_EN, mask_sh),\
-	SF(WB_DEBUG_CTRL, WB_DEBUG_SEL, mask_sh),\
-	SF(WB_DBG_MODE, WB_DBG_MODE_EN, mask_sh),\
-	SF(WB_DBG_MODE, WB_DBG_DIN_FMT, mask_sh),\
-	SF(WB_DBG_MODE, WB_DBG_36MODE, mask_sh),\
-	SF(WB_DBG_MODE, WB_DBG_CMAP, mask_sh),\
-	SF(WB_DBG_MODE, WB_DBG_PXLRATE_ERROR, mask_sh),\
-	SF(WB_DBG_MODE, WB_DBG_SOURCE_WIDTH, mask_sh),\
-	SF(WB_HW_DEBUG, WB_HW_DEBUG, mask_sh),\
-	SF(WB_SOFT_RESET, WB_SOFT_RESET, mask_sh),\
-	SF(CNV_TEST_DEBUG_INDEX, CNV_TEST_DEBUG_INDEX, mask_sh),\
-	SF(CNV_TEST_DEBUG_INDEX, CNV_TEST_DEBUG_WRITE_EN, mask_sh),\
-	SF(CNV_TEST_DEBUG_DATA, CNV_TEST_DEBUG_DATA, mask_sh),\
-	SF(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_TAP_PAIR_IDX, mask_sh),\
-	SF(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_PHASE, mask_sh),\
-	SF(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_FILTER_TYPE, mask_sh),\
-	SF(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_EVEN_TAP_COEF, mask_sh),\
-	SF(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_EVEN_TAP_COEF_EN, mask_sh),\
-	SF(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_ODD_TAP_COEF, mask_sh),\
-	SF(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_ODD_TAP_COEF_EN, mask_sh),\
-	SF(WBSCL_MODE, WBSCL_MODE, mask_sh),\
-	SF(WBSCL_MODE, WBSCL_OUT_BIT_DEPTH, mask_sh),\
-	SF(WBSCL_TAP_CONTROL, WBSCL_V_NUM_OF_TAPS_Y_RGB, mask_sh),\
-	SF(WBSCL_TAP_CONTROL, WBSCL_V_NUM_OF_TAPS_CBCR, mask_sh),\
-	SF(WBSCL_TAP_CONTROL, WBSCL_H_NUM_OF_TAPS_Y_RGB, mask_sh),\
-	SF(WBSCL_TAP_CONTROL, WBSCL_H_NUM_OF_TAPS_CBCR, mask_sh),\
-	SF(WBSCL_DEST_SIZE, WBSCL_DEST_HEIGHT, mask_sh),\
-	SF(WBSCL_DEST_SIZE, WBSCL_DEST_WIDTH, mask_sh),\
-	SF(WBSCL_HORZ_FILTER_SCALE_RATIO, WBSCL_H_SCALE_RATIO, mask_sh),\
-	SF(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL_H_INIT_FRAC_Y_RGB, mask_sh),\
-	SF(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL_H_INIT_INT_Y_RGB, mask_sh),\
-	SF(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL_H_INIT_FRAC_CBCR, mask_sh),\
-	SF(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL_H_INIT_INT_CBCR, mask_sh),\
-	SF(WBSCL_VERT_FILTER_SCALE_RATIO, WBSCL_V_SCALE_RATIO, mask_sh),\
-	SF(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL_V_INIT_FRAC_Y_RGB, mask_sh),\
-	SF(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL_V_INIT_INT_Y_RGB, mask_sh),\
-	SF(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL_V_INIT_FRAC_CBCR, mask_sh),\
-	SF(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL_V_INIT_INT_CBCR, mask_sh),\
-	SF(WBSCL_ROUND_OFFSET, WBSCL_ROUND_OFFSET_Y_RGB, mask_sh),\
-	SF(WBSCL_ROUND_OFFSET, WBSCL_ROUND_OFFSET_CBCR, mask_sh),\
-	SF(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_FLAG, mask_sh),\
-	SF(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_ACK, mask_sh),\
-	SF(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_MASK, mask_sh),\
-	SF(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_INT_STATUS, mask_sh),\
-	SF(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_INT_TYPE, mask_sh),\
-	SF(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_FLAG, mask_sh),\
-	SF(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_ACK, mask_sh),\
-	SF(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_MASK, mask_sh),\
-	SF(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_INT_STATUS, mask_sh),\
-	SF(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_INT_TYPE, mask_sh),\
-	SF(WBSCL_TEST_CNTL, WBSCL_TEST_CRC_EN, mask_sh),\
-	SF(WBSCL_TEST_CNTL, WBSCL_TEST_CRC_CONT_EN, mask_sh),\
-	SF(WBSCL_TEST_CRC_RED, WBSCL_TEST_CRC_RED_MASK, mask_sh),\
-	SF(WBSCL_TEST_CRC_RED, WBSCL_TEST_CRC_SIG_RED, mask_sh),\
-	SF(WBSCL_TEST_CRC_GREEN, WBSCL_TEST_CRC_GREEN_MASK, mask_sh),\
-	SF(WBSCL_TEST_CRC_GREEN, WBSCL_TEST_CRC_SIG_GREEN, mask_sh),\
-	SF(WBSCL_TEST_CRC_BLUE, WBSCL_TEST_CRC_BLUE_MASK, mask_sh),\
-	SF(WBSCL_TEST_CRC_BLUE, WBSCL_TEST_CRC_SIG_BLUE, mask_sh),\
-	SF(WBSCL_BACKPRESSURE_CNT_EN, WBSCL_BACKPRESSURE_CNT_EN, mask_sh),\
-	SF(WB_MCIF_BACKPRESSURE_CNT, WB_MCIF_Y_MAX_BACKPRESSURE, mask_sh),\
-	SF(WB_MCIF_BACKPRESSURE_CNT, WB_MCIF_C_MAX_BACKPRESSURE, mask_sh),\
-	SF(WBSCL_CLAMP_Y_RGB, WBSCL_CLAMP_UPPER_Y_RGB, mask_sh),\
-	SF(WBSCL_CLAMP_Y_RGB, WBSCL_CLAMP_LOWER_Y_RGB, mask_sh),\
-	SF(WBSCL_CLAMP_CBCR, WBSCL_CLAMP_UPPER_CBCR, mask_sh),\
-	SF(WBSCL_CLAMP_CBCR, WBSCL_CLAMP_LOWER_CBCR, mask_sh),\
-	SF(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL_OUTSIDE_PIX_STRATEGY, mask_sh),\
-	SF(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL_BLACK_COLOR_G_Y, mask_sh),\
-	SF(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL_BLACK_COLOR_B_CB, mask_sh),\
-	SF(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL_BLACK_COLOR_R_CR, mask_sh),\
-	SF(WBSCL_DEBUG, WBSCL_DEBUG, mask_sh),\
-	SF(WBSCL_TEST_DEBUG_INDEX, WBSCL_TEST_DEBUG_INDEX, mask_sh),\
-	SF(WBSCL_TEST_DEBUG_INDEX, WBSCL_TEST_DEBUG_WRITE_EN, mask_sh),\
-	SF(WBSCL_TEST_DEBUG_DATA, WBSCL_TEST_DEBUG_DATA, mask_sh),\
-	SF(WB_WARM_UP_MODE_CTL1, WIDTH_WARMUP, mask_sh),\
-	SF(WB_WARM_UP_MODE_CTL1, HEIGHT_WARMUP, mask_sh),\
-	SF(WB_WARM_UP_MODE_CTL1, GMC_WARM_UP_ENABLE, mask_sh),\
-	SF(WB_WARM_UP_MODE_CTL2, DATA_VALUE_WARMUP, mask_sh),\
-	SF(WB_WARM_UP_MODE_CTL2, MODE_WARMUP, mask_sh),\
-	SF(WB_WARM_UP_MODE_CTL2, DATA_DEPTH_WARMUP, mask_sh)
+	SF_DWB(WB_ENABLE, WB_ENABLE, mask_sh),\
+	SF_DWB(WB_EC_CONFIG, DISPCLK_R_WB_GATE_DIS, mask_sh),\
+	SF_DWB(WB_EC_CONFIG, DISPCLK_G_WB_GATE_DIS, mask_sh),\
+	SF_DWB(WB_EC_CONFIG, DISPCLK_G_WBSCL_GATE_DIS, mask_sh),\
+	SF_DWB(WB_EC_CONFIG, WB_TEST_CLK_SEL, mask_sh),\
+	SF_DWB(WB_EC_CONFIG, WB_LB_LS_DIS, mask_sh),\
+	SF_DWB(WB_EC_CONFIG, WB_LB_SD_DIS, mask_sh),\
+	SF_DWB(WB_EC_CONFIG, WB_LUT_LS_DIS, mask_sh),\
+	SF_DWB(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_MODE_SEL, mask_sh),\
+	SF_DWB(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_DIS, mask_sh),\
+	SF_DWB(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_FORCE, mask_sh),\
+	SF_DWB(WB_EC_CONFIG, WBSCL_LB_MEM_PWR_STATE, mask_sh),\
+	SF_DWB(WB_EC_CONFIG, WB_RAM_PW_SAVE_MODE, mask_sh),\
+	SF_DWB(WB_EC_CONFIG, WBSCL_LUT_MEM_PWR_STATE, mask_sh),\
+	SF_DWB(CNV_MODE, CNV_OUT_BPC, mask_sh),\
+	SF_DWB(CNV_MODE, CNV_FRAME_CAPTURE_RATE, mask_sh),\
+	SF_DWB(CNV_MODE, CNV_WINDOW_CROP_EN, mask_sh),\
+	SF_DWB(CNV_MODE, CNV_STEREO_TYPE, mask_sh),\
+	SF_DWB(CNV_MODE, CNV_INTERLACED_MODE, mask_sh),\
+	SF_DWB(CNV_MODE, CNV_EYE_SELECTION, mask_sh),\
+	SF_DWB(CNV_MODE, CNV_STEREO_POLARITY, mask_sh),\
+	SF_DWB(CNV_MODE, CNV_INTERLACED_FIELD_ORDER, mask_sh),\
+	SF_DWB(CNV_MODE, CNV_STEREO_SPLIT, mask_sh),\
+	SF_DWB(CNV_MODE, CNV_NEW_CONTENT, mask_sh),\
+	SF_DWB(CNV_MODE, CNV_FRAME_CAPTURE_EN_CURRENT, mask_sh),\
+	SF_DWB(CNV_MODE, CNV_FRAME_CAPTURE_EN, mask_sh),\
+	SF_DWB(CNV_WINDOW_START, CNV_WINDOW_START_X, mask_sh),\
+	SF_DWB(CNV_WINDOW_START, CNV_WINDOW_START_Y, mask_sh),\
+	SF_DWB(CNV_WINDOW_SIZE, CNV_WINDOW_WIDTH, mask_sh),\
+	SF_DWB(CNV_WINDOW_SIZE, CNV_WINDOW_HEIGHT, mask_sh),\
+	SF_DWB(CNV_UPDATE, CNV_UPDATE_PENDING, mask_sh),\
+	SF_DWB(CNV_UPDATE, CNV_UPDATE_TAKEN, mask_sh),\
+	SF_DWB(CNV_UPDATE, CNV_UPDATE_LOCK, mask_sh),\
+	SF_DWB(CNV_SOURCE_SIZE, CNV_SOURCE_WIDTH, mask_sh),\
+	SF_DWB(CNV_SOURCE_SIZE, CNV_SOURCE_HEIGHT, mask_sh),\
+	SF_DWB(CNV_TEST_CNTL, CNV_TEST_CRC_EN, mask_sh),\
+	SF_DWB(CNV_TEST_CNTL, CNV_TEST_CRC_CONT_EN, mask_sh),\
+	SF_DWB(CNV_TEST_CRC_RED, CNV_TEST_CRC_RED_MASK, mask_sh),\
+	SF_DWB(CNV_TEST_CRC_RED, CNV_TEST_CRC_SIG_RED, mask_sh),\
+	SF_DWB(CNV_TEST_CRC_GREEN, CNV_TEST_CRC_GREEN_MASK, mask_sh),\
+	SF_DWB(CNV_TEST_CRC_GREEN, CNV_TEST_CRC_SIG_GREEN, mask_sh),\
+	SF_DWB(CNV_TEST_CRC_BLUE, CNV_TEST_CRC_BLUE_MASK, mask_sh),\
+	SF_DWB(CNV_TEST_CRC_BLUE, CNV_TEST_CRC_SIG_BLUE, mask_sh),\
+	SF_DWB(WB_DEBUG_CTRL, WB_DEBUG_EN, mask_sh),\
+	SF_DWB(WB_DEBUG_CTRL, WB_DEBUG_SEL, mask_sh),\
+	SF_DWB(WB_DBG_MODE, WB_DBG_MODE_EN, mask_sh),\
+	SF_DWB(WB_DBG_MODE, WB_DBG_DIN_FMT, mask_sh),\
+	SF_DWB(WB_DBG_MODE, WB_DBG_36MODE, mask_sh),\
+	SF_DWB(WB_DBG_MODE, WB_DBG_CMAP, mask_sh),\
+	SF_DWB(WB_DBG_MODE, WB_DBG_PXLRATE_ERROR, mask_sh),\
+	SF_DWB(WB_DBG_MODE, WB_DBG_SOURCE_WIDTH, mask_sh),\
+	SF_DWB(WB_HW_DEBUG, WB_HW_DEBUG, mask_sh),\
+	SF_DWB(WB_SOFT_RESET, WB_SOFT_RESET, mask_sh),\
+	SF_DWB(CNV_TEST_DEBUG_INDEX, CNV_TEST_DEBUG_INDEX, mask_sh),\
+	SF_DWB(CNV_TEST_DEBUG_INDEX, CNV_TEST_DEBUG_WRITE_EN, mask_sh),\
+	SF_DWB(CNV_TEST_DEBUG_DATA, CNV_TEST_DEBUG_DATA, mask_sh),\
+	SF_DWB(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_TAP_PAIR_IDX, mask_sh),\
+	SF_DWB(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_PHASE, mask_sh),\
+	SF_DWB(WBSCL_COEF_RAM_SELECT, WBSCL_COEF_RAM_FILTER_TYPE, mask_sh),\
+	SF_DWB(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_EVEN_TAP_COEF, mask_sh),\
+	SF_DWB(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_EVEN_TAP_COEF_EN, mask_sh),\
+	SF_DWB(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_ODD_TAP_COEF, mask_sh),\
+	SF_DWB(WBSCL_COEF_RAM_TAP_DATA, WBSCL_COEF_RAM_ODD_TAP_COEF_EN, mask_sh),\
+	SF_DWB(WBSCL_MODE, WBSCL_MODE, mask_sh),\
+	SF_DWB(WBSCL_MODE, WBSCL_OUT_BIT_DEPTH, mask_sh),\
+	SF_DWB(WBSCL_TAP_CONTROL, WBSCL_V_NUM_OF_TAPS_Y_RGB, mask_sh),\
+	SF_DWB(WBSCL_TAP_CONTROL, WBSCL_V_NUM_OF_TAPS_CBCR, mask_sh),\
+	SF_DWB(WBSCL_TAP_CONTROL, WBSCL_H_NUM_OF_TAPS_Y_RGB, mask_sh),\
+	SF_DWB(WBSCL_TAP_CONTROL, WBSCL_H_NUM_OF_TAPS_CBCR, mask_sh),\
+	SF_DWB(WBSCL_DEST_SIZE, WBSCL_DEST_HEIGHT, mask_sh),\
+	SF_DWB(WBSCL_DEST_SIZE, WBSCL_DEST_WIDTH, mask_sh),\
+	SF_DWB(WBSCL_HORZ_FILTER_SCALE_RATIO, WBSCL_H_SCALE_RATIO, mask_sh),\
+	SF_DWB(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL_H_INIT_FRAC_Y_RGB, mask_sh),\
+	SF_DWB(WBSCL_HORZ_FILTER_INIT_Y_RGB, WBSCL_H_INIT_INT_Y_RGB, mask_sh),\
+	SF_DWB(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL_H_INIT_FRAC_CBCR, mask_sh),\
+	SF_DWB(WBSCL_HORZ_FILTER_INIT_CBCR, WBSCL_H_INIT_INT_CBCR, mask_sh),\
+	SF_DWB(WBSCL_VERT_FILTER_SCALE_RATIO, WBSCL_V_SCALE_RATIO, mask_sh),\
+	SF_DWB(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL_V_INIT_FRAC_Y_RGB, mask_sh),\
+	SF_DWB(WBSCL_VERT_FILTER_INIT_Y_RGB, WBSCL_V_INIT_INT_Y_RGB, mask_sh),\
+	SF_DWB(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL_V_INIT_FRAC_CBCR, mask_sh),\
+	SF_DWB(WBSCL_VERT_FILTER_INIT_CBCR, WBSCL_V_INIT_INT_CBCR, mask_sh),\
+	SF_DWB(WBSCL_ROUND_OFFSET, WBSCL_ROUND_OFFSET_Y_RGB, mask_sh),\
+	SF_DWB(WBSCL_ROUND_OFFSET, WBSCL_ROUND_OFFSET_CBCR, mask_sh),\
+	SF_DWB(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_FLAG, mask_sh),\
+	SF_DWB(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_ACK, mask_sh),\
+	SF_DWB(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_MASK, mask_sh),\
+	SF_DWB(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_INT_STATUS, mask_sh),\
+	SF_DWB(WBSCL_OVERFLOW_STATUS, WBSCL_DATA_OVERFLOW_INT_TYPE, mask_sh),\
+	SF_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_FLAG, mask_sh),\
+	SF_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_ACK, mask_sh),\
+	SF_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_MASK, mask_sh),\
+	SF_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_INT_STATUS, mask_sh),\
+	SF_DWB(WBSCL_COEF_RAM_CONFLICT_STATUS, WBSCL_HOST_CONFLICT_INT_TYPE, mask_sh),\
+	SF_DWB(WBSCL_TEST_CNTL, WBSCL_TEST_CRC_EN, mask_sh),\
+	SF_DWB(WBSCL_TEST_CNTL, WBSCL_TEST_CRC_CONT_EN, mask_sh),\
+	SF_DWB(WBSCL_TEST_CRC_RED, WBSCL_TEST_CRC_RED_MASK, mask_sh),\
+	SF_DWB(WBSCL_TEST_CRC_RED, WBSCL_TEST_CRC_SIG_RED, mask_sh),\
+	SF_DWB(WBSCL_TEST_CRC_GREEN, WBSCL_TEST_CRC_GREEN_MASK, mask_sh),\
+	SF_DWB(WBSCL_TEST_CRC_GREEN, WBSCL_TEST_CRC_SIG_GREEN, mask_sh),\
+	SF_DWB(WBSCL_TEST_CRC_BLUE, WBSCL_TEST_CRC_BLUE_MASK, mask_sh),\
+	SF_DWB(WBSCL_TEST_CRC_BLUE, WBSCL_TEST_CRC_SIG_BLUE, mask_sh),\
+	SF_DWB(WBSCL_BACKPRESSURE_CNT_EN, WBSCL_BACKPRESSURE_CNT_EN, mask_sh),\
+	SF_DWB(WB_MCIF_BACKPRESSURE_CNT, WB_MCIF_Y_MAX_BACKPRESSURE, mask_sh),\
+	SF_DWB(WB_MCIF_BACKPRESSURE_CNT, WB_MCIF_C_MAX_BACKPRESSURE, mask_sh),\
+	SF_DWB(WBSCL_CLAMP_Y_RGB, WBSCL_CLAMP_UPPER_Y_RGB, mask_sh),\
+	SF_DWB(WBSCL_CLAMP_Y_RGB, WBSCL_CLAMP_LOWER_Y_RGB, mask_sh),\
+	SF_DWB(WBSCL_CLAMP_CBCR, WBSCL_CLAMP_UPPER_CBCR, mask_sh),\
+	SF_DWB(WBSCL_CLAMP_CBCR, WBSCL_CLAMP_LOWER_CBCR, mask_sh),\
+	SF_DWB(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL_OUTSIDE_PIX_STRATEGY, mask_sh),\
+	SF_DWB(WBSCL_OUTSIDE_PIX_STRATEGY, WBSCL_BLACK_COLOR_G_Y, mask_sh),\
+	SF_DWB(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL_BLACK_COLOR_B_CB, mask_sh),\
+	SF_DWB(WBSCL_OUTSIDE_PIX_STRATEGY_CBCR, WBSCL_BLACK_COLOR_R_CR, mask_sh),\
+	SF_DWB(WBSCL_DEBUG, WBSCL_DEBUG, mask_sh),\
+	SF_DWB(WBSCL_TEST_DEBUG_INDEX, WBSCL_TEST_DEBUG_INDEX, mask_sh),\
+	SF_DWB(WBSCL_TEST_DEBUG_INDEX, WBSCL_TEST_DEBUG_WRITE_EN, mask_sh),\
+	SF_DWB(WBSCL_TEST_DEBUG_DATA, WBSCL_TEST_DEBUG_DATA, mask_sh),\
+	SF_DWB(WB_WARM_UP_MODE_CTL1, WIDTH_WARMUP, mask_sh),\
+	SF_DWB(WB_WARM_UP_MODE_CTL1, HEIGHT_WARMUP, mask_sh),\
+	SF_DWB(WB_WARM_UP_MODE_CTL1, GMC_WARM_UP_ENABLE, mask_sh),\
+	SF_DWB(WB_WARM_UP_MODE_CTL2, DATA_VALUE_WARMUP, mask_sh),\
+	SF_DWB(WB_WARM_UP_MODE_CTL2, MODE_WARMUP, mask_sh),\
+	SF_DWB(WB_WARM_UP_MODE_CTL2, DATA_DEPTH_WARMUP, mask_sh)
 
 #define DWBC_REG_FIELD_LIST_DCN2_0(type) \
 	type WB_ENABLE;\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
index aacb1fb5c73e..24bd93219936 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubbub.c
@@ -500,7 +500,7 @@ void hubbub2_wm_read_state(struct hubbub *hubbub,
 		s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_A);
 		s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A);
 	}
-	s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A);
+	s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A);
 
 	s = &wm->sets[1];
 	s->wm_set = 1;
@@ -511,7 +511,7 @@ void hubbub2_wm_read_state(struct hubbub *hubbub,
 		s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_B);
 		s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B);
 	}
-	s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B);
+	s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B);
 
 	s = &wm->sets[2];
 	s->wm_set = 2;
@@ -522,7 +522,7 @@ void hubbub2_wm_read_state(struct hubbub *hubbub,
 		s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_C);
 		s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C);
 	}
-	s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C);
+	s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C);
 
 	s = &wm->sets[3];
 	s->wm_set = 3;
@@ -533,7 +533,7 @@ void hubbub2_wm_read_state(struct hubbub *hubbub,
 		s->sr_enter = REG_READ(DCHUBBUB_ARB_ALLOW_SR_ENTER_WATERMARK_D);
 		s->sr_exit = REG_READ(DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D);
 	}
-	s->dram_clk_chanage = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D);
+	s->dram_clk_change = REG_READ(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D);
 }
 
 void hubbub2_get_dchub_ref_freq(struct hubbub *hubbub,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
index 4996d2810edb..4566bc7abf17 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
@@ -623,6 +623,10 @@ void hubp2_cursor_set_attributes(
 	hubp->att.size.bits.width    = attr->width;
 	hubp->att.size.bits.height   = attr->height;
 	hubp->att.cur_ctl.bits.mode  = attr->color_format;
+
+	hubp->cur_rect.w = attr->width;
+	hubp->cur_rect.h = attr->height;
+
 	hubp->att.cur_ctl.bits.pitch = hw_pitch;
 	hubp->att.cur_ctl.bits.line_per_chunk = lpc;
 	hubp->att.cur_ctl.bits.cur_2x_magnify = attr->attribute_flags.bits.ENABLE_MAGNIFICATION;
@@ -969,10 +973,12 @@ void hubp2_cursor_set_position(
 		const struct dc_cursor_mi_param *param)
 {
 	struct dcn20_hubp *hubp2 = TO_DCN20_HUBP(hubp);
-	int src_x_offset = pos->x - pos->x_hotspot - param->viewport.x;
-	int src_y_offset = pos->y - pos->y_hotspot - param->viewport.y;
+	int x_pos = pos->x - param->viewport.x;
+	int y_pos = pos->y - param->viewport.y;
 	int x_hotspot = pos->x_hotspot;
 	int y_hotspot = pos->y_hotspot;
+	int src_x_offset = x_pos - pos->x_hotspot;
+	int src_y_offset = y_pos - pos->y_hotspot;
 	int cursor_height = (int)hubp->curs_attr.height;
 	int cursor_width = (int)hubp->curs_attr.width;
 	uint32_t dst_x_offset;
@@ -990,18 +996,26 @@ void hubp2_cursor_set_position(
 	if (hubp->curs_attr.address.quad_part == 0)
 		return;
 
-	// Rotated cursor width/height and hotspots tweaks for offset calculation
+	// Transform cursor width / height and hotspots for offset calculations
 	if (param->rotation == ROTATION_ANGLE_90 || param->rotation == ROTATION_ANGLE_270) {
 		swap(cursor_height, cursor_width);
+		swap(x_hotspot, y_hotspot);
+
 		if (param->rotation == ROTATION_ANGLE_90) {
-			src_x_offset = pos->x - pos->y_hotspot - param->viewport.x;
-			src_y_offset = pos->y - pos->x_hotspot - param->viewport.y;
+			// hotspot = (-y, x)
+			src_x_offset = x_pos - (cursor_width - x_hotspot);
+			src_y_offset = y_pos - y_hotspot;
+		} else if (param->rotation == ROTATION_ANGLE_270) {
+			// hotspot = (y, -x)
+			src_x_offset = x_pos - x_hotspot;
+			src_y_offset = y_pos - (cursor_height - y_hotspot);
 		}
 	} else if (param->rotation == ROTATION_ANGLE_180) {
+		// hotspot = (-x, -y)
 		if (!param->mirror)
-			src_x_offset = pos->x - param->viewport.x;
+			src_x_offset = x_pos - (cursor_width - x_hotspot);
 
-		src_y_offset = pos->y - param->viewport.y;
+		src_y_offset = y_pos - (cursor_height - y_hotspot);
 	}
 
 	dst_x_offset = (src_x_offset >= 0) ? src_x_offset : 0;
@@ -1038,8 +1052,8 @@ void hubp2_cursor_set_position(
 			CURSOR_Y_POSITION, pos->y);
 
 	REG_SET_2(CURSOR_HOT_SPOT, 0,
-			CURSOR_HOT_SPOT_X, x_hotspot,
-			CURSOR_HOT_SPOT_Y, y_hotspot);
+			CURSOR_HOT_SPOT_X, pos->x_hotspot,
+			CURSOR_HOT_SPOT_Y, pos->y_hotspot);
 
 	REG_SET(CURSOR_DST_OFFSET, 0,
 			CURSOR_DST_X_OFFSET, dst_x_offset);
@@ -1048,8 +1062,8 @@ void hubp2_cursor_set_position(
 	hubp->pos.cur_ctl.bits.cur_enable = cur_en;
 	hubp->pos.position.bits.x_pos = pos->x;
 	hubp->pos.position.bits.y_pos = pos->y;
-	hubp->pos.hot_spot.bits.x_hot = x_hotspot;
-	hubp->pos.hot_spot.bits.y_hot = y_hotspot;
+	hubp->pos.hot_spot.bits.x_hot = pos->x_hotspot;
+	hubp->pos.hot_spot.bits.y_hot = pos->y_hotspot;
 	hubp->pos.dst_offset.bits.dst_x_offset = dst_x_offset;
 	/* Cursor Rectangle Cache
 	 * Cursor bitmaps have different hotspot values
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
index d732b6f031a1..6291a241158a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
@@ -1079,6 +1079,29 @@ void dcn20_blank_pixel_data(
 				0);
 	}
 
+	if (!blank && dc->debug.enable_single_display_2to1_odm_policy) {
+		/* when exiting dynamic ODM need to reinit DPG state for unused pipes */
+		struct pipe_ctx *old_odm_pipe = dc->current_state->res_ctx.pipe_ctx[pipe_ctx->pipe_idx].next_odm_pipe;
+
+		odm_pipe = pipe_ctx->next_odm_pipe;
+
+		while (old_odm_pipe) {
+			if (!odm_pipe || old_odm_pipe->pipe_idx != odm_pipe->pipe_idx)
+				dc->hwss.set_disp_pattern_generator(dc,
+						old_odm_pipe,
+						CONTROLLER_DP_TEST_PATTERN_VIDEOMODE,
+						CONTROLLER_DP_COLOR_SPACE_UDEFINED,
+						COLOR_DEPTH_888,
+						NULL,
+						0,
+						0,
+						0);
+			old_odm_pipe = old_odm_pipe->next_odm_pipe;
+			if (odm_pipe)
+				odm_pipe = odm_pipe->next_odm_pipe;
+		}
+	}
+
 	if (!blank)
 		if (stream_res->abm) {
 			dc->hwss.set_pipe(pipe_ctx);
@@ -1270,16 +1293,6 @@ void dcn20_pipe_control_lock(
 					lock,
 					&hw_locks,
 					&inst_flags);
-	} else if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_MAIN) {
-		union dmub_inbox0_cmd_lock_hw hw_lock_cmd = { 0 };
-		hw_lock_cmd.bits.command_code = DMUB_INBOX0_CMD__HW_LOCK;
-		hw_lock_cmd.bits.hw_lock_client = HW_LOCK_CLIENT_DRIVER;
-		hw_lock_cmd.bits.lock_pipe = 1;
-		hw_lock_cmd.bits.otg_inst = pipe->stream_res.tg->inst;
-		hw_lock_cmd.bits.lock = lock;
-		if (!lock)
-			hw_lock_cmd.bits.should_release = 1;
-		dmub_hw_lock_mgr_inbox0_cmd(dc->ctx->dmub_srv, hw_lock_cmd);
 	} else if (pipe->plane_state != NULL && pipe->plane_state->triplebuffer_flips) {
 		if (lock)
 			pipe->stream_res.tg->funcs->triplebuffer_lock(pipe->stream_res.tg);
@@ -1297,6 +1310,19 @@ static void dcn20_detect_pipe_changes(struct pipe_ctx *old_pipe, struct pipe_ctx
 {
 	new_pipe->update_flags.raw = 0;
 
+	/* If non-phantom pipe is being transitioned to a phantom pipe,
+	 * set disable and return immediately. This is because the pipe
+	 * that was previously in use must be fully disabled before we
+	 * can "enable" it as a phantom pipe (since the OTG will certainly
+	 * be different). The post_unlock sequence will set the correct
+	 * update flags to enable the phantom pipe.
+	 */
+	if (old_pipe->plane_state && !old_pipe->plane_state->is_phantom &&
+			new_pipe->plane_state && new_pipe->plane_state->is_phantom) {
+		new_pipe->update_flags.bits.disable = 1;
+		return;
+	}
+
 	/* Exit on unchanged, unused pipe */
 	if (!old_pipe->plane_state && !new_pipe->plane_state)
 		return;
@@ -1626,6 +1652,31 @@ static void dcn20_update_dchubp_dpp(
 		hubp->funcs->phantom_hubp_post_enable(hubp);
 }
 
+static int calculate_vready_offset_for_group(struct pipe_ctx *pipe)
+{
+	struct pipe_ctx *other_pipe;
+	int vready_offset = pipe->pipe_dlg_param.vready_offset;
+
+	/* Always use the largest vready_offset of all connected pipes */
+	for (other_pipe = pipe->bottom_pipe; other_pipe != NULL; other_pipe = other_pipe->bottom_pipe) {
+		if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
+			vready_offset = other_pipe->pipe_dlg_param.vready_offset;
+	}
+	for (other_pipe = pipe->top_pipe; other_pipe != NULL; other_pipe = other_pipe->top_pipe) {
+		if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
+			vready_offset = other_pipe->pipe_dlg_param.vready_offset;
+	}
+	for (other_pipe = pipe->next_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->next_odm_pipe) {
+		if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
+			vready_offset = other_pipe->pipe_dlg_param.vready_offset;
+	}
+	for (other_pipe = pipe->prev_odm_pipe; other_pipe != NULL; other_pipe = other_pipe->prev_odm_pipe) {
+		if (other_pipe->pipe_dlg_param.vready_offset > vready_offset)
+			vready_offset = other_pipe->pipe_dlg_param.vready_offset;
+	}
+
+	return vready_offset;
+}
 
 static void dcn20_program_pipe(
 		struct dc *dc,
@@ -1644,16 +1695,14 @@ static void dcn20_program_pipe(
 			&& !pipe_ctx->prev_odm_pipe) {
 		pipe_ctx->stream_res.tg->funcs->program_global_sync(
 				pipe_ctx->stream_res.tg,
-				pipe_ctx->pipe_dlg_param.vready_offset,
+				calculate_vready_offset_for_group(pipe_ctx),
 				pipe_ctx->pipe_dlg_param.vstartup_start,
 				pipe_ctx->pipe_dlg_param.vupdate_offset,
 				pipe_ctx->pipe_dlg_param.vupdate_width);
 
 		if (pipe_ctx->stream->mall_stream_config.type != SUBVP_PHANTOM) {
-			pipe_ctx->stream_res.tg->funcs->wait_for_state(
-				pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK);
-			pipe_ctx->stream_res.tg->funcs->wait_for_state(
-				pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
+			pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VBLANK);
+			pipe_ctx->stream_res.tg->funcs->wait_for_state(pipe_ctx->stream_res.tg, CRTC_STATE_VACTIVE);
 		}
 
 		pipe_ctx->stream_res.tg->funcs->set_vtg_params(
@@ -1692,7 +1741,10 @@ static void dcn20_program_pipe(
 	 * only do gamma programming for powering on, internal memcmp to avoid
 	 * updating on slave planes
 	 */
-	if (pipe_ctx->update_flags.bits.enable || pipe_ctx->stream->update_flags.bits.out_tf)
+	if (pipe_ctx->update_flags.bits.enable ||
+			pipe_ctx->update_flags.bits.plane_changed ||
+			pipe_ctx->stream->update_flags.bits.out_tf ||
+			pipe_ctx->plane_state->update_flags.bits.output_tf_change)
 		hws->funcs.set_output_transfer_func(dc, pipe_ctx, pipe_ctx->stream);
 
 	/* If the pipe has been enabled or has a different opp, we
@@ -1823,6 +1875,17 @@ void dcn20_program_front_end_for_ctx(
 			context->stream_status[0].plane_count > 1) {
 			pipe->plane_res.hubp->funcs->hubp_wait_pipe_read_start(pipe->plane_res.hubp);
 		}
+
+		/* when dynamic ODM is active, pipes must be reconfigured when all planes are
+		 * disabled, as some transitions will leave software and hardware state
+		 * mismatched.
+		 */
+		if (dc->debug.enable_single_display_2to1_odm_policy &&
+			pipe->stream &&
+			pipe->update_flags.bits.disable &&
+			!pipe->prev_odm_pipe &&
+			hws->funcs.update_odm)
+			hws->funcs.update_odm(dc, context, pipe);
 	}
 }
 
@@ -1856,27 +1919,7 @@ void dcn20_post_unlock_program_front_end(
 
 			for (j = 0; j < TIMEOUT_FOR_PIPE_ENABLE_MS*1000
 					&& hubp->funcs->hubp_is_flip_pending(hubp); j++)
-				mdelay(1);
-		}
-	}
-
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
-		struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
-
-		/* If an active, non-phantom pipe is being transitioned into a phantom
-		 * pipe, wait for the double buffer update to complete first before we do
-		 * phantom pipe programming (HUBP_VTG_SEL updates right away so that can
-		 * cause issues).
-		 */
-		if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM &&
-				old_pipe->stream && old_pipe->stream->mall_stream_config.type != SUBVP_PHANTOM) {
-			old_pipe->stream_res.tg->funcs->wait_for_state(
-					old_pipe->stream_res.tg,
-					CRTC_STATE_VBLANK);
-			old_pipe->stream_res.tg->funcs->wait_for_state(
-					old_pipe->stream_res.tg,
-					CRTC_STATE_VACTIVE);
+				udelay(1);
 		}
 	}
 
@@ -1891,6 +1934,11 @@ void dcn20_post_unlock_program_front_end(
 			 */
 			while (pipe) {
 				if (pipe->stream && pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+					/* When turning on the phantom pipe we want to run through the
+					 * entire enable sequence, so apply all the "enable" flags.
+					 */
+					if (dc->hwss.apply_update_flags_for_phantom)
+						dc->hwss.apply_update_flags_for_phantom(pipe);
 					if (dc->hwss.update_phantom_vp_position)
 						dc->hwss.update_phantom_vp_position(dc, context, pipe);
 					dcn20_program_pipe(dc, pipe, context);
@@ -1963,10 +2011,13 @@ void dcn20_prepare_bandwidth(
 
 	/* decrease compbuf size */
 	if (hubbub->funcs->program_compbuf_size) {
-		if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes)
+		if (context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes) {
 			compbuf_size_kb = context->bw_ctx.dml.ip.min_comp_buffer_size_kbytes;
-		else
+			dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.dml.ip.min_comp_buffer_size_kbytes);
+		} else {
 			compbuf_size_kb = context->bw_ctx.bw.dcn.compbuf_size_kb;
+			dc->wm_optimized_required |= (compbuf_size_kb != dc->current_state->bw_ctx.bw.dcn.compbuf_size_kb);
+		}
 
 		hubbub->funcs->program_compbuf_size(hubbub, compbuf_size_kb, false);
 	}
@@ -2047,7 +2098,7 @@ bool dcn20_update_bandwidth(
 
 			pipe_ctx->stream_res.tg->funcs->program_global_sync(
 					pipe_ctx->stream_res.tg,
-					pipe_ctx->pipe_dlg_param.vready_offset,
+					calculate_vready_offset_for_group(pipe_ctx),
 					pipe_ctx->pipe_dlg_param.vstartup_start,
 					pipe_ctx->pipe_dlg_param.vupdate_offset,
 					pipe_ctx->pipe_dlg_param.vupdate_width);
@@ -2611,14 +2662,6 @@ void dcn20_enable_stream(struct pipe_ctx *pipe_ctx)
 
 	if (dc->hwseq->funcs.set_pixels_per_cycle)
 		dc->hwseq->funcs.set_pixels_per_cycle(pipe_ctx);
-
-	/* enable audio only within mode set */
-	if (pipe_ctx->stream_res.audio != NULL) {
-		if (is_dp_128b_132b_signal(pipe_ctx))
-			pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_audio_enable(pipe_ctx->stream_res.hpo_dp_stream_enc);
-		else if (dc_is_dp_signal(pipe_ctx->stream->signal))
-			pipe_ctx->stream_res.stream_enc->funcs->dp_audio_enable(pipe_ctx->stream_res.stream_enc);
-	}
 }
 
 void dcn20_program_dmdata_engine(struct pipe_ctx *pipe_ctx)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h
index 7bcee5894d2e..5ab32aa51e13 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_mmhubbub.h
@@ -29,13 +29,6 @@
 #define TO_DCN20_MMHUBBUB(mcif_wb_base) \
 	container_of(mcif_wb_base, struct dcn20_mmhubbub, base)
 
-/* DCN */
-#define BASE_INNER(seg) \
-	DCE_BASE__INST0_SEG ## seg
-
-#define BASE(seg) \
-	BASE_INNER(seg)
-
 #define MCIF_WB_COMMON_REG_LIST_DCN2_0(inst) \
 	SRI(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB, inst),\
 	SRI(MCIF_WB_BUFMGR_CUR_LINE_R, MCIF_WB, inst),\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
index 8224b9bf01d1..8a0dd0d7134b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_resource.c
@@ -124,8 +124,6 @@ enum dcn20_clk_src_array_id {
  * macros to expend register list macro defined in HW object header file */
 
 /* DCN */
-/* TODO awful hack. fixup dcn20_dwb.h */
-#undef BASE_INNER
 #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
 
 #define BASE(seg) BASE_INNER(seg)
@@ -138,6 +136,15 @@ enum dcn20_clk_src_array_id {
 	.reg_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
 					mm ## block ## id ## _ ## reg_name
 
+#define SRI2_DWB(reg_name, block, id)\
+	.reg_name = BASE(mm ## reg_name ## _BASE_IDX) + \
+					mm ## reg_name
+#define SF_DWB(reg_name, field_name, post_fix)\
+	.field_name = reg_name ## __ ## field_name ## post_fix
+
+#define SF_DWB2(reg_name, block, id, field_name, post_fix)	\
+	.field_name = reg_name ## __ ## field_name ## post_fix
+
 #define SRIR(var_name, reg_name, block, id)\
 	.var_name = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
 					mm ## block ## id ## _ ## reg_name
@@ -1454,6 +1461,22 @@ enum dc_status dcn20_remove_stream_from_ctx(struct dc *dc, struct dc_state *new_
 	return result;
 }
 
+/**
+ * dcn20_split_stream_for_odm - Check if stream can be splited for ODM
+ *
+ * @dc: DC object with resource pool info required for pipe split
+ * @res_ctx: Persistent state of resources
+ * @prev_odm_pipe: Reference to the previous ODM pipe
+ * @next_odm_pipe: Reference to the next ODM pipe
+ *
+ * This function takes a logically active pipe and a logically free pipe and
+ * halves all the scaling parameters that need to be halved while populating
+ * the free pipe with the required resources and configuring the next/previous
+ * ODM pipe pointers.
+ *
+ * Return:
+ * Return true if split stream for ODM is possible, otherwise, return false.
+ */
 bool dcn20_split_stream_for_odm(
 		const struct dc *dc,
 		struct resource_context *res_ctx,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.h b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.h
index f1ef46e8da5b..e7a1b7fa2cce 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn20/dcn20_vmid.h
@@ -28,12 +28,6 @@
 
 #include "vmid.h"
 
-#define BASE_INNER(seg) \
-	DCE_BASE__INST0_SEG ## seg
-
-#define BASE(seg) \
-	BASE_INNER(seg)
-
 #define DCN20_VMID_REG_LIST(id)\
 	SRI(CNTL, DCN_VM_CONTEXT, id),\
 	SRI(PAGE_TABLE_BASE_ADDR_HI32, DCN_VM_CONTEXT, id),\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
index c5e200d09038..aeb0e0d9b70a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubbub.c
@@ -635,7 +635,7 @@ void hubbub21_wm_read_state(struct hubbub *hubbub,
 			DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, &s->sr_exit);
 
 	REG_GET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A,
-			 DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, &s->dram_clk_chanage);
+			 DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_A, &s->dram_clk_change);
 
 	s = &wm->sets[1];
 	s->wm_set = 1;
@@ -649,7 +649,7 @@ void hubbub21_wm_read_state(struct hubbub *hubbub,
 			DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, &s->sr_exit);
 
 	REG_GET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B,
-			DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, &s->dram_clk_chanage);
+			DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_B, &s->dram_clk_change);
 
 	s = &wm->sets[2];
 	s->wm_set = 2;
@@ -663,7 +663,7 @@ void hubbub21_wm_read_state(struct hubbub *hubbub,
 			DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, &s->sr_exit);
 
 	REG_GET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C,
-			DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, &s->dram_clk_chanage);
+			DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_C, &s->dram_clk_change);
 
 	s = &wm->sets[3];
 	s->wm_set = 3;
@@ -677,7 +677,7 @@ void hubbub21_wm_read_state(struct hubbub *hubbub,
 			DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, &s->sr_exit);
 
 	REG_GET(DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D,
-			DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, &s->dram_clk_chanage);
+			DCHUBBUB_ARB_ALLOW_DRAM_CLK_CHANGE_WATERMARK_D, &s->dram_clk_change);
 }
 
 static void hubbub21_apply_DEDCN21_147_wa(struct hubbub *hubbub)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
index 887081472c0d..fbcf0afeae0d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
@@ -94,8 +94,6 @@
  * macros to expend register list macro defined in HW object header file */
 
 /* DCN */
-/* TODO awful hack. fixup dcn20_dwb.h */
-#undef BASE_INNER
 #define BASE_INNER(seg) DMU_BASE__INST0_SEG ## seg
 
 #define BASE(seg) BASE_INNER(seg)
@@ -671,12 +669,15 @@ static const struct dc_debug_options debug_defaults_diags = {
 		.disable_pplib_wm_range = true,
 		.disable_stutter = true,
 		.disable_48mhz_pwrdwn = true,
-		.disable_psr = true,
 		.enable_tri_buf = true,
 		.use_max_lb = true
 };
 
 static const struct dc_panel_config panel_config_defaults = {
+		.psr = {
+			.disable_psr = false,
+			.disallow_psrsu = false,
+		},
 		.ilr = {
 			.optimize_edp_link_rate = true,
 		},
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h
index 1010930cf071..fc00ec0a0881 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_dwb.h
@@ -27,21 +27,6 @@
 #define TO_DCN30_DWBC(dwbc_base) \
 	container_of(dwbc_base, struct dcn30_dwbc, base)
 
-/* DCN */
-#define BASE_INNER(seg) \
-	DCE_BASE__INST0_SEG ## seg
-
-#define BASE(seg) \
-	BASE_INNER(seg)
-
-#define SF_DWB(reg_name, block, id, field_name, post_fix)\
-	.field_name = block ## id ## _ ## reg_name ## __ ## field_name ## post_fix
-
- /* set field name */
-#define SF_DWB2(reg_name, block, id, field_name, post_fix)\
-	.field_name = reg_name ## __ ## field_name ## post_fix
-
-
 #define DWBC_COMMON_REG_LIST_DCN30(inst) \
 	SR(DWB_ENABLE_CLK_CTRL),\
 	SR(DWB_MEM_PWR_CTRL),\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h
index 7446e54bf5aa..376620a8f02f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_mmhubbub.h
@@ -31,13 +31,6 @@
 #define TO_DCN30_MMHUBBUB(mcif_wb_base) \
 	container_of(mcif_wb_base, struct dcn30_mmhubbub, base)
 
-/* DCN */
-#define BASE_INNER(seg) \
-	DCE_BASE__INST0_SEG ## seg
-
-#define BASE(seg) \
-	BASE_INNER(seg)
-
 #define MCIF_WB_COMMON_REG_LIST_DCN3_0(inst) \
 	SRI(MCIF_WB_BUFMGR_SW_CONTROL, MCIF_WB, inst),\
 	SRI(MCIF_WB_BUFMGR_STATUS, MCIF_WB, inst),\
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
index 892d3c4d01a1..867d60151aeb 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_optc.c
@@ -30,6 +30,7 @@
 #include "dc_dmub_srv.h"
 
 #include "dml/dcn30/dcn30_fpu.h"
+#include "dc_trace.h"
 
 #define REG(reg)\
 	optc1->tg_regs->reg
@@ -58,6 +59,8 @@ void optc3_triplebuffer_lock(struct timing_generator *optc)
 		REG_WAIT(OTG_MASTER_UPDATE_LOCK,
 				UPDATE_LOCK_STATUS, 1,
 				1, 10);
+
+	TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true);
 }
 
 void optc3_lock_doublebuffer_enable(struct timing_generator *optc)
@@ -93,6 +96,8 @@ void optc3_lock_doublebuffer_enable(struct timing_generator *optc)
 		MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_START_OFFSET, 0,
 		MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_END_OFFSET, 100,
 		OTG_MASTER_UPDATE_LOCK_VUPDATE_KEEPOUT_EN, 1);
+
+	TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true);
 }
 
 void optc3_lock_doublebuffer_disable(struct timing_generator *optc)
@@ -108,6 +113,8 @@ void optc3_lock_doublebuffer_disable(struct timing_generator *optc)
 
 	REG_UPDATE(OTG_GLOBAL_CONTROL2, GLOBAL_UPDATE_LOCK_EN, 0);
 	REG_UPDATE(OTG_GLOBAL_CONTROL0, MASTER_UPDATE_LOCK_DB_EN, 0);
+
+	TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true);
 }
 
 void optc3_lock(struct timing_generator *optc)
@@ -122,6 +129,8 @@ void optc3_lock(struct timing_generator *optc)
 	REG_WAIT(OTG_MASTER_UPDATE_LOCK,
 			UPDATE_LOCK_STATUS, 1,
 			1, 10);
+
+	TRACE_OPTC_LOCK_UNLOCK_STATE(optc1, optc->inst, true);
 }
 
 void optc3_set_out_mux(struct timing_generator *optc, enum otg_out_mux_dest dest)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
index 020f512e9690..c18c52a60100 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
@@ -108,8 +108,6 @@ enum dcn30_clk_src_array_id {
  */
 
 /* DCN */
-/* TODO awful hack. fixup dcn20_dwb.h */
-#undef BASE_INNER
 #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
 
 #define BASE(seg) BASE_INNER(seg)
@@ -142,6 +140,9 @@ enum dcn30_clk_src_array_id {
 	.reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
 					mm ## block ## id ## _ ## temp_name
 
+#define SF_DWB2(reg_name, block, id, field_name, post_fix)	\
+	.field_name = reg_name ## __ ## field_name ## post_fix
+
 #define DCCG_SRII(reg_name, block, id)\
 	.block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
 					mm ## block ## id ## _ ## reg_name
@@ -723,7 +724,6 @@ static const struct dc_debug_options debug_defaults_drv = {
 	.underflow_assert_delay_us = 0xFFFFFFFF,
 	.dwb_fi_phase = -1, // -1 = disable,
 	.dmub_command_table = true,
-	.disable_psr = false,
 	.use_max_lb = true,
 	.exit_idle_opt_for_cursor_updates = true
 };
@@ -742,11 +742,17 @@ static const struct dc_debug_options debug_defaults_diags = {
 	.scl_reset_length10 = true,
 	.dwb_fi_phase = -1, // -1 = disable
 	.dmub_command_table = true,
-	.disable_psr = true,
 	.enable_tri_buf = true,
 	.use_max_lb = true
 };
 
+static const struct dc_panel_config panel_config_defaults = {
+	.psr = {
+		.disable_psr = false,
+		.disallow_psrsu = false,
+	},
+};
+
 static void dcn30_dpp_destroy(struct dpp **dpp)
 {
 	kfree(TO_DCN20_DPP(*dpp));
@@ -1323,6 +1329,7 @@ static struct clock_source *dcn30_clock_source_create(
 		return &clk_src->base;
 	}
 
+	kfree(clk_src);
 	BREAK_TO_DEBUGGER();
 	return NULL;
 }
@@ -2212,6 +2219,11 @@ void dcn30_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params
 	}
 }
 
+static void dcn30_get_panel_config_defaults(struct dc_panel_config *panel_config)
+{
+	*panel_config = panel_config_defaults;
+}
+
 static const struct resource_funcs dcn30_res_pool_funcs = {
 	.destroy = dcn30_destroy_resource_pool,
 	.link_enc_create = dcn30_link_encoder_create,
@@ -2231,6 +2243,7 @@ static const struct resource_funcs dcn30_res_pool_funcs = {
 	.release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut,
 	.update_bw_bounding_box = dcn30_update_bw_bounding_box,
 	.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
+	.get_panel_config_defaults = dcn30_get_panel_config_defaults,
 };
 
 #define CTX ctx
diff --git a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
index f04595b750ab..8cf10351f271 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
@@ -107,8 +107,6 @@ enum dcn301_clk_src_array_id {
  */
 
 /* DCN */
-/* TODO awful hack. fixup dcn20_dwb.h */
-#undef BASE_INNER
 #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
 
 #define BASE(seg) BASE_INNER(seg)
@@ -146,6 +144,9 @@ enum dcn301_clk_src_array_id {
 	.reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
 					mm ## block ## id ## _ ## temp_name
 
+#define SF_DWB2(reg_name, block, id, field_name, post_fix)	\
+	.field_name = reg_name ## __ ## field_name ## post_fix
+
 #define DCCG_SRII(reg_name, block, id)\
 	.block ## _ ## reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
 					mm ## block ## id ## _ ## reg_name
@@ -1288,6 +1289,7 @@ static struct clock_source *dcn301_clock_source_create(
 		return &clk_src->base;
 	}
 
+	kfree(clk_src);
 	BREAK_TO_DEBUGGER();
 	return NULL;
 }
@@ -1491,6 +1493,8 @@ static bool dcn301_resource_construct(
 	dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
 	dc->caps.color.mpc.ocsc = 1;
 
+	dc->caps.dp_hdmi21_pcon_support = true;
+
 	/* read VBIOS LTTPR caps */
 	if (ctx->dc_bios->funcs->get_lttpr_caps) {
 		enum bp_result bp_query_result;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
index b925b6ddde5a..47cffd0e6830 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn302/dcn302_resource.c
@@ -112,10 +112,16 @@ static const struct dc_debug_options debug_defaults_diags = {
 		.dwb_fi_phase = -1, // -1 = disable
 		.dmub_command_table = true,
 		.enable_tri_buf = true,
-		.disable_psr = true,
 		.use_max_lb = true
 };
 
+static const struct dc_panel_config panel_config_defaults = {
+		.psr = {
+			.disable_psr = false,
+			.disallow_psrsu = false,
+		},
+};
+
 enum dcn302_clk_src_array_id {
 	DCN302_CLK_SRC_PLL0,
 	DCN302_CLK_SRC_PLL1,
@@ -177,7 +183,6 @@ static const struct dc_plane_cap plane_cap = {
 		mm ## reg_name
 
 /* DCN */
-#undef BASE_INNER
 #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
 
 #define BASE(seg) BASE_INNER(seg)
@@ -210,6 +215,9 @@ static const struct dc_plane_cap plane_cap = {
 		.reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
 		mm ## block ## id ## _ ## temp_name
 
+#define SF_DWB2(reg_name, block, id, field_name, post_fix)	\
+	.field_name = reg_name ## __ ## field_name ## post_fix
+
 #define SRII_MPC_RMU(reg_name, block, id)\
 		.RMU##_##reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
 		mm ## block ## id ## _ ## reg_name
@@ -458,6 +466,7 @@ static struct clock_source *dcn302_clock_source_create(struct dc_context *ctx, s
 		return &clk_src->base;
 	}
 
+	kfree(clk_src);
 	BREAK_TO_DEBUGGER();
 	return NULL;
 }
@@ -1132,6 +1141,11 @@ void dcn302_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
 	DC_FP_END();
 }
 
+static void dcn302_get_panel_config_defaults(struct dc_panel_config *panel_config)
+{
+	*panel_config = panel_config_defaults;
+}
+
 static struct resource_funcs dcn302_res_pool_funcs = {
 		.destroy = dcn302_destroy_resource_pool,
 		.link_enc_create = dcn302_link_encoder_create,
@@ -1151,6 +1165,7 @@ static struct resource_funcs dcn302_res_pool_funcs = {
 		.release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut,
 		.update_bw_bounding_box = dcn302_update_bw_bounding_box,
 		.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
+		.get_panel_config_defaults = dcn302_get_panel_config_defaults,
 };
 
 static struct dc_cap_funcs cap_funcs = {
@@ -1266,6 +1281,8 @@ static bool dcn302_resource_construct(
 	dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
 	dc->caps.color.mpc.ocsc = 1;
 
+	dc->caps.dp_hdmi21_pcon_support = true;
+
 	/* read VBIOS LTTPR caps */
 	if (ctx->dc_bios->funcs->get_lttpr_caps) {
 		enum bp_result bp_query_result;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
index 527d5c902878..c14d35894b2e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
@@ -96,7 +96,13 @@ static const struct dc_debug_options debug_defaults_diags = {
 		.dwb_fi_phase = -1, // -1 = disable
 		.dmub_command_table = true,
 		.enable_tri_buf = true,
-		.disable_psr = true,
+};
+
+static const struct dc_panel_config panel_config_defaults = {
+		.psr = {
+			.disable_psr = false,
+			.disallow_psrsu = false,
+		},
 };
 
 enum dcn303_clk_src_array_id {
@@ -156,7 +162,6 @@ static const struct dc_plane_cap plane_cap = {
 		mm ## reg_name
 
 /* DCN */
-#undef BASE_INNER
 #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
 
 #define BASE(seg) BASE_INNER(seg)
@@ -189,6 +194,9 @@ static const struct dc_plane_cap plane_cap = {
 		.reg_name[id] = BASE(mm ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
 		mm ## block ## id ## _ ## temp_name
 
+#define SF_DWB2(reg_name, block, id, field_name, post_fix)	\
+	.field_name = reg_name ## __ ## field_name ## post_fix
+
 #define SRII_MPC_RMU(reg_name, block, id)\
 		.RMU##_##reg_name[id] = BASE(mm ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
 		mm ## block ## id ## _ ## reg_name
@@ -425,6 +433,7 @@ static struct clock_source *dcn303_clock_source_create(struct dc_context *ctx, s
 		return &clk_src->base;
 	}
 
+	kfree(clk_src);
 	BREAK_TO_DEBUGGER();
 	return NULL;
 }
@@ -1055,6 +1064,10 @@ static void dcn303_destroy_resource_pool(struct resource_pool **pool)
 	*pool = NULL;
 }
 
+static void dcn303_get_panel_config_defaults(struct dc_panel_config *panel_config)
+{
+	*panel_config = panel_config_defaults;
+}
 
 void dcn303_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
 {
@@ -1082,6 +1095,7 @@ static struct resource_funcs dcn303_res_pool_funcs = {
 		.release_post_bldn_3dlut = dcn30_release_post_bldn_3dlut,
 		.update_bw_bounding_box = dcn303_update_bw_bounding_box,
 		.patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
+		.get_panel_config_defaults = dcn303_get_panel_config_defaults,
 };
 
 static struct dc_cap_funcs cap_funcs = {
@@ -1198,6 +1212,8 @@ static bool dcn303_resource_construct(
 	dc->caps.color.mpc.ogam_rom_caps.hlg = 0;
 	dc->caps.color.mpc.ocsc = 1;
 
+	dc->caps.dp_hdmi21_pcon_support = true;
+
 	/* read VBIOS LTTPR caps */
 	if (ctx->dc_bios->funcs->get_lttpr_caps) {
 		enum bp_result bp_query_result;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c
index de5e18c2a3ac..24e9ff65434d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.c
@@ -134,23 +134,10 @@ static void apg31_se_audio_setup(
 
 	/* Disable forced mem power off */
 	REG_UPDATE(APG_MEM_PWR, APG_MEM_PWR_FORCE, 0);
-
-	apg31_enable(apg);
-}
-
-static void apg31_audio_mute_control(
-	struct apg *apg,
-	bool mute)
-{
-	if (mute)
-		apg31_disable(apg);
-	else
-		apg31_enable(apg);
 }
 
 static struct apg_funcs dcn31_apg_funcs = {
 	.se_audio_setup			= apg31_se_audio_setup,
-	.audio_mute_control		= apg31_audio_mute_control,
 	.enable_apg			= apg31_enable,
 	.disable_apg			= apg31_disable,
 };
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.h
index 24f568e120d8..1b81f6773c53 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_apg.h
@@ -84,10 +84,6 @@ struct apg_funcs {
 		unsigned int az_inst,
 		struct audio_info *audio_info);
 
-	void (*audio_mute_control)(
-		struct apg *apg,
-		bool mute);
-
 	void (*enable_apg)(
 		struct apg *apg);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c
index 814f401db3b3..16639bd03adf 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hpo_dp_stream_encoder.c
@@ -600,14 +600,6 @@ static void dcn31_hpo_dp_stream_enc_map_stream_to_link(
 	}
 }
 
-static void dcn31_hpo_dp_stream_enc_mute_control(
-	struct hpo_dp_stream_encoder *enc,
-	bool mute)
-{
-	ASSERT(enc->apg);
-	enc->apg->funcs->audio_mute_control(enc->apg, mute);
-}
-
 static void dcn31_hpo_dp_stream_enc_audio_setup(
 	struct hpo_dp_stream_encoder *enc,
 	unsigned int az_inst,
@@ -726,7 +718,6 @@ static const struct hpo_dp_stream_encoder_funcs dcn30_str_enc_funcs = {
 	.stop_dp_info_packets = dcn31_hpo_dp_stream_enc_stop_dp_info_packets,
 	.dp_set_dsc_pps_info_packet = dcn31_hpo_dp_stream_enc_set_dsc_pps_info_packet,
 	.map_stream_to_link = dcn31_hpo_dp_stream_enc_map_stream_to_link,
-	.audio_mute_control = dcn31_hpo_dp_stream_enc_mute_control,
 	.dp_audio_setup = dcn31_hpo_dp_stream_enc_audio_setup,
 	.dp_audio_enable = dcn31_hpo_dp_stream_enc_audio_enable,
 	.dp_audio_disable = dcn31_hpo_dp_stream_enc_audio_disable,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c
index 84e1486f3d51..39a57bcd7866 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hubp.c
@@ -87,6 +87,7 @@ static struct hubp_funcs dcn31_hubp_funcs = {
 	.hubp_init = hubp3_init,
 	.set_unbounded_requesting = hubp31_set_unbounded_requesting,
 	.hubp_soft_reset = hubp31_soft_reset,
+	.hubp_set_flip_int = hubp1_set_flip_int,
 	.hubp_in_blank = hubp1_in_blank,
 	.program_extended_blank = hubp31_program_extended_blank,
 };
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
index bdf101547484..4226a051df41 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.c
@@ -89,7 +89,8 @@ static void enable_memory_low_power(struct dc *dc)
 		REG_UPDATE(MMHUBBUB_MEM_PWR_CNTL, VGA_MEM_PWR_FORCE, 1);
 	}
 
-	if (dc->debug.enable_mem_low_power.bits.mpc)
+	if (dc->debug.enable_mem_low_power.bits.mpc &&
+		dc->res_pool->mpc->funcs->set_mpc_mem_lp_mode)
 		dc->res_pool->mpc->funcs->set_mpc_mem_lp_mode(dc->res_pool->mpc);
 
 
@@ -141,7 +142,8 @@ void dcn31_init_hw(struct dc *dc)
 
 	if (!dcb->funcs->is_accelerated_mode(dcb)) {
 		hws->funcs.bios_golden_init(dc);
-		hws->funcs.disable_vga(dc->hwseq);
+		if (hws->funcs.disable_vga)
+			hws->funcs.disable_vga(dc->hwseq);
 	}
 	// Initialize the dccg
 	if (res_pool->dccg->funcs->dccg_init)
@@ -621,3 +623,43 @@ void dcn31_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable)
 	if (hws->ctx->dc->debug.hpo_optimization)
 		REG_UPDATE(HPO_TOP_HW_CONTROL, HPO_IO_EN, !!enable);
 }
+void dcn31_set_drr(struct pipe_ctx **pipe_ctx,
+		int num_pipes, struct dc_crtc_timing_adjust adjust)
+{
+	int i = 0;
+	struct drr_params params = {0};
+	unsigned int event_triggers = 0x2;/*Bit[1]: OTG_TRIG_A*/
+	unsigned int num_frames = 2;
+	params.vertical_total_max = adjust.v_total_max;
+	params.vertical_total_min = adjust.v_total_min;
+	params.vertical_total_mid = adjust.v_total_mid;
+	params.vertical_total_mid_frame_num = adjust.v_total_mid_frame_num;
+	for (i = 0; i < num_pipes; i++) {
+		if ((pipe_ctx[i]->stream_res.tg != NULL) && pipe_ctx[i]->stream_res.tg->funcs) {
+			if (pipe_ctx[i]->stream_res.tg->funcs->set_drr)
+				pipe_ctx[i]->stream_res.tg->funcs->set_drr(
+					pipe_ctx[i]->stream_res.tg, &params);
+			if (adjust.v_total_max != 0 && adjust.v_total_min != 0)
+				if (pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control)
+					pipe_ctx[i]->stream_res.tg->funcs->set_static_screen_control(
+						pipe_ctx[i]->stream_res.tg,
+						event_triggers, num_frames);
+		}
+	}
+}
+void dcn31_set_static_screen_control(struct pipe_ctx **pipe_ctx,
+		int num_pipes, const struct dc_static_screen_params *params)
+{
+	unsigned int i;
+	unsigned int triggers = 0;
+	if (params->triggers.surface_update)
+		triggers |= 0x600;/*bit 9 and bit10 : 110 0000 0000*/
+	if (params->triggers.cursor_update)
+		triggers |= 0x10;/*bit4*/
+	if (params->triggers.force_trigger)
+		triggers |= 0x1;
+	for (i = 0; i < num_pipes; i++)
+		pipe_ctx[i]->stream_res.tg->funcs->
+			set_static_screen_control(pipe_ctx[i]->stream_res.tg,
+					triggers, params->num_frames);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h
index edfc01d6ad73..e7e03a8722e0 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_hwseq.h
@@ -56,4 +56,8 @@ bool dcn31_is_abm_supported(struct dc *dc,
 void dcn31_init_pipes(struct dc *dc, struct dc_state *context);
 void dcn31_setup_hpo_hw_control(const struct dce_hwseq *hws, bool enable);
 
+void dcn31_set_static_screen_control(struct pipe_ctx **pipe_ctx,
+		int num_pipes, const struct dc_static_screen_params *params);
+void dcn31_set_drr(struct pipe_ctx **pipe_ctx,
+		int num_pipes, struct dc_crtc_timing_adjust adjust);
 #endif /* __DC_HWSS_DCN31_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
index 3a32810bbe38..7c2da70ffe21 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_init.c
@@ -64,9 +64,9 @@ static const struct hw_sequencer_funcs dcn31_funcs = {
 	.prepare_bandwidth = dcn20_prepare_bandwidth,
 	.optimize_bandwidth = dcn20_optimize_bandwidth,
 	.update_bandwidth = dcn20_update_bandwidth,
-	.set_drr = dcn10_set_drr,
+	.set_drr = dcn31_set_drr,
 	.get_position = dcn10_get_position,
-	.set_static_screen_control = dcn10_set_static_screen_control,
+	.set_static_screen_control = dcn31_set_static_screen_control,
 	.setup_stereo = dcn10_setup_stereo,
 	.set_avmute = dcn30_set_avmute,
 	.log_hw_state = dcn10_log_hw_state,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c
index 63a677c8ee27..fe449f7aa771 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.c
@@ -40,6 +40,7 @@
 #define FN(reg_name, field_name) \
 	optc1->tg_shift->field_name, optc1->tg_mask->field_name
 
+#define STATIC_SCREEN_EVENT_MASK_DRR_DOUBLE_BUFFER_UPDATE_EN 0x2000 /*bit 13*/
 static void optc31_set_odm_combine(struct timing_generator *optc, int *opp_id, int opp_cnt,
 		struct dc_crtc_timing *timing)
 {
@@ -231,6 +232,32 @@ void optc3_init_odm(struct timing_generator *optc)
 			OPTC_MEM_SEL, 0);
 	optc1->opp_count = 1;
 }
+void optc31_set_static_screen_control(
+	struct timing_generator *optc,
+	uint32_t event_triggers,
+	uint32_t num_frames)
+{
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
+	uint32_t framecount;
+	uint32_t events;
+
+	if (num_frames > 0xFF)
+		num_frames = 0xFF;
+	REG_GET_2(OTG_STATIC_SCREEN_CONTROL,
+			OTG_STATIC_SCREEN_EVENT_MASK, &events,
+			OTG_STATIC_SCREEN_FRAME_COUNT, &framecount);
+
+	if (events == event_triggers && num_frames == framecount)
+		return;
+	if ((event_triggers & STATIC_SCREEN_EVENT_MASK_DRR_DOUBLE_BUFFER_UPDATE_EN)
+			!= 0)
+		event_triggers = event_triggers &
+		~STATIC_SCREEN_EVENT_MASK_DRR_DOUBLE_BUFFER_UPDATE_EN;
+
+	REG_UPDATE_2(OTG_STATIC_SCREEN_CONTROL,
+			OTG_STATIC_SCREEN_EVENT_MASK, event_triggers,
+			OTG_STATIC_SCREEN_FRAME_COUNT, num_frames);
+}
 
 static struct timing_generator_funcs dcn31_tg_funcs = {
 		.validate_timing = optc1_validate_timing,
@@ -266,7 +293,7 @@ static struct timing_generator_funcs dcn31_tg_funcs = {
 		.set_drr = optc31_set_drr,
 		.get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal,
 		.set_vtotal_min_max = optc1_set_vtotal_min_max,
-		.set_static_screen_control = optc1_set_static_screen_control,
+		.set_static_screen_control = optc31_set_static_screen_control,
 		.program_stereo = optc1_program_stereo,
 		.is_stereo_left_eye = optc1_is_stereo_left_eye,
 		.tg_init = optc3_tg_init,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h
index 30b81a448ce2..5fc6c63580d7 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_optc.h
@@ -263,5 +263,8 @@ bool optc31_immediate_disable_crtc(struct timing_generator *optc);
 void optc31_set_drr(struct timing_generator *optc, const struct drr_params *params);
 
 void optc3_init_odm(struct timing_generator *optc);
-
+void optc31_set_static_screen_control(
+	struct timing_generator *optc,
+	uint32_t event_triggers,
+	uint32_t num_frames);
 #endif /* __DC_OPTC_DCN31_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
index fddc21a5a04c..3ca517dcc82d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn31/dcn31_resource.c
@@ -119,8 +119,6 @@ enum dcn31_clk_src_array_id {
  */
 
 /* DCN */
-/* TODO awful hack. fixup dcn20_dwb.h */
-#undef BASE_INNER
 #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
 
 #define BASE(seg) BASE_INNER(seg)
@@ -153,6 +151,9 @@ enum dcn31_clk_src_array_id {
 	.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
 					reg ## block ## id ## _ ## temp_name
 
+#define SF_DWB2(reg_name, block, id, field_name, post_fix)	\
+	.field_name = reg_name ## __ ## field_name ## post_fix
+
 #define DCCG_SRII(reg_name, block, id)\
 	.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
 					reg ## block ## id ## _ ## reg_name
@@ -911,6 +912,10 @@ static const struct dc_debug_options debug_defaults_diags = {
 };
 
 static const struct dc_panel_config panel_config_defaults = {
+	.psr = {
+		.disable_psr = false,
+		.disallow_psrsu = false,
+	},
 	.ilr = {
 		.optimize_edp_link_rate = true,
 	},
@@ -1625,6 +1630,7 @@ static struct clock_source *dcn31_clock_source_create(
 		return &clk_src->base;
 	}
 
+	kfree(clk_src);
 	BREAK_TO_DEBUGGER();
 	return NULL;
 }
@@ -1634,6 +1640,31 @@ static bool is_dual_plane(enum surface_pixel_format format)
 	return format >= SURFACE_PIXEL_FORMAT_VIDEO_BEGIN || format == SURFACE_PIXEL_FORMAT_GRPH_RGBE_ALPHA;
 }
 
+int dcn31x_populate_dml_pipes_from_context(struct dc *dc,
+					  struct dc_state *context,
+					  display_e2e_pipe_params_st *pipes,
+					  bool fast_validate)
+{
+	uint32_t pipe_cnt;
+	int i;
+
+	dc_assert_fp_enabled();
+
+	pipe_cnt = dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+
+	for (i = 0; i < pipe_cnt; i++) {
+		pipes[i].pipe.src.gpuvm = 1;
+		if (dc->debug.dml_hostvm_override == DML_HOSTVM_NO_OVERRIDE) {
+			//pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
+			pipes[i].pipe.src.hostvm = dc->vm_pa_config.is_hvm_enabled;
+		} else if (dc->debug.dml_hostvm_override == DML_HOSTVM_OVERRIDE_FALSE)
+			pipes[i].pipe.src.hostvm = false;
+		else if (dc->debug.dml_hostvm_override == DML_HOSTVM_OVERRIDE_TRUE)
+			pipes[i].pipe.src.hostvm = true;
+	}
+	return pipe_cnt;
+}
+
 int dcn31_populate_dml_pipes_from_context(
 	struct dc *dc, struct dc_state *context,
 	display_e2e_pipe_params_st *pipes,
@@ -1645,7 +1676,7 @@ int dcn31_populate_dml_pipes_from_context(
 	bool upscaled = false;
 
 	DC_FP_START();
-	dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+	dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
 	DC_FP_END();
 
 	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
@@ -1675,12 +1706,6 @@ int dcn31_populate_dml_pipes_from_context(
 		dcn31_zero_pipe_dcc_fraction(pipes, pipe_cnt);
 		DC_FP_END();
 
-		if (dc->debug.dml_hostvm_override == DML_HOSTVM_NO_OVERRIDE)
-			pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
-		else if (dc->debug.dml_hostvm_override == DML_HOSTVM_OVERRIDE_FALSE)
-			pipes[pipe_cnt].pipe.src.hostvm = false;
-		else if (dc->debug.dml_hostvm_override == DML_HOSTVM_OVERRIDE_TRUE)
-			pipes[pipe_cnt].pipe.src.hostvm = true;
 
 		if (pipes[pipe_cnt].dout.dsc_enable) {
 			switch (timing->display_color_depth) {
@@ -1898,6 +1923,8 @@ static bool dcn31_resource_construct(
 	dc->caps.max_slave_rgb_planes = 2;
 	dc->caps.post_blend_color_processing = true;
 	dc->caps.force_dp_tps4_for_cp2520 = true;
+	if (dc->config.forceHBR2CP2520)
+		dc->caps.force_dp_tps4_for_cp2520 = false;
 	dc->caps.dp_hpo = true;
 	dc->caps.dp_hdmi21_pcon_support = true;
 	dc->caps.edp_dsc_support = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c
index 1bd7e0f327d8..0b769ee71405 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dccg.c
@@ -96,8 +96,15 @@ static void dccg314_set_pixel_rate_div(
 	struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
 	enum pixel_rate_div cur_k1 = PIXEL_RATE_DIV_NA, cur_k2 = PIXEL_RATE_DIV_NA;
 
+	// Don't program 0xF into the register field. Not valid since
+	// K1 / K2 field is only 1 / 2 bits wide
+	if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) {
+		BREAK_TO_DEBUGGER();
+		return;
+	}
+
 	dccg314_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2);
-	if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA || (k1 == cur_k1 && k2 == cur_k2))
+	if (k1 == cur_k1 && k2 == cur_k2)
 		return;
 
 	switch (otg_inst) {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c
index 7e773bf7b895..38842f938bed 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_dio_stream_encoder.c
@@ -49,18 +49,30 @@
 #define CTX \
 	enc1->base.ctx
 
+static void enc314_reset_fifo(struct stream_encoder *enc, bool reset)
+{
+	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
+	uint32_t reset_val = reset ? 1 : 0;
+	uint32_t is_symclk_on;
+
+	REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, reset_val);
+	REG_GET(DIG_FE_CNTL, DIG_SYMCLK_FE_ON, &is_symclk_on);
+
+	if (is_symclk_on)
+		REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, reset_val, 10, 5000);
+	else
+		udelay(10);
+}
 
 static void enc314_enable_fifo(struct stream_encoder *enc)
 {
 	struct dcn10_stream_encoder *enc1 = DCN10STRENC_FROM_STRENC(enc);
 
-	/* TODO: Confirm if we need to wait for DIG_SYMCLK_FE_ON */
-	REG_WAIT(DIG_FE_CNTL, DIG_SYMCLK_FE_ON, 1, 10, 5000);
 	REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_READ_START_LEVEL, 0x7);
-	REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 1);
-	REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 1, 10, 5000);
-	REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_RESET, 0);
-	REG_WAIT(DIG_FIFO_CTRL0, DIG_FIFO_RESET_DONE, 0, 10, 5000);
+
+	enc314_reset_fifo(enc, true);
+	enc314_reset_fifo(enc, false);
+
 	REG_UPDATE(DIG_FIFO_CTRL0, DIG_FIFO_ENABLE, 1);
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
index 588c1c71241f..a0741794db62 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_hwseq.c
@@ -348,10 +348,8 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
 	two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing);
 	odm_combine_factor = get_odm_config(pipe_ctx, NULL);
 
-	if (pipe_ctx->stream->signal == SIGNAL_TYPE_VIRTUAL)
-		return odm_combine_factor;
-
 	if (is_dp_128b_132b_signal(pipe_ctx)) {
+		*k1_div = PIXEL_RATE_DIV_BY_1;
 		*k2_div = PIXEL_RATE_DIV_BY_1;
 	} else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) {
 		*k1_div = PIXEL_RATE_DIV_BY_1;
@@ -359,7 +357,7 @@ unsigned int dcn314_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsig
 			*k2_div = PIXEL_RATE_DIV_BY_2;
 		else
 			*k2_div = PIXEL_RATE_DIV_BY_4;
-	} else if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
+	} else if (dc_is_dp_signal(pipe_ctx->stream->signal) || dc_is_virtual_signal(pipe_ctx->stream->signal)) {
 		if (two_pix_per_container) {
 			*k1_div = PIXEL_RATE_DIV_BY_1;
 			*k2_div = PIXEL_RATE_DIV_BY_2;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
index 5b6c2d94ec71..31feb4b0edee 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_init.c
@@ -66,9 +66,9 @@ static const struct hw_sequencer_funcs dcn314_funcs = {
 	.prepare_bandwidth = dcn20_prepare_bandwidth,
 	.optimize_bandwidth = dcn20_optimize_bandwidth,
 	.update_bandwidth = dcn20_update_bandwidth,
-	.set_drr = dcn10_set_drr,
+	.set_drr = dcn31_set_drr,
 	.get_position = dcn10_get_position,
-	.set_static_screen_control = dcn10_set_static_screen_control,
+	.set_static_screen_control = dcn31_set_static_screen_control,
 	.setup_stereo = dcn10_setup_stereo,
 	.set_avmute = dcn30_set_avmute,
 	.log_hw_state = dcn10_log_hw_state,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c
index 47eb162f1a75..41edbd64ea21 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_optc.c
@@ -228,7 +228,7 @@ static struct timing_generator_funcs dcn314_tg_funcs = {
 		.set_drr = optc31_set_drr,
 		.get_last_used_drr_vtotal = optc2_get_last_used_drr_vtotal,
 		.set_vtotal_min_max = optc1_set_vtotal_min_max,
-		.set_static_screen_control = optc1_set_static_screen_control,
+		.set_static_screen_control = optc31_set_static_screen_control,
 		.program_stereo = optc1_program_stereo,
 		.is_stereo_left_eye = optc1_is_stereo_left_eye,
 		.tg_init = optc3_tg_init,
@@ -237,11 +237,10 @@ static struct timing_generator_funcs dcn314_tg_funcs = {
 		.clear_optc_underflow = optc1_clear_optc_underflow,
 		.setup_global_swap_lock = NULL,
 		.get_crc = optc1_get_crc,
-		.configure_crc = optc2_configure_crc,
+		.configure_crc = optc1_configure_crc,
 		.set_dsc_config = optc3_set_dsc_config,
 		.get_dsc_status = optc2_get_dsc_status,
 		.set_dwb_source = NULL,
-		.set_odm_bypass = optc3_set_odm_bypass,
 		.set_odm_combine = optc314_set_odm_combine,
 		.get_optc_source = optc2_get_optc_source,
 		.set_out_mux = optc3_set_out_mux,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
index d0ad72caead2..f9ea1e86707f 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn314/dcn314_resource.c
@@ -184,6 +184,9 @@ enum dcn31_clk_src_array_id {
 	.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
 					reg ## block ## id ## _ ## temp_name
 
+#define SF_DWB2(reg_name, block, id, field_name, post_fix)	\
+	.field_name = reg_name ## __ ## field_name ## post_fix
+
 #define DCCG_SRII(reg_name, block, id)\
 	.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
 					reg ## block ## id ## _ ## reg_name
@@ -847,7 +850,7 @@ static const struct resource_caps res_cap_dcn314 = {
 	.num_ddc = 5,
 	.num_vmid = 16,
 	.num_mpc_3dlut = 2,
-	.num_dsc = 3,
+	.num_dsc = 4,
 };
 
 static const struct dc_plane_cap plane_cap = {
@@ -883,6 +886,7 @@ static const struct dc_plane_cap plane_cap = {
 static const struct dc_debug_options debug_defaults_drv = {
 	.disable_z10 = false,
 	.enable_z9_disable_interface = true,
+	.psr_skip_crtc_disable = true,
 	.disable_dmcu = true,
 	.force_abm_enable = false,
 	.timing_trace = false,
@@ -937,6 +941,10 @@ static const struct dc_debug_options debug_defaults_diags = {
 };
 
 static const struct dc_panel_config panel_config_defaults = {
+	.psr = {
+		.disable_psr = false,
+		.disallow_psrsu = false,
+	},
 	.ilr = {
 		.optimize_edp_link_rate = true,
 	},
@@ -1766,6 +1774,8 @@ static bool dcn314_resource_construct(
 	dc->caps.max_slave_rgb_planes = 2;
 	dc->caps.post_blend_color_processing = true;
 	dc->caps.force_dp_tps4_for_cp2520 = true;
+	if (dc->config.forceHBR2CP2520)
+		dc->caps.force_dp_tps4_for_cp2520 = false;
 	dc->caps.dp_hpo = true;
 	dc->caps.dp_hdmi21_pcon_support = true;
 	dc->caps.edp_dsc_support = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
index 58746c437554..7887078c5f64 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn315/dcn315_resource.c
@@ -151,8 +151,6 @@ enum dcn31_clk_src_array_id {
  */
 
 /* DCN */
-/* TODO awful hack. fixup dcn20_dwb.h */
-#undef BASE_INNER
 #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
 
 #define BASE(seg) BASE_INNER(seg)
@@ -185,6 +183,9 @@ enum dcn31_clk_src_array_id {
 	.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
 					reg ## block ## id ## _ ## temp_name
 
+#define SF_DWB2(reg_name, block, id, field_name, post_fix)	\
+	.field_name = reg_name ## __ ## field_name ## post_fix
+
 #define DCCG_SRII(reg_name, block, id)\
 	.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
 					reg ## block ## id ## _ ## reg_name
@@ -907,6 +908,10 @@ static const struct dc_debug_options debug_defaults_diags = {
 };
 
 static const struct dc_panel_config panel_config_defaults = {
+	.psr = {
+		.disable_psr = false,
+		.disallow_psrsu = false,
+	},
 	.ilr = {
 		.optimize_edp_link_rate = true,
 	},
@@ -1623,6 +1628,7 @@ static struct clock_source *dcn31_clock_source_create(
 		return &clk_src->base;
 	}
 
+	kfree(clk_src);
 	BREAK_TO_DEBUGGER();
 	return NULL;
 }
@@ -1643,7 +1649,7 @@ static int dcn315_populate_dml_pipes_from_context(
 	const int max_usable_det = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - DCN3_15_MIN_COMPBUF_SIZE_KB;
 
 	DC_FP_START();
-	dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+	dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
 	DC_FP_END();
 
 	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
@@ -1662,7 +1668,6 @@ static int dcn315_populate_dml_pipes_from_context(
 		pipes[pipe_cnt].pipe.src.immediate_flip = true;
 
 		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
-		pipes[pipe_cnt].pipe.src.gpuvm = true;
 		pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
 		pipes[pipe_cnt].pipe.src.dcc_rate = 3;
 		pipes[pipe_cnt].dout.dsc_input_bpc = 0;
@@ -1703,7 +1708,9 @@ static int dcn315_populate_dml_pipes_from_context(
 			dc->config.enable_4to1MPC = true;
 			context->bw_ctx.dml.ip.det_buffer_size_kbytes =
 					(max_usable_det / DCN3_15_CRB_SEGMENT_SIZE_KB / 4) * DCN3_15_CRB_SEGMENT_SIZE_KB;
-		} else if (!is_dual_plane(pipe->plane_state->format) && pipe->plane_state->src_rect.width <= 5120) {
+		} else if (!is_dual_plane(pipe->plane_state->format)
+				&& pipe->plane_state->src_rect.width <= 5120
+				&& pipe->stream->timing.pix_clk_100hz < dcn_get_max_non_odm_pix_rate_100hz(&dc->dml.soc)) {
 			/* Limit to 5k max to avoid forced pipe split when there is not enough detile for swath */
 			context->bw_ctx.dml.ip.det_buffer_size_kbytes = 192;
 			pipes[0].pipe.src.unbounded_req_mode = true;
@@ -1779,6 +1786,8 @@ static bool dcn315_resource_construct(
 	dc->caps.max_slave_rgb_planes = 2;
 	dc->caps.post_blend_color_processing = true;
 	dc->caps.force_dp_tps4_for_cp2520 = true;
+	if (dc->config.forceHBR2CP2520)
+		dc->caps.force_dp_tps4_for_cp2520 = false;
 	dc->caps.dp_hpo = true;
 	dc->caps.dp_hdmi21_pcon_support = true;
 	dc->caps.edp_dsc_support = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
index 6b40a11ac83a..b4d5076e124c 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn316/dcn316_resource.c
@@ -142,8 +142,6 @@ enum dcn31_clk_src_array_id {
  */
 
 /* DCN */
-/* TODO awful hack. fixup dcn20_dwb.h */
-#undef BASE_INNER
 #define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
 
 #define BASE(seg) BASE_INNER(seg)
@@ -176,6 +174,9 @@ enum dcn31_clk_src_array_id {
 	.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
 					reg ## block ## id ## _ ## temp_name
 
+#define SF_DWB2(reg_name, block, id, field_name, post_fix)	\
+	.field_name = reg_name ## __ ## field_name ## post_fix
+
 #define DCCG_SRII(reg_name, block, id)\
 	.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
 					reg ## block ## id ## _ ## reg_name
@@ -906,6 +907,10 @@ static const struct dc_debug_options debug_defaults_diags = {
 };
 
 static const struct dc_panel_config panel_config_defaults = {
+	.psr = {
+		.disable_psr = false,
+		.disallow_psrsu = false,
+	},
 	.ilr = {
 		.optimize_edp_link_rate = true,
 	},
@@ -1646,7 +1651,7 @@ static int dcn316_populate_dml_pipes_from_context(
 	const int max_usable_det = context->bw_ctx.dml.ip.config_return_buffer_size_in_kbytes - DCN3_16_MIN_COMPBUF_SIZE_KB;
 
 	DC_FP_START();
-	dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+	dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
 	DC_FP_END();
 
 	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
@@ -1665,7 +1670,6 @@ static int dcn316_populate_dml_pipes_from_context(
 		pipes[pipe_cnt].pipe.src.immediate_flip = true;
 
 		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
-		pipes[pipe_cnt].pipe.src.gpuvm = true;
 		pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
 		pipes[pipe_cnt].pipe.src.dcc_rate = 3;
 		pipes[pipe_cnt].dout.dsc_input_bpc = 0;
@@ -1781,6 +1785,8 @@ static bool dcn316_resource_construct(
 	dc->caps.max_slave_rgb_planes = 2;
 	dc->caps.post_blend_color_processing = true;
 	dc->caps.force_dp_tps4_for_cp2520 = true;
+	if (dc->config.forceHBR2CP2520)
+		dc->caps.force_dp_tps4_for_cp2520 = false;
 	dc->caps.dp_hpo = true;
 	dc->caps.dp_hdmi21_pcon_support = true;
 	dc->caps.edp_dsc_support = true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
index e4daed44ef5f..e4472c6be6c3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_dccg.c
@@ -96,8 +96,10 @@ static void dccg32_set_pixel_rate_div(
 
 	// Don't program 0xF into the register field. Not valid since
 	// K1 / K2 field is only 1 / 2 bits wide
-	if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA)
+	if (k1 == PIXEL_RATE_DIV_NA || k2 == PIXEL_RATE_DIV_NA) {
+		BREAK_TO_DEBUGGER();
 		return;
+	}
 
 	dccg32_get_pixel_rate_div(dccg, otg_inst, &cur_k1, &cur_k2);
 	if (k1 == cur_k1 && k2 == cur_k2)
@@ -223,11 +225,7 @@ static void dccg32_set_dtbclk_dto(
 	} else {
 		REG_UPDATE_2(OTG_PIXEL_RATE_CNTL[params->otg_inst],
 				DTBCLK_DTO_ENABLE[params->otg_inst], 0,
-				PIPE_DTO_SRC_SEL[params->otg_inst], 1);
-		if (params->is_hdmi)
-			REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
-				PIPE_DTO_SRC_SEL[params->otg_inst], 0);
-
+				PIPE_DTO_SRC_SEL[params->otg_inst], params->is_hdmi ? 0 : 1);
 		REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0);
 		REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0);
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c
index 9fbb72369c10..9501403a48a9 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.c
@@ -41,6 +41,10 @@
 #define FN(reg_name, field_name) \
 	hubbub2->shifts->field_name, hubbub2->masks->field_name
 
+/**
+ * @DCN32_CRB_SEGMENT_SIZE_KB: Maximum Configurable Return Buffer size for
+ * DCN32
+ */
 #define DCN32_CRB_SEGMENT_SIZE_KB 64
 
 static void dcn32_init_crb(struct hubbub *hubbub)
@@ -68,6 +72,23 @@ static void dcn32_init_crb(struct hubbub *hubbub)
 	REG_UPDATE(DCHUBBUB_DEBUG_CTRL_0, DET_DEPTH, 0x47F);
 }
 
+void hubbub32_set_request_limit(struct hubbub *hubbub, int memory_channel_count, int words_per_channel)
+{
+	struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
+
+	uint32_t request_limit = 3 * memory_channel_count * words_per_channel / 4;
+
+	ASSERT((request_limit & (~0xFFF)) == 0); //field is only 24 bits long
+	ASSERT(request_limit > 0); //field is only 24 bits long
+
+	if (request_limit > 0xFFF)
+		request_limit = 0xFFF;
+
+	if (request_limit > 0)
+		REG_UPDATE(SDPIF_REQUEST_RATE_LIMIT, SDPIF_REQUEST_RATE_LIMIT, request_limit);
+}
+
+
 void dcn32_program_det_size(struct hubbub *hubbub, int hubp_inst, unsigned int det_buffer_size_in_kbyte)
 {
 	struct dcn20_hubbub *hubbub2 = TO_DCN20_HUBBUB(hubbub);
@@ -844,7 +865,7 @@ static void hubbub32_wm_read_state(struct hubbub *hubbub,
 			DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_A, &s->sr_exit);
 
 	REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A,
-			 DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, &s->dram_clk_chanage);
+			 DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_A, &s->dram_clk_change);
 
 	REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A,
 			 DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_A, &s->usr_retrain);
@@ -864,7 +885,7 @@ static void hubbub32_wm_read_state(struct hubbub *hubbub,
 			DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_B, &s->sr_exit);
 
 	REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B,
-			DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, &s->dram_clk_chanage);
+			DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_B, &s->dram_clk_change);
 
 	REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B,
 			 DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_B, &s->usr_retrain);
@@ -884,7 +905,7 @@ static void hubbub32_wm_read_state(struct hubbub *hubbub,
 			DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_C, &s->sr_exit);
 
 	REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C,
-			DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C, &s->dram_clk_chanage);
+			DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_C, &s->dram_clk_change);
 
 	REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_C,
 			 DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_C, &s->usr_retrain);
@@ -904,7 +925,7 @@ static void hubbub32_wm_read_state(struct hubbub *hubbub,
 			DCHUBBUB_ARB_ALLOW_SR_EXIT_WATERMARK_D, &s->sr_exit);
 
 	REG_GET(DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D,
-			DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D, &s->dram_clk_chanage);
+			DCHUBBUB_ARB_UCLK_PSTATE_CHANGE_WATERMARK_D, &s->dram_clk_change);
 
 	REG_GET(DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_D,
 			 DCHUBBUB_ARB_USR_RETRAINING_WATERMARK_D, &s->usr_retrain);
@@ -945,6 +966,7 @@ static const struct hubbub_funcs hubbub32_funcs = {
 	.init_crb = dcn32_init_crb,
 	.hubbub_read_state = hubbub2_read_state,
 	.force_usr_retraining_allow = hubbub32_force_usr_retraining_allow,
+	.set_request_limit = hubbub32_set_request_limit
 };
 
 void hubbub32_construct(struct dcn20_hubbub *hubbub2,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h
index cda94e0e31bf..786f9ce07f92 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hubbub.h
@@ -82,7 +82,8 @@
 	SR(DCN_VM_FAULT_ADDR_MSB),\
 	SR(DCN_VM_FAULT_ADDR_LSB),\
 	SR(DCN_VM_FAULT_CNTL),\
-	SR(DCN_VM_FAULT_STATUS)
+	SR(DCN_VM_FAULT_STATUS),\
+	SR(SDPIF_REQUEST_RATE_LIMIT)
 
 #define HUBBUB_MASK_SH_LIST_DCN32(mask_sh)\
 	HUBBUB_SF(DCHUBBUB_GLOBAL_TIMER_CNTL, DCHUBBUB_GLOBAL_TIMER_ENABLE, mask_sh), \
@@ -159,7 +160,8 @@
 	HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_VMID, mask_sh), \
 	HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_TABLE_LEVEL, mask_sh), \
 	HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_PIPE, mask_sh), \
-	HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_INTERRUPT_STATUS, mask_sh)
+	HUBBUB_SF(DCN_VM_FAULT_STATUS, DCN_VM_ERROR_INTERRUPT_STATUS, mask_sh),\
+	HUBBUB_SF(SDPIF_REQUEST_RATE_LIMIT, SDPIF_REQUEST_RATE_LIMIT, mask_sh)
 
 bool hubbub32_program_urgent_watermarks(
 		struct hubbub *hubbub,
@@ -200,4 +202,6 @@ void hubbub32_construct(struct dcn20_hubbub *hubbub2,
 	int pixel_chunk_size_kb,
 	int config_return_buffer_size_kb);
 
+void hubbub32_set_request_limit(struct hubbub *hubbub, int umc_count, int words_per_umc);
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
index cf5bd9713f54..b8767be1e4c5 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.c
@@ -227,8 +227,13 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
+		/* If PSR is supported on an eDP panel that's connected, but that panel is
+		 * not in PSR at the time of trying to enter MALL SS, we have to include it
+		 * in the static screen CAB calculation
+		 */
 		if (!pipe->stream || !pipe->plane_state ||
-				pipe->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED ||
+				(pipe->stream->link->psr_settings.psr_version != DC_PSR_VERSION_UNSUPPORTED &&
+				pipe->stream->link->psr_settings.psr_allow_active) ||
 				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM)
 			continue;
 
@@ -257,11 +262,11 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
 		num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) *
 				((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height);
 
-		/* For DCC:
-		 * meta_num_mblk = CEILING(full_mblk_width_ub_l*full_mblk_height_ub_l*Bpe/256/mblk_bytes, 1)
+		/*For DCC:
+		 * meta_num_mblk = CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1)
 		 */
 		if (pipe->plane_state->dcc.enable)
-			num_mblks += (mall_alloc_width_blk_aligned * mall_alloc_width_blk_aligned * bytes_per_pixel +
+			num_mblks += (pipe->plane_state->dcc.meta_pitch * pipe->plane_res.scl_data.viewport.height * bytes_per_pixel +
 					(256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES);
 
 		bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES;
@@ -283,8 +288,7 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
 			using the max for calculation */
 
 		if (hubp->curs_attr.width > 0) {
-				// Round cursor width to next multiple of 64
-				cursor_size = (((hubp->curs_attr.width + 63) / 64) * 64) * hubp->curs_attr.height;
+				cursor_size = hubp->curs_attr.pitch * hubp->curs_attr.height;
 
 				switch (pipe->stream->cursor_attributes.color_format) {
 				case CURSOR_MODE_MONO:
@@ -309,11 +313,11 @@ static uint32_t dcn32_calculate_cab_allocation(struct dc *dc, struct dc_state *c
 						cursor_size > 16384) {
 					/* cursor_num_mblk = CEILING(num_cursors*cursor_width*cursor_width*cursor_Bpe/mblk_bytes, 1)
 					 */
-					cache_lines_used += (((hubp->curs_attr.width * hubp->curs_attr.height * cursor_bpp +
-										DCN3_2_MALL_MBLK_SIZE_BYTES - 1) / DCN3_2_MALL_MBLK_SIZE_BYTES) *
-										DCN3_2_MALL_MBLK_SIZE_BYTES) / dc->caps.cache_line_size + 2;
+					cache_lines_used += (((cursor_size + DCN3_2_MALL_MBLK_SIZE_BYTES - 1) /
+							DCN3_2_MALL_MBLK_SIZE_BYTES) * DCN3_2_MALL_MBLK_SIZE_BYTES) /
+							dc->caps.cache_line_size + 2;
+					break;
 				}
-				break;
 			}
 	}
 
@@ -699,11 +703,7 @@ void dcn32_subvp_update_force_pstate(struct dc *dc, struct dc_state *context)
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
-		// For SubVP + DRR, also force disallow on the DRR pipe
-		// (We will force allow in the DMUB sequence -- some DRR timings by default won't allow P-State so we have
-		// to force once the vblank is stretched).
-		if (pipe->stream && pipe->plane_state && (pipe->stream->mall_stream_config.type == SUBVP_MAIN ||
-				(pipe->stream->mall_stream_config.type == SUBVP_NONE && pipe->stream->ignore_msa_timing_param))) {
+		if (pipe->stream && pipe->plane_state && (pipe->stream->mall_stream_config.type == SUBVP_MAIN)) {
 			struct hubp *hubp = pipe->plane_res.hubp;
 
 			if (hubp && hubp->funcs->hubp_update_force_pstate_disallow)
@@ -727,10 +727,7 @@ void dcn32_update_mall_sel(struct dc *dc, struct dc_state *context)
 		struct hubp *hubp = pipe->plane_res.hubp;
 
 		if (pipe->stream && pipe->plane_state && hubp && hubp->funcs->hubp_update_mall_sel) {
-			//Round cursor width up to next multiple of 64
-			int cursor_width = ((hubp->curs_attr.width + 63) / 64) * 64;
-			int cursor_height = hubp->curs_attr.height;
-			int cursor_size = cursor_width * cursor_height;
+			int cursor_size = hubp->curs_attr.pitch * hubp->curs_attr.height;
 
 			switch (hubp->curs_attr.color_format) {
 			case CURSOR_MODE_MONO:
@@ -784,6 +781,10 @@ void dcn32_program_mall_pipe_config(struct dc *dc, struct dc_state *context)
 	if (hws && hws->funcs.update_mall_sel)
 		hws->funcs.update_mall_sel(dc, context);
 
+	//update subvp force pstate
+	if (hws && hws->funcs.subvp_update_force_pstate)
+		dc->hwseq->funcs.subvp_update_force_pstate(dc, context);
+
 	// Program FORCE_ONE_ROW_FOR_FRAME and CURSOR_REQ_MODE for main subvp pipes
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
@@ -984,15 +985,14 @@ void dcn32_init_hw(struct dc *dc)
 	if (dc->res_pool->hubbub->funcs->init_crb)
 		dc->res_pool->hubbub->funcs->init_crb(dc->res_pool->hubbub);
 
+	if (dc->res_pool->hubbub->funcs->set_request_limit && dc->config.sdpif_request_limit_words_per_umc > 0)
+		dc->res_pool->hubbub->funcs->set_request_limit(dc->res_pool->hubbub, dc->ctx->dc_bios->vram_info.num_chans, dc->config.sdpif_request_limit_words_per_umc);
+
 	// Get DMCUB capabilities
 	if (dc->ctx->dmub_srv) {
 		dc_dmub_srv_query_caps_cmd(dc->ctx->dmub_srv->dmub);
 		dc->caps.dmub_caps.psr = dc->ctx->dmub_srv->dmub->feature_caps.psr;
 	}
-
-	/* Enable support for ODM and windowed MPO if policy flag is set */
-	if (dc->debug.enable_single_display_2to1_odm_policy)
-		dc->config.enable_windowed_mpo_odm = true;
 }
 
 static int calc_mpc_flow_ctrl_cnt(const struct dc_stream_state *stream,
@@ -1175,10 +1175,8 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign
 	two_pix_per_container = optc2_is_two_pixels_per_containter(&stream->timing);
 	odm_combine_factor = get_odm_config(pipe_ctx, NULL);
 
-	if (pipe_ctx->stream->signal == SIGNAL_TYPE_VIRTUAL)
-		return odm_combine_factor;
-
 	if (is_dp_128b_132b_signal(pipe_ctx)) {
+		*k1_div = PIXEL_RATE_DIV_BY_1;
 		*k2_div = PIXEL_RATE_DIV_BY_1;
 	} else if (dc_is_hdmi_tmds_signal(pipe_ctx->stream->signal) || dc_is_dvi_signal(pipe_ctx->stream->signal)) {
 		*k1_div = PIXEL_RATE_DIV_BY_1;
@@ -1186,7 +1184,7 @@ unsigned int dcn32_calculate_dccg_k1_k2_values(struct pipe_ctx *pipe_ctx, unsign
 			*k2_div = PIXEL_RATE_DIV_BY_2;
 		else
 			*k2_div = PIXEL_RATE_DIV_BY_4;
-	} else if (dc_is_dp_signal(pipe_ctx->stream->signal)) {
+	} else if (dc_is_dp_signal(pipe_ctx->stream->signal) || dc_is_virtual_signal(pipe_ctx->stream->signal)) {
 		if (two_pix_per_container) {
 			*k1_div = PIXEL_RATE_DIV_BY_1;
 			*k2_div = PIXEL_RATE_DIV_BY_2;
@@ -1371,6 +1369,33 @@ void dcn32_update_phantom_vp_position(struct dc *dc,
 	}
 }
 
+/* Treat the phantom pipe as if it needs to be fully enabled.
+ * If the pipe was previously in use but not phantom, it would
+ * have been disabled earlier in the sequence so we need to run
+ * the full enable sequence.
+ */
+void dcn32_apply_update_flags_for_phantom(struct pipe_ctx *phantom_pipe)
+{
+	phantom_pipe->update_flags.raw = 0;
+	if (phantom_pipe->stream && phantom_pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+		if (phantom_pipe->stream && phantom_pipe->plane_state) {
+			phantom_pipe->update_flags.bits.enable = 1;
+			phantom_pipe->update_flags.bits.mpcc = 1;
+			phantom_pipe->update_flags.bits.dppclk = 1;
+			phantom_pipe->update_flags.bits.hubp_interdependent = 1;
+			phantom_pipe->update_flags.bits.hubp_rq_dlg_ttu = 1;
+			phantom_pipe->update_flags.bits.gamut_remap = 1;
+			phantom_pipe->update_flags.bits.scaler = 1;
+			phantom_pipe->update_flags.bits.viewport = 1;
+			phantom_pipe->update_flags.bits.det_size = 1;
+			if (!phantom_pipe->top_pipe && !phantom_pipe->prev_odm_pipe) {
+				phantom_pipe->update_flags.bits.odm = 1;
+				phantom_pipe->update_flags.bits.global_sync = 1;
+			}
+		}
+	}
+}
+
 bool dcn32_dsc_pg_status(
 		struct dce_hwseq *hws,
 		unsigned int dsc_inst)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h
index ac3657a5b9ea..7de36529cf99 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_hwseq.h
@@ -92,6 +92,8 @@ void dcn32_update_phantom_vp_position(struct dc *dc,
 		struct dc_state *context,
 		struct pipe_ctx *phantom_pipe);
 
+void dcn32_apply_update_flags_for_phantom(struct pipe_ctx *phantom_pipe);
+
 bool dcn32_dsc_pg_status(
 		struct dce_hwseq *hws,
 		unsigned int dsc_inst);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
index 45a949ba6f3f..dc4649458567 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_init.c
@@ -110,6 +110,7 @@ static const struct hw_sequencer_funcs dcn32_funcs = {
 	.update_visual_confirm_color = dcn20_update_visual_confirm_color,
 	.update_phantom_vp_position = dcn32_update_phantom_vp_position,
 	.update_dsc_pg = dcn32_update_dsc_pg,
+	.apply_update_flags_for_phantom = dcn32_apply_update_flags_for_phantom,
 };
 
 static const struct hwseq_private_funcs dcn32_private_funcs = {
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.c
index 41b0baf8e183..c3b089ba511a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mmhubbub.c
@@ -211,7 +211,7 @@ static void mmhubbub32_config_mcif_arb(struct mcif_wb *mcif_wb,
 	REG_UPDATE(MCIF_WB_ARBITRATION_CONTROL, MCIF_WB_CLIENT_ARBITRATION_SLICE,  params->arbitration_slice);
 }
 
-const struct mcif_wb_funcs dcn32_mmhubbub_funcs = {
+static const struct mcif_wb_funcs dcn32_mmhubbub_funcs = {
 	.warmup_mcif		= mmhubbub32_warmup_mcif,
 	.enable_mcif		= mmhubbub2_enable_mcif,
 	.disable_mcif		= mmhubbub2_disable_mcif,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
index 4edd0655965b..206a5ddbaf6d 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_mpc.c
@@ -982,7 +982,7 @@ static bool mpc32_program_3dlut(
 	return true;
 }
 
-const struct mpc_funcs dcn32_mpc_funcs = {
+static const struct mpc_funcs dcn32_mpc_funcs = {
 	.read_mpcc_state = mpc1_read_mpcc_state,
 	.insert_plane = mpc1_insert_plane,
 	.remove_mpcc = mpc1_remove_mpcc,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
index 2b33eeb213e2..2ee798965bc2 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_optc.c
@@ -167,6 +167,13 @@ static void optc32_phantom_crtc_post_enable(struct timing_generator *optc)
 	REG_WAIT(OTG_CLOCK_CONTROL, OTG_BUSY, 0, 1, 100000);
 }
 
+static void optc32_disable_phantom_otg(struct timing_generator *optc)
+{
+	struct optc *optc1 = DCN10TG_FROM_TG(optc);
+
+	REG_UPDATE(OTG_CONTROL, OTG_MASTER_EN, 0);
+}
+
 static void optc32_set_odm_bypass(struct timing_generator *optc,
 		const struct dc_crtc_timing *dc_crtc_timing)
 {
@@ -260,6 +267,7 @@ static struct timing_generator_funcs dcn32_tg_funcs = {
 		.enable_crtc = optc32_enable_crtc,
 		.disable_crtc = optc32_disable_crtc,
 		.phantom_crtc_post_enable = optc32_phantom_crtc_post_enable,
+		.disable_phantom_crtc = optc32_disable_phantom_otg,
 		/* used by enable_timing_synchronization. Not need for FPGA */
 		.is_counter_moving = optc1_is_counter_moving,
 		.get_position = optc1_get_position,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
index a88dd7b3d1c1..e4dbc8353ea3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.c
@@ -106,8 +106,6 @@ enum dcn32_clk_src_array_id {
  */
 
 /* DCN */
-/* TODO awful hack. fixup dcn20_dwb.h */
-#undef BASE_INNER
 #define BASE_INNER(seg) ctx->dcn_reg_offsets[seg]
 
 #define BASE(seg) BASE_INNER(seg)
@@ -167,6 +165,9 @@ enum dcn32_clk_src_array_id {
 	REG_STRUCT.reg_name[id] = BASE(reg ## block ## id ## _ ## temp_name ## _BASE_IDX) + \
 		reg ## block ## id ## _ ## temp_name
 
+#define SF_DWB2(reg_name, block, id, field_name, post_fix)	\
+	.field_name = reg_name ## __ ## field_name ## post_fix
+
 #define DCCG_SRII(reg_name, block, id)\
 	REG_STRUCT.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
 		reg ## block ## id ## _ ## reg_name
@@ -722,8 +723,9 @@ static const struct dc_debug_options debug_defaults_drv = {
 	/* Must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions*/
 	.enable_double_buffered_dsc_pg_support = true,
 	.enable_dp_dig_pixel_rate_div_policy = 1,
-	.allow_sw_cursor_fallback = false,
+	.allow_sw_cursor_fallback = false, // Linux can't do SW cursor "fallback"
 	.alloc_extra_way_for_cursor = true,
+	.min_prefetch_in_strobe_ns = 60000, // 60us
 };
 
 static const struct dc_debug_options debug_defaults_diags = {
@@ -829,6 +831,7 @@ static struct clock_source *dcn32_clock_source_create(
 		return &clk_src->base;
 	}
 
+	kfree(clk_src);
 	BREAK_TO_DEBUGGER();
 	return NULL;
 }
@@ -1678,7 +1681,7 @@ static void dcn32_enable_phantom_plane(struct dc *dc,
 
 		/* Shadow pipe has small viewport. */
 		phantom_plane->clip_rect.y = 0;
-		phantom_plane->clip_rect.height = phantom_stream->timing.v_addressable;
+		phantom_plane->clip_rect.height = phantom_stream->src.height;
 
 		phantom_plane->is_phantom = true;
 
@@ -1718,8 +1721,29 @@ static struct dc_stream_state *dcn32_enable_phantom_stream(struct dc *dc,
 	return phantom_stream;
 }
 
+void dcn32_retain_phantom_pipes(struct dc *dc, struct dc_state *context)
+{
+	int i;
+	struct dc_plane_state *phantom_plane = NULL;
+	struct dc_stream_state *phantom_stream = NULL;
+
+	for (i = 0; i < dc->res_pool->pipe_count; i++) {
+		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
+
+		if (!pipe->top_pipe && !pipe->prev_odm_pipe &&
+				pipe->plane_state && pipe->stream &&
+				pipe->stream->mall_stream_config.type == SUBVP_PHANTOM) {
+			phantom_plane = pipe->plane_state;
+			phantom_stream = pipe->stream;
+
+			dc_plane_state_retain(phantom_plane);
+			dc_stream_retain(phantom_stream);
+		}
+	}
+}
+
 // return true if removed piped from ctx, false otherwise
-bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context)
+bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context, bool fast_update)
 {
 	int i;
 	bool removed_pipe = false;
@@ -1746,14 +1770,23 @@ bool dcn32_remove_phantom_pipes(struct dc *dc, struct dc_state *context)
 			removed_pipe = true;
 		}
 
-		// Clear all phantom stream info
-		if (pipe->stream) {
-			pipe->stream->mall_stream_config.type = SUBVP_NONE;
-			pipe->stream->mall_stream_config.paired_stream = NULL;
-		}
+		/* For non-full updates, a shallow copy of the current state
+		 * is created. In this case we don't want to erase the current
+		 * state (there can be 2 HIRQL threads, one in flip, and one in
+		 * checkMPO) that can cause a race condition.
+		 *
+		 * This is just a workaround, needs a proper fix.
+		 */
+		if (!fast_update) {
+			// Clear all phantom stream info
+			if (pipe->stream) {
+				pipe->stream->mall_stream_config.type = SUBVP_NONE;
+				pipe->stream->mall_stream_config.paired_stream = NULL;
+			}
 
-		if (pipe->plane_state) {
-			pipe->plane_state->is_phantom = false;
+			if (pipe->plane_state) {
+				pipe->plane_state->is_phantom = false;
+			}
 		}
 	}
 	return removed_pipe;
@@ -1900,7 +1933,7 @@ int dcn32_populate_dml_pipes_from_context(
 
 		pipes[pipe_cnt].pipe.dest.odm_combine_policy = dm_odm_combine_policy_dal;
 		if (context->stream_count == 1 &&
-				context->stream_status[0].plane_count <= 1 &&
+				context->stream_status[0].plane_count == 1 &&
 				!dc_is_hdmi_signal(res_ctx->pipe_ctx[i].stream->signal) &&
 				is_h_timing_divisible_by_2(res_ctx->pipe_ctx[i].stream) &&
 				pipe->stream->timing.pix_clk_100hz * 100 > DCN3_2_VMIN_DISPCLK_HZ &&
@@ -1918,30 +1951,36 @@ int dcn32_populate_dml_pipes_from_context(
 		timing = &pipe->stream->timing;
 
 		pipes[pipe_cnt].pipe.src.gpuvm = true;
-		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
-		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+		DC_FP_START();
+		dcn32_zero_pipe_dcc_fraction(pipes, pipe_cnt);
+		DC_FP_END();
 		pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
 		pipes[pipe_cnt].pipe.src.gpuvm_min_page_size_kbytes = 256; // according to spreadsheet
 		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
 		pipes[pipe_cnt].pipe.scale_ratio_depth.lb_depth = dm_lb_19;
 
-		switch (pipe->stream->mall_stream_config.type) {
-		case SUBVP_MAIN:
-			pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport;
-			subvp_in_use = true;
-			break;
-		case SUBVP_PHANTOM:
-			pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe;
-			pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable;
-			// Disallow unbounded req for SubVP according to DCHUB programming guide
-			pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
-			break;
-		case SUBVP_NONE:
-			pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_disable;
-			pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable;
-			break;
-		default:
-			break;
+		/* Only populate DML input with subvp info for full updates.
+		 * This is just a workaround -- needs a proper fix.
+		 */
+		if (!fast_validate) {
+			switch (pipe->stream->mall_stream_config.type) {
+			case SUBVP_MAIN:
+				pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_sub_viewport;
+				subvp_in_use = true;
+				break;
+			case SUBVP_PHANTOM:
+				pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_phantom_pipe;
+				pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable;
+				// Disallow unbounded req for SubVP according to DCHUB programming guide
+				pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
+				break;
+			case SUBVP_NONE:
+				pipes[pipe_cnt].pipe.src.use_mall_for_pstate_change = dm_use_mall_pstate_change_disable;
+				pipes[pipe_cnt].pipe.src.use_mall_for_static_screen = dm_use_mall_static_screen_disable;
+				break;
+			default:
+				break;
+			}
 		}
 
 		pipes[pipe_cnt].dout.dsc_input_bpc = 0;
@@ -2029,6 +2068,9 @@ static struct resource_funcs dcn32_res_pool_funcs = {
 	.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
 	.add_phantom_pipes = dcn32_add_phantom_pipes,
 	.remove_phantom_pipes = dcn32_remove_phantom_pipes,
+	.retain_phantom_pipes = dcn32_retain_phantom_pipes,
+	.save_mall_state = dcn32_save_mall_state,
+	.restore_mall_state = dcn32_restore_mall_state,
 };
 
 
@@ -2115,16 +2157,20 @@ static bool dcn32_resource_construct(
 	dc->caps.cache_num_ways = 16;
 	dc->caps.max_cab_allocation_bytes = 67108864; // 64MB = 1024 * 1024 * 64
 	dc->caps.subvp_fw_processing_delay_us = 15;
+	dc->caps.subvp_drr_max_vblank_margin_us = 40;
 	dc->caps.subvp_prefetch_end_to_mall_start_us = 15;
 	dc->caps.subvp_swath_height_margin_lines = 16;
 	dc->caps.subvp_pstate_allow_width_us = 20;
 	dc->caps.subvp_vertical_int_margin_us = 30;
+	dc->caps.subvp_drr_vblank_start_margin_us = 100; // 100us margin
 
 	dc->caps.max_slave_planes = 2;
 	dc->caps.max_slave_yuv_planes = 2;
 	dc->caps.max_slave_rgb_planes = 2;
 	dc->caps.post_blend_color_processing = true;
 	dc->caps.force_dp_tps4_for_cp2520 = true;
+	if (dc->config.forceHBR2CP2520)
+		dc->caps.force_dp_tps4_for_cp2520 = false;
 	dc->caps.dp_hpo = true;
 	dc->caps.dp_hdmi21_pcon_support = true;
 	dc->caps.edp_dsc_support = true;
@@ -2408,6 +2454,9 @@ static bool dcn32_resource_construct(
 		pool->base.oem_device = NULL;
 	}
 
+	if (ASICREV_IS_GC_11_0_3(dc->ctx->asic_id.hw_internal_rev) && (dc->config.sdpif_request_limit_words_per_umc == 0))
+		dc->config.sdpif_request_limit_words_per_umc = 16;
+
 	DC_FP_END();
 
 	return true;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
index f76120e67c16..13fbc574910b 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource.h
@@ -45,17 +45,6 @@
 extern struct _vcs_dpi_ip_params_st dcn3_2_ip;
 extern struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc;
 
-/* Temp struct used to save and restore MALL config
- * during validation.
- *
- * TODO: Move MALL config into dc_state instead of stream struct
- * to avoid needing to save/restore.
- */
-struct mall_temp_config {
-	struct mall_stream_config mall_stream_config[MAX_PIPES];
-	bool is_phantom_plane[MAX_PIPES];
-};
-
 struct dcn32_resource_pool {
 	struct resource_pool base;
 };
@@ -81,6 +70,9 @@ bool dcn32_release_post_bldn_3dlut(
 		struct dc_transfer_func **shaper);
 
 bool dcn32_remove_phantom_pipes(struct dc *dc,
+		struct dc_state *context, bool fast_update);
+
+void dcn32_retain_phantom_pipes(struct dc *dc,
 		struct dc_state *context);
 
 void dcn32_add_phantom_pipes(struct dc *dc,
@@ -1244,7 +1236,8 @@ void dcn32_restore_mall_state(struct dc *dc,
       SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_C),                         \
       SR(DCHUBBUB_ARB_FCLK_PSTATE_CHANGE_WATERMARK_D),                         \
       SR(DCN_VM_FAULT_ADDR_MSB), SR(DCN_VM_FAULT_ADDR_LSB),                    \
-      SR(DCN_VM_FAULT_CNTL), SR(DCN_VM_FAULT_STATUS)                           \
+      SR(DCN_VM_FAULT_CNTL), SR(DCN_VM_FAULT_STATUS),                          \
+      SR(SDPIF_REQUEST_RATE_LIMIT)                                             \
   )
 
 /* DCCG */
diff --git a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
index d51d0c40ae5b..783935c4e664 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn32/dcn32_resource_helpers.c
@@ -97,21 +97,21 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat
 			 * FLOOR(vp_x_start, blk_width)
 			 */
 			full_vp_width_blk_aligned = ((pipe->plane_res.scl_data.viewport.x +
-					pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) +
+					pipe->plane_res.scl_data.viewport.width + mblk_width - 1) / mblk_width * mblk_width) -
 					(pipe->plane_res.scl_data.viewport.x / mblk_width * mblk_width);
 
 			/* full_vp_height_blk_aligned = FLOOR(vp_y_start + full_vp_height + blk_height - 1, blk_height) -
 			 * FLOOR(vp_y_start, blk_height)
 			 */
 			full_vp_height_blk_aligned = ((pipe->plane_res.scl_data.viewport.y +
-					full_vp_height + mblk_height - 1) / mblk_height * mblk_height) +
+					full_vp_height + mblk_height - 1) / mblk_height * mblk_height) -
 					(pipe->plane_res.scl_data.viewport.y / mblk_height * mblk_height);
 
 			/* mall_alloc_width_blk_aligned_l/c = full_vp_width_blk_aligned_l/c */
 			mall_alloc_width_blk_aligned = full_vp_width_blk_aligned;
 
 			/* mall_alloc_height_blk_aligned_l/c = CEILING(sub_vp_height_l/c - 1, blk_height_l/c) + blk_height_l/c */
-			mall_alloc_height_blk_aligned = (pipe->stream->timing.v_addressable - 1 + mblk_height - 1) /
+			mall_alloc_height_blk_aligned = (pipe->plane_res.scl_data.viewport.height - 1 + mblk_height - 1) /
 					mblk_height * mblk_height + mblk_height;
 
 			/* full_mblk_width_ub_l/c = mall_alloc_width_blk_aligned_l/c;
@@ -121,14 +121,19 @@ uint32_t dcn32_helper_calculate_num_ways_for_subvp(struct dc *dc, struct dc_stat
 			 */
 			num_mblks = ((mall_alloc_width_blk_aligned + mblk_width - 1) / mblk_width) *
 					((mall_alloc_height_blk_aligned + mblk_height - 1) / mblk_height);
+
+			/*For DCC:
+			 * meta_num_mblk = CEILING(meta_pitch*full_vp_height*Bpe/256/mblk_bytes, 1)
+			 */
+			if (pipe->plane_state->dcc.enable)
+				num_mblks += (pipe->plane_state->dcc.meta_pitch * pipe->plane_res.scl_data.viewport.height * bytes_per_pixel +
+								(256 * DCN3_2_MALL_MBLK_SIZE_BYTES) - 1) / (256 * DCN3_2_MALL_MBLK_SIZE_BYTES);
+
 			bytes_in_mall = num_mblks * DCN3_2_MALL_MBLK_SIZE_BYTES;
 			// cache lines used is total bytes / cache_line size. Add +2 for worst case alignment
 			// (MALL is 64-byte aligned)
 			cache_lines_per_plane = bytes_in_mall / dc->caps.cache_line_size + 2;
 
-			/* For DCC divide by 256 */
-			if (pipe->plane_state->dcc.enable)
-				cache_lines_per_plane = cache_lines_per_plane + (cache_lines_per_plane / 256) + 1;
 			cache_lines_used += cache_lines_per_plane;
 		}
 	}
@@ -200,7 +205,7 @@ bool dcn32_all_pipes_have_stream_and_plane(struct dc *dc,
 		struct pipe_ctx *pipe = &context->res_ctx.pipe_ctx[i];
 
 		if (!pipe->stream)
-			return false;
+			continue;
 
 		if (!pipe->plane_state)
 			return false;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
index 61087f2385a9..d1f36df03c2e 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn321/dcn321_resource.c
@@ -109,8 +109,6 @@ enum dcn321_clk_src_array_id {
  */
 
 /* DCN */
-/* TODO awful hack. fixup dcn20_dwb.h */
-#undef BASE_INNER
 #define BASE_INNER(seg) ctx->dcn_reg_offsets[seg]
 
 #define BASE(seg) BASE_INNER(seg)
@@ -174,6 +172,9 @@ enum dcn321_clk_src_array_id {
 	REG_STRUCT.block ## _ ## reg_name[id] = BASE(reg ## block ## id ## _ ## reg_name ## _BASE_IDX) + \
 		reg ## block ## id ## _ ## reg_name
 
+#define SF_DWB2(reg_name, block, id, field_name, post_fix) \
+	.field_name = reg_name ## __ ## field_name ## post_fix
+
 #define VUPDATE_SRII(reg_name, block, id)\
 	REG_STRUCT.reg_name[id] = BASE(reg ## reg_name ## _ ## block ## id ## _BASE_IDX) + \
 		reg ## reg_name ## _ ## block ## id
@@ -720,8 +721,9 @@ static const struct dc_debug_options debug_defaults_drv = {
 	/*must match enable_single_display_2to1_odm_policy to support dynamic ODM transitions*/
 	.enable_double_buffered_dsc_pg_support = true,
 	.enable_dp_dig_pixel_rate_div_policy = 1,
-	.allow_sw_cursor_fallback = false,
+	.allow_sw_cursor_fallback = false, // Linux can't do SW cursor "fallback"
 	.alloc_extra_way_for_cursor = true,
+	.min_prefetch_in_strobe_ns = 60000, // 60us
 };
 
 static const struct dc_debug_options debug_defaults_diags = {
@@ -741,7 +743,7 @@ static const struct dc_debug_options debug_defaults_diags = {
 	.dmub_command_table = true,
 	.enable_tri_buf = true,
 	.use_max_lb = true,
-	.force_disable_subvp = true
+	.force_disable_subvp = true,
 };
 
 
@@ -828,6 +830,7 @@ static struct clock_source *dcn321_clock_source_create(
 		return &clk_src->base;
 	}
 
+	kfree(clk_src);
 	BREAK_TO_DEBUGGER();
 	return NULL;
 }
@@ -1618,6 +1621,9 @@ static struct resource_funcs dcn321_res_pool_funcs = {
 	.update_soc_for_wm_a = dcn30_update_soc_for_wm_a,
 	.add_phantom_pipes = dcn32_add_phantom_pipes,
 	.remove_phantom_pipes = dcn32_remove_phantom_pipes,
+	.retain_phantom_pipes = dcn32_retain_phantom_pipes,
+	.save_mall_state = dcn32_save_mall_state,
+	.restore_mall_state = dcn32_restore_mall_state,
 };
 
 
@@ -1703,10 +1709,12 @@ static bool dcn321_resource_construct(
 	dc->caps.cache_num_ways = 16;
 	dc->caps.max_cab_allocation_bytes = 33554432; // 32MB = 1024 * 1024 * 32
 	dc->caps.subvp_fw_processing_delay_us = 15;
+	dc->caps.subvp_drr_max_vblank_margin_us = 40;
 	dc->caps.subvp_prefetch_end_to_mall_start_us = 15;
 	dc->caps.subvp_swath_height_margin_lines = 16;
 	dc->caps.subvp_pstate_allow_width_us = 20;
 	dc->caps.subvp_vertical_int_margin_us = 30;
+	dc->caps.subvp_drr_vblank_start_margin_us = 100; // 100us margin
 	dc->caps.max_slave_planes = 1;
 	dc->caps.max_slave_yuv_planes = 1;
 	dc->caps.max_slave_rgb_planes = 1;
diff --git a/drivers/gpu/drm/amd/display/dc/dm_helpers.h b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
index e3e5c39895a3..af1c50ed905a 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_helpers.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_helpers.h
@@ -116,6 +116,11 @@ bool dm_helpers_dp_mst_start_top_mgr(
 bool dm_helpers_dp_mst_stop_top_mgr(
 		struct dc_context *ctx,
 		struct dc_link *link);
+
+void dm_helpers_dp_mst_update_branch_bandwidth(
+		struct dc_context *ctx,
+		struct dc_link *link);
+
 /**
  * OS specific aux read callback.
  */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/Makefile b/drivers/gpu/drm/amd/display/dc/dml/Makefile
index ca7d24000621..0ecea87cf48f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/Makefile
+++ b/drivers/gpu/drm/amd/display/dc/dml/Makefile
@@ -33,6 +33,10 @@ ifdef CONFIG_PPC64
 dml_ccflags := -mhard-float -maltivec
 endif
 
+ifdef CONFIG_ARM64
+dml_rcflags := -mgeneral-regs-only
+endif
+
 ifdef CONFIG_CC_IS_GCC
 ifneq ($(call gcc-min-version, 70100),y)
 IS_OLD_GCC = 1
@@ -55,8 +59,6 @@ frame_warn_flag := -Wframe-larger-than=2048
 endif
 
 CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_ccflags)
-
-ifdef CONFIG_DRM_AMD_DC_DCN
 CFLAGS_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_ccflags)
@@ -88,7 +90,6 @@ CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calcs.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_auto.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_ccflags) -Wno-tautological-compare
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_vba.o := $(dml_rcflags)
-CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn2x/dcn2x.o := $(dml_rcflags)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20.o := $(dml_rcflags)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_rq_dlg_calc_20.o := $(dml_rcflags)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/display_mode_vba_20v2.o := $(dml_rcflags)
@@ -105,7 +106,18 @@ CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/display_mode_vba_util_32.o := $(dml_rcf
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn301/dcn301_fpu.o := $(dml_rcflags)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/display_mode_lib.o := $(dml_rcflags)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dsc/rc_calc_fpu.o  := $(dml_rcflags)
-endif
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn10/dcn10_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn20/dcn20_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_mode_vba_314.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/display_rq_dlg_calc_314.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn314/dcn314_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn30/dcn30_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn32/dcn32_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn321/dcn321_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn31/dcn31_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn302/dcn302_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dcn303/dcn303_fpu.o := $(dml_rcflags)
+CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/calcs/dcn_calc_math.o := $(dml_rcflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_ccflags)
 CFLAGS_$(AMDDALPATH)/dc/dml/display_rq_dlg_helpers.o := $(dml_ccflags)
 CFLAGS_REMOVE_$(AMDDALPATH)/dc/dml/dml1_display_rq_dlg_calc.o := $(dml_rcflags)
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
index 74e86732e301..2cbdd75429ff 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dc_features.h
@@ -29,6 +29,13 @@
 #define DC__PRESENT 1
 #define DC__PRESENT__1 1
 #define DC__NUM_DPP 4
+
+/**
+ * @DC__VOLTAGE_STATES:
+ *
+ * Define the maximum amount of states supported by the ASIC. Every ASIC has a
+ * specific number of states; this macro defines the maximum number of states.
+ */
 #define DC__VOLTAGE_STATES 20
 #define DC__NUM_DPP__4 1
 #define DC__NUM_DPP__0_PRESENT 1
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
index 99644d896222..c5e84190c17a 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.c
@@ -27,6 +27,8 @@
 #include "dcn10/dcn10_resource.h"
 
 #include "dcn10_fpu.h"
+#include "resource.h"
+#include "amdgpu_dm/dc_fpu.h"
 
 /**
  * DOC: DCN10 FPU manipulation Overview
@@ -121,3 +123,37 @@ struct _vcs_dpi_soc_bounding_box_st dcn1_0_soc = {
 	.writeback_dram_clock_change_latency_us = 23.0,
 	.return_bus_width_bytes = 64,
 };
+
+void dcn10_resource_construct_fp(struct dc *dc)
+{
+	dc_assert_fp_enabled();
+	if (dc->ctx->dce_version == DCN_VERSION_1_01) {
+		struct dcn_soc_bounding_box *dcn_soc = dc->dcn_soc;
+		struct dcn_ip_params *dcn_ip = dc->dcn_ip;
+		struct display_mode_lib *dml = &dc->dml;
+
+		dml->ip.max_num_dpp = 3;
+		/* TODO how to handle 23.84? */
+		dcn_soc->dram_clock_change_latency = 23;
+		dcn_ip->max_num_dpp = 3;
+	}
+	if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev)) {
+		dc->dcn_soc->urgent_latency = 3;
+		dc->debug.disable_dmcu = true;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 41.60f;
+	}
+
+	dc->dcn_soc->number_of_channels = dc->ctx->asic_id.vram_width / ddr4_dram_width;
+	ASSERT(dc->dcn_soc->number_of_channels < 3);
+	if (dc->dcn_soc->number_of_channels == 0)/*old sbios bug*/
+		dc->dcn_soc->number_of_channels = 2;
+
+	if (dc->dcn_soc->number_of_channels == 1) {
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 19.2f;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vnom0p8 = 17.066f;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmid0p72 = 14.933f;
+		dc->dcn_soc->fabric_and_dram_bandwidth_vmin0p65 = 12.8f;
+		if (ASICREV_IS_RV1_F0(dc->ctx->asic_id.hw_internal_rev))
+			dc->dcn_soc->fabric_and_dram_bandwidth_vmax0p9 = 20.80f;
+	}
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
index e74ed4b4ce5b..63219ecd8478 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn10/dcn10_fpu.h
@@ -27,4 +27,6 @@
 #ifndef __DCN10_FPU_H__
 #define __DCN10_FPU_H__
 
+void dcn10_resource_construct_fp(struct dc *dc);
+
 #endif /* __DCN20_FPU_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
index d680f1c5b69f..c26da3bb2892 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn20/dcn20_fpu.c
@@ -565,7 +565,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
 				.dppclk_mhz = 847.06,
 				.phyclk_mhz = 810.0,
 				.socclk_mhz = 953.0,
-				.dscclk_mhz = 489.0,
+				.dscclk_mhz = 300.0,
 				.dram_speed_mts = 2400.0,
 			},
 			{
@@ -576,7 +576,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
 				.dppclk_mhz = 960.00,
 				.phyclk_mhz = 810.0,
 				.socclk_mhz = 278.0,
-				.dscclk_mhz = 287.67,
+				.dscclk_mhz = 342.86,
 				.dram_speed_mts = 2666.0,
 			},
 			{
@@ -587,7 +587,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
 				.dppclk_mhz = 1028.57,
 				.phyclk_mhz = 810.0,
 				.socclk_mhz = 715.0,
-				.dscclk_mhz = 318.334,
+				.dscclk_mhz = 369.23,
 				.dram_speed_mts = 3200.0,
 			},
 			{
@@ -949,6 +949,7 @@ static enum dcn_zstate_support_state  decide_zstate_support(struct dc *dc, struc
 	int plane_count;
 	int i;
 	unsigned int optimized_min_dst_y_next_start_us;
+	bool allow_z8 = context->bw_ctx.dml.vba.StutterPeriod > 1000.0;
 
 	plane_count = 0;
 	optimized_min_dst_y_next_start_us = 0;
@@ -963,6 +964,8 @@ static enum dcn_zstate_support_state  decide_zstate_support(struct dc *dc, struc
 	 * 	2. single eDP, on link 0, 1 plane and stutter period > 5ms
 	 * Z10 only cases:
 	 * 	1. single eDP, on link 0, 1 plane and stutter period >= 5ms
+	 * Z8 cases:
+	 * 	1. stutter period sufficient
 	 * Zstate not allowed cases:
 	 * 	1. Everything else
 	 */
@@ -989,12 +992,15 @@ static enum dcn_zstate_support_state  decide_zstate_support(struct dc *dc, struc
 
 		if (context->bw_ctx.dml.vba.StutterPeriod > 5000.0 || optimized_min_dst_y_next_start_us > 5000)
 			return DCN_ZSTATE_SUPPORT_ALLOW;
-		else if (link->psr_settings.psr_version == DC_PSR_VERSION_1 && !dc->debug.disable_psr)
-			return DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY;
+		else if (link->psr_settings.psr_version == DC_PSR_VERSION_1 && !link->panel_config.psr.disable_psr)
+			return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_Z10_ONLY : DCN_ZSTATE_SUPPORT_ALLOW_Z10_ONLY;
 		else
-			return DCN_ZSTATE_SUPPORT_DISALLOW;
-	} else
+			return allow_z8 ? DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY : DCN_ZSTATE_SUPPORT_DISALLOW;
+	} else if (allow_z8) {
+		return DCN_ZSTATE_SUPPORT_ALLOW_Z8_ONLY;
+	} else {
 		return DCN_ZSTATE_SUPPORT_DISALLOW;
+	}
 }
 
 void dcn20_calculate_dlg_params(
@@ -1228,6 +1234,7 @@ int dcn20_populate_dml_pipes_from_context(
 		pipes[pipe_cnt].pipe.src.dcc = false;
 		pipes[pipe_cnt].pipe.src.dcc_rate = 1;
 		pipes[pipe_cnt].pipe.dest.synchronized_vblank_all_planes = synchronized_vblank;
+		pipes[pipe_cnt].pipe.dest.synchronize_timings = synchronized_vblank;
 		pipes[pipe_cnt].pipe.dest.hblank_start = timing->h_total - timing->h_front_porch;
 		pipes[pipe_cnt].pipe.dest.hblank_end = pipes[pipe_cnt].pipe.dest.hblank_start
 				- timing->h_addressable
@@ -1295,6 +1302,8 @@ int dcn20_populate_dml_pipes_from_context(
 		case SIGNAL_TYPE_DISPLAY_PORT_MST:
 		case SIGNAL_TYPE_DISPLAY_PORT:
 			pipes[pipe_cnt].dout.output_type = dm_dp;
+			if (is_dp_128b_132b_signal(&res_ctx->pipe_ctx[i]))
+				pipes[pipe_cnt].dout.output_type = dm_dp2p0;
 			break;
 		case SIGNAL_TYPE_EDP:
 			pipes[pipe_cnt].dout.output_type = dm_edp;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
index e1e92daba668..d4c0f9cdac8e 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/dcn30_fpu.c
@@ -520,9 +520,7 @@ void dcn30_fpu_calculate_wm_and_dlg(
 		pipe_idx++;
 	}
 
-	DC_FP_START();
 	dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
-	DC_FP_END();
 
 	if (!pstate_en)
 		/* Restore full p-state latency */
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
index 479e2c1a1301..379729b02847 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn30/display_mode_vba_30.c
@@ -4851,7 +4851,7 @@ void dml30_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							v->SwathHeightYThisState[k],
 							v->SwathHeightCThisState[k],
 							v->HTotal[k] / v->PixelClock[k],
-							v->UrgentLatency,
+							v->UrgLatency[i],
 							v->CursorBufferSize,
 							v->CursorWidth[k][0],
 							v->CursorBPP[k][0],
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
index 7dd0845d1bd9..b37d14369a62 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.c
@@ -483,7 +483,7 @@ void dcn31_calculate_wm_and_dlg_fp(
 		int pipe_cnt,
 		int vlevel)
 {
-	int i, pipe_idx, active_dpp_count = 0;
+	int i, pipe_idx, active_hubp_count = 0;
 	double dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
 
 	dc_assert_fp_enabled();
@@ -529,7 +529,7 @@ void dcn31_calculate_wm_and_dlg_fp(
 			continue;
 
 		if (context->res_ctx.pipe_ctx[i].plane_state)
-			active_dpp_count++;
+			active_hubp_count++;
 
 		pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
 		pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
@@ -547,9 +547,22 @@ void dcn31_calculate_wm_and_dlg_fp(
 	}
 
 	dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
-	/* For 31x apu pstate change is only supported if possible in vactive or if there are no active dpps */
+	/* For 31x apu pstate change is only supported if possible in vactive*/
 	context->bw_ctx.bw.dcn.clk.p_state_change_support =
-			context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_vactive || !active_dpp_count;
+			context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] == dm_dram_clock_change_vactive;
+	/* If DCN isn't making memory requests we can allow pstate change and lower clocks */
+	if (!active_hubp_count) {
+		context->bw_ctx.bw.dcn.clk.socclk_khz = 0;
+		context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
+		context->bw_ctx.bw.dcn.clk.dcfclk_khz = 0;
+		context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = 0;
+		context->bw_ctx.bw.dcn.clk.dramclk_khz = 0;
+		context->bw_ctx.bw.dcn.clk.fclk_khz = 0;
+		context->bw_ctx.bw.dcn.clk.p_state_change_support = true;
+		for (i = 0; i < dc->res_pool->pipe_count; i++)
+			if (context->res_ctx.pipe_ctx[i].stream)
+				context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0;
+	}
 }
 
 void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params)
@@ -797,3 +810,8 @@ void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
 	else
 		dml_init_instance(&dc->dml, &dcn3_16_soc, &dcn3_16_ip, DML_PROJECT_DCN31_FPGA);
 }
+
+int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc)
+{
+	return soc->clock_limits[0].dispclk_mhz * 10000.0 / (1.0 + soc->dcn_downspread_percent / 100.0);
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
index fd58b2561ec9..687d3522cc33 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/dcn31_fpu.h
@@ -46,5 +46,10 @@ void dcn31_calculate_wm_and_dlg_fp(
 void dcn31_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
 void dcn315_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
 void dcn316_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_params);
+int dcn_get_max_non_odm_pix_rate_100hz(struct _vcs_dpi_soc_bounding_box_st *soc);
 
+int dcn31x_populate_dml_pipes_from_context(struct dc *dc,
+					  struct dc_state *context,
+					  display_e2e_pipe_params_st *pipes,
+					  bool fast_validate);
 #endif /* __DCN31_FPU_H__*/
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
index b612edb14417..ec351c8418cb 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn31/display_mode_vba_31.c
@@ -1052,17 +1052,16 @@ static bool CalculatePrefetchSchedule(
 	else
 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
 	/*rev 99*/
-	prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane);
+	prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane;
+	prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr;
 	max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
 	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
-	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
 	prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
 
 	min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
 	Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
 	Tsw_oto = Lsw_oto * LineTime;
 
-	prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
 
 #ifdef __DML_VBA_DEBUG__
 	dml_print("DML: HTotal: %d\n", myPipe->HTotal);
@@ -5083,7 +5082,7 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							v->SwathHeightYThisState[k],
 							v->SwathHeightCThisState[k],
 							v->HTotal[k] / v->PixelClock[k],
-							v->UrgentLatency,
+							v->UrgLatency[i],
 							v->CursorBufferSize,
 							v->CursorWidth[k][0],
 							v->CursorBPP[k][0],
@@ -5361,6 +5360,58 @@ void dml31_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 				v->ModeSupport[i][j] = true;
 			} else {
 				v->ModeSupport[i][j] = false;
+#ifdef __DML_VBA_DEBUG__
+				if (v->ScaleRatioAndTapsSupport == false)
+					dml_print("DML SUPPORT:     ScaleRatioAndTapsSupport failed");
+				if (v->SourceFormatPixelAndScanSupport == false)
+					dml_print("DML SUPPORT:     SourceFormatPixelAndScanSupport failed");
+				if (v->ViewportSizeSupport[i][j] == false)
+					dml_print("DML SUPPORT:     ViewportSizeSupport failed");
+				if (v->LinkCapacitySupport[i] == false)
+					dml_print("DML SUPPORT:     LinkCapacitySupport failed");
+				if (v->ODMCombine4To1SupportCheckOK[i] == false)
+					dml_print("DML SUPPORT:     DSC422NativeNotSupported failed");
+				if (v->NotEnoughDSCUnits[i] == true)
+					dml_print("DML SUPPORT:     NotEnoughDSCUnits");
+				if (v->DTBCLKRequiredMoreThanSupported[i] == true)
+					dml_print("DML SUPPORT:     DTBCLKRequiredMoreThanSupported");
+				if (v->ROBSupport[i][j] == false)
+					dml_print("DML SUPPORT:     ROBSupport failed");
+				if (v->DISPCLK_DPPCLK_Support[i][j] == false)
+					dml_print("DML SUPPORT:     DISPCLK_DPPCLK_Support failed");
+				if (v->TotalAvailablePipesSupport[i][j] == false)
+					dml_print("DML SUPPORT:     DSC422NativeNotSupported failed");
+				if (EnoughWritebackUnits == false)
+					dml_print("DML SUPPORT:     DSC422NativeNotSupported failed");
+				if (v->WritebackLatencySupport == false)
+					dml_print("DML SUPPORT:     WritebackLatencySupport failed");
+				if (v->WritebackScaleRatioAndTapsSupport == false)
+					dml_print("DML SUPPORT:     DSC422NativeNotSupported ");
+				if (v->CursorSupport == false)
+					dml_print("DML SUPPORT:     DSC422NativeNotSupported failed");
+				if (v->PitchSupport == false)
+					dml_print("DML SUPPORT:     PitchSupport failed");
+				if (ViewportExceedsSurface == true)
+					dml_print("DML SUPPORT:     ViewportExceedsSurface failed");
+				if (v->PrefetchSupported[i][j] == false)
+					dml_print("DML SUPPORT:     PrefetchSupported failed");
+				if (v->DynamicMetadataSupported[i][j] == false)
+					dml_print("DML SUPPORT:     DSC422NativeNotSupported failed");
+				if (v->TotalVerticalActiveBandwidthSupport[i][j] == false)
+					dml_print("DML SUPPORT:     TotalVerticalActiveBandwidthSupport failed");
+				if (v->VRatioInPrefetchSupported[i][j] == false)
+					dml_print("DML SUPPORT:     VRatioInPrefetchSupported failed");
+				if (v->PTEBufferSizeNotExceeded[i][j] == false)
+					dml_print("DML SUPPORT:     PTEBufferSizeNotExceeded failed");
+				if (v->NonsupportedDSCInputBPC == true)
+					dml_print("DML SUPPORT:     NonsupportedDSCInputBPC failed");
+				if (!((v->HostVMEnable == false
+					&& v->ImmediateFlipRequirement[0] != dm_immediate_flip_required)
+							|| v->ImmediateFlipSupportedForState[i][j] == true))
+					dml_print("DML SUPPORT:     ImmediateFlipRequirement failed");
+				if (FMTBufferExceeded == true)
+					dml_print("DML SUPPORT:     FMTBufferExceeded failed");
+#endif
 			}
 		}
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
index cf420ad2b8dc..6a1cf6adea77 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/dcn314_fpu.c
@@ -29,6 +29,7 @@
 #include "dcn31/dcn31_hubbub.h"
 #include "dcn314_fpu.h"
 #include "dml/dcn20/dcn20_fpu.h"
+#include "dml/dcn31/dcn31_fpu.h"
 #include "dml/display_mode_vba.h"
 
 struct _vcs_dpi_ip_params_st dcn3_14_ip = {
@@ -146,10 +147,10 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_14_soc = {
 		},
 	},
 	.num_states = 5,
-	.sr_exit_time_us = 9.0,
-	.sr_enter_plus_exit_time_us = 11.0,
-	.sr_exit_z8_time_us = 442.0,
-	.sr_enter_plus_exit_z8_time_us = 560.0,
+	.sr_exit_time_us = 16.5,
+	.sr_enter_plus_exit_time_us = 18.5,
+	.sr_exit_z8_time_us = 280.0,
+	.sr_enter_plus_exit_z8_time_us = 350.0,
 	.writeback_latency_us = 12.0,
 	.dram_channel_width_bytes = 4,
 	.round_trip_ping_latency_dcfclk_cycles = 106,
@@ -264,11 +265,8 @@ void dcn314_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p
 		dc->dml.soc.dispclk_dppclk_vco_speed_mhz = max_dispclk_mhz * 2;
 	}
 
-	if ((int)(dcn3_14_soc.dram_clock_change_latency_us * 1000)
-				!= dc->debug.dram_clock_change_latency_ns
-			&& dc->debug.dram_clock_change_latency_ns) {
-		dcn3_14_soc.dram_clock_change_latency_us = dc->debug.dram_clock_change_latency_ns / 1000;
-	}
+	dcn20_patch_bounding_box(dc, &dcn3_14_soc);
+
 	if (!IS_FPGA_MAXIMUS_DC(dc->ctx->dce_environment))
 		dml_init_instance(&dc->dml, &dcn3_14_soc, &dcn3_14_ip, DML_PROJECT_DCN314);
 	else
@@ -291,7 +289,7 @@ int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *c
 
 	dc_assert_fp_enabled();
 
-	dcn20_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
+	dcn31x_populate_dml_pipes_from_context(dc, context, pipes, fast_validate);
 
 	for (i = 0, pipe_cnt = 0; i < dc->res_pool->pipe_count; i++) {
 		struct dc_crtc_timing *timing;
@@ -318,8 +316,6 @@ int dcn314_populate_dml_pipes_from_context_fpu(struct dc *dc, struct dc_state *c
 		pipes[pipe_cnt].pipe.src.immediate_flip = true;
 
 		pipes[pipe_cnt].pipe.src.unbounded_req_mode = false;
-		pipes[pipe_cnt].pipe.src.hostvm = dc->res_pool->hubbub->riommu_active;
-		pipes[pipe_cnt].pipe.src.gpuvm = true;
 		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
 		pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
 		pipes[pipe_cnt].pipe.dest.vfront_porch = timing->v_front_porch;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
index 0d12fd079cd6..950669f2c10d 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn314/display_mode_vba_314.c
@@ -1074,17 +1074,16 @@ static bool CalculatePrefetchSchedule(
 	else
 		bytes_pp = myPipe->BytePerPixelY + myPipe->BytePerPixelC;
 	/*rev 99*/
-	prefetch_bw_pr = dml_min(1, bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane);
+	prefetch_bw_pr = bytes_pp * myPipe->PixelClock / (double) myPipe->DPPPerPlane;
+	prefetch_bw_pr = dml_min(1, myPipe->VRatio) * prefetch_bw_pr;
 	max_Tsw = dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime;
 	prefetch_sw_bytes = PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC;
-	prefetch_bw_oto = dml_max(bytes_pp * myPipe->PixelClock / myPipe->DPPPerPlane, prefetch_sw_bytes / (dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) * LineTime));
 	prefetch_bw_oto = dml_max(prefetch_bw_pr, prefetch_sw_bytes / max_Tsw);
 
 	min_Lsw = dml_max(1, dml_max(PrefetchSourceLinesY, PrefetchSourceLinesC) / max_vratio_pre);
 	Lsw_oto = dml_ceil(4 * dml_max(prefetch_sw_bytes / prefetch_bw_oto / LineTime, min_Lsw), 1) / 4;
 	Tsw_oto = Lsw_oto * LineTime;
 
-	prefetch_bw_oto = (PrefetchSourceLinesY * swath_width_luma_ub * myPipe->BytePerPixelY + PrefetchSourceLinesC * swath_width_chroma_ub * myPipe->BytePerPixelC) / Tsw_oto;
 
 #ifdef __DML_VBA_DEBUG__
 	dml_print("DML: HTotal: %d\n", myPipe->HTotal);
@@ -5180,7 +5179,7 @@ void dml314_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_
 							v->SwathHeightYThisState[k],
 							v->SwathHeightCThisState[k],
 							v->HTotal[k] / v->PixelClock[k],
-							v->UrgentLatency,
+							v->UrgLatency[i],
 							v->CursorBufferSize,
 							v->CursorWidth[k][0],
 							v->CursorBPP[k][0],
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
index 819de0f11012..f94abd124021 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.c
@@ -157,7 +157,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_2_soc = {
 	.dispclk_dppclk_vco_speed_mhz = 4300.0,
 	.do_urgent_latency_adjustment = true,
 	.urgent_latency_adjustment_fabric_clock_component_us = 1.0,
-	.urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
+	.urgent_latency_adjustment_fabric_clock_reference_mhz = 3000,
 };
 
 void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr)
@@ -211,7 +211,7 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr)
 	/* 'DalDummyClockChangeLatencyNs' registry key option set to 0x7FFFFFFF can be used to disable Set C for dummy p-state */
 	if (clk_mgr->base.ctx->dc->bb_overrides.dummy_clock_change_latency_ns != 0x7FFFFFFF) {
 		clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].valid = true;
-		clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 38;
+		clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.pstate_latency_us = 50;
 		clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.fclk_change_latency_us = fclk_change_latency_us;
 		clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_exit_time_us = sr_exit_time_us;
 		clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].dml_input.sr_enter_plus_exit_time_us = sr_enter_plus_exit_time_us;
@@ -221,7 +221,7 @@ void dcn32_build_wm_range_table_fpu(struct clk_mgr_internal *clk_mgr)
 		clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.min_uclk = min_uclk_mhz;
 		clk_mgr->base.bw_params->wm_table.nv_entries[WM_C].pmfw_breakdown.max_uclk = 0xFFFF;
 		clk_mgr->base.bw_params->dummy_pstate_table[0].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[0].memclk_mhz * 16;
-		clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 38;
+		clk_mgr->base.bw_params->dummy_pstate_table[0].dummy_pstate_latency_us = 50;
 		clk_mgr->base.bw_params->dummy_pstate_table[1].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[1].memclk_mhz * 16;
 		clk_mgr->base.bw_params->dummy_pstate_table[1].dummy_pstate_latency_us = 9;
 		clk_mgr->base.bw_params->dummy_pstate_table[2].dram_speed_mts = clk_mgr->base.bw_params->clk_table.entries[2].memclk_mhz * 16;
@@ -256,16 +256,24 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
 							    int vlevel)
 {
 	const int max_latency_table_entries = 4;
-	const struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+	struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
 	int dummy_latency_index = 0;
+	enum clock_change_support temp_clock_change_support = vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
 
 	dc_assert_fp_enabled();
 
 	while (dummy_latency_index < max_latency_table_entries) {
+		if (temp_clock_change_support != dm_dram_clock_change_unsupported)
+			vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support;
 		context->bw_ctx.dml.soc.dram_clock_change_latency_us =
 				dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
 		dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
 
+		/* for subvp + DRR case, if subvp pipes are still present we support pstate */
+		if (vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] == dm_dram_clock_change_unsupported &&
+				dcn32_subvp_in_use(dc, context))
+			vba->DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = temp_clock_change_support;
+
 		if (vlevel < context->bw_ctx.dml.vba.soc.num_states &&
 				vba->DRAMClockChangeSupport[vlevel][vba->maxMpcComb] != dm_dram_clock_change_unsupported)
 			break;
@@ -531,9 +539,12 @@ void dcn32_set_phantom_stream_timing(struct dc *dc,
 	unsigned int i, pipe_idx;
 	struct pipe_ctx *pipe;
 	uint32_t phantom_vactive, phantom_bp, pstate_width_fw_delay_lines;
+	unsigned int num_dpp;
 	unsigned int vlevel = context->bw_ctx.dml.vba.VoltageLevel;
 	unsigned int dcfclk = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
 	unsigned int socclk = context->bw_ctx.dml.vba.SOCCLKPerState[vlevel];
+	struct vba_vars_st *vba = &context->bw_ctx.dml.vba;
+	struct dc_stream_state *main_stream = ref_pipe->stream;
 
 	dc_assert_fp_enabled();
 
@@ -569,13 +580,26 @@ void dcn32_set_phantom_stream_timing(struct dc *dc,
 	phantom_vactive = get_subviewport_lines_needed_in_mall(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx) +
 				pstate_width_fw_delay_lines + dc->caps.subvp_swath_height_margin_lines;
 
+	// W/A for DCC corruption with certain high resolution timings.
+	// Determing if pipesplit is used. If so, add meta_row_height to the phantom vactive.
+	num_dpp = vba->NoOfDPP[vba->VoltageLevel][vba->maxMpcComb][vba->pipe_plane[pipe_idx]];
+	phantom_vactive += num_dpp > 1 ? vba->meta_row_height[vba->pipe_plane[pipe_idx]] : 0;
+
+	/* dc->debug.subvp_extra_lines 0 by default*/
+	phantom_vactive += dc->debug.subvp_extra_lines;
+
 	// For backporch of phantom pipe, use vstartup of the main pipe
 	phantom_bp = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
 
 	phantom_stream->dst.y = 0;
 	phantom_stream->dst.height = phantom_vactive;
+	/* When scaling, DML provides the end to end required number of lines for MALL.
+	 * dst.height is always correct for this case, but src.height is not which causes a
+	 * delta between main and phantom pipe scaling outputs. Need to adjust src.height on
+	 * phantom for this case.
+	 */
 	phantom_stream->src.y = 0;
-	phantom_stream->src.height = phantom_vactive;
+	phantom_stream->src.height = (double)phantom_vactive * (double)main_stream->src.height / (double)main_stream->dst.height;
 
 	phantom_stream->timing.v_addressable = phantom_vactive;
 	phantom_stream->timing.v_front_porch = 1;
@@ -1128,7 +1152,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
 				context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final ==
 					dm_prefetch_support_uclk_fclk_and_stutter) {
 				context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
-								dm_prefetch_support_stutter;
+								dm_prefetch_support_fclk_and_stutter;
 				/* There are params (such as FabricClock) that need to be recalculated
 				 * after validation fails (otherwise it will be 0). Calculation for
 				 * phantom vactive requires call into DML, so we must ensure all the
@@ -1179,7 +1203,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
 		// If SubVP pipe config is unsupported (or cannot be used for UCLK switching)
 		// remove phantom pipes and repopulate dml pipes
 		if (!found_supported_config) {
-			dc->res_pool->funcs->remove_phantom_pipes(dc, context);
+			dc->res_pool->funcs->remove_phantom_pipes(dc, context, false);
 			vba->DRAMClockChangeSupport[*vlevel][vba->maxMpcComb] = dm_dram_clock_change_unsupported;
 			*pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, false);
 
@@ -1191,9 +1215,7 @@ static void dcn32_full_validate_bw_helper(struct dc *dc,
 			}
 		} else {
 			// Most populate phantom DLG params before programming hardware / timing for phantom pipe
-			DC_FP_START();
 			dcn32_helper_populate_phantom_dlg_params(dc, context, pipes, *pipe_cnt);
-			DC_FP_END();
 
 			/* Call validate_apply_pipe_split flags after calling DML getters for
 			 * phantom dlg params, or some of the VBA params indicating pipe split
@@ -1230,7 +1252,7 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
 				       display_e2e_pipe_params_st *pipes,
 				       int pipe_cnt, int vlevel)
 {
-	int i, pipe_idx;
+	int i, pipe_idx, active_hubp_count = 0;
 	bool usr_retraining_support = false;
 	bool unbounded_req_enabled = false;
 
@@ -1275,6 +1297,8 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
 	for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
 		if (!context->res_ctx.pipe_ctx[i].stream)
 			continue;
+		if (context->res_ctx.pipe_ctx[i].plane_state)
+			active_hubp_count++;
 		pipes[pipe_idx].pipe.dest.vstartup_start = get_vstartup(&context->bw_ctx.dml, pipes, pipe_cnt,
 				pipe_idx);
 		pipes[pipe_idx].pipe.dest.vupdate_offset = get_vupdate_offset(&context->bw_ctx.dml, pipes, pipe_cnt,
@@ -1296,10 +1320,23 @@ static void dcn32_calculate_dlg_params(struct dc *dc, struct dc_state *context,
 
 		if (context->bw_ctx.bw.dcn.clk.dppclk_khz < pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
 			context->bw_ctx.bw.dcn.clk.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
-		context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
+		if (context->res_ctx.pipe_ctx[i].plane_state)
+			context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000;
+		else
+			context->res_ctx.pipe_ctx[i].plane_res.bw.dppclk_khz = 0;
 		context->res_ctx.pipe_ctx[i].pipe_dlg_param = pipes[pipe_idx].pipe.dest;
 		pipe_idx++;
 	}
+	/* If DCN isn't making memory requests we can allow pstate change and lower clocks */
+	if (!active_hubp_count) {
+		context->bw_ctx.bw.dcn.clk.socclk_khz = 0;
+		context->bw_ctx.bw.dcn.clk.dppclk_khz = 0;
+		context->bw_ctx.bw.dcn.clk.dcfclk_khz = 0;
+		context->bw_ctx.bw.dcn.clk.dcfclk_deep_sleep_khz = 0;
+		context->bw_ctx.bw.dcn.clk.dramclk_khz = 0;
+		context->bw_ctx.bw.dcn.clk.fclk_khz = 0;
+		context->bw_ctx.bw.dcn.clk.p_state_change_support = true;
+	}
 	/*save a original dppclock copy*/
 	context->bw_ctx.bw.dcn.clk.bw_dppclk_khz = context->bw_ctx.bw.dcn.clk.dppclk_khz;
 	context->bw_ctx.bw.dcn.clk.bw_dispclk_khz = context->bw_ctx.bw.dcn.clk.dispclk_khz;
@@ -1481,7 +1518,7 @@ bool dcn32_internal_validate_bw(struct dc *dc,
 		return false;
 
 	// For each full update, remove all existing phantom pipes first
-	dc->res_pool->funcs->remove_phantom_pipes(dc, context);
+	dc->res_pool->funcs->remove_phantom_pipes(dc, context, fast_validate);
 
 	dc->res_pool->funcs->update_soc_for_wm_a(dc, context);
 
@@ -1494,11 +1531,8 @@ bool dcn32_internal_validate_bw(struct dc *dc,
 
 	dml_log_pipe_params(&context->bw_ctx.dml, pipes, pipe_cnt);
 
-	if (!fast_validate) {
-		DC_FP_START();
+	if (!fast_validate)
 		dcn32_full_validate_bw_helper(dc, context, pipes, &vlevel, split, merge, &pipe_cnt);
-		DC_FP_END();
-	}
 
 	if (fast_validate ||
 			(dc->debug.dml_disallow_alternate_prefetch_modes &&
@@ -1734,6 +1768,10 @@ bool dcn32_internal_validate_bw(struct dc *dc,
 	}
 
 	if (repopulate_pipes) {
+		int flag_max_mpc_comb = vba->maxMpcComb;
+		int flag_vlevel = vlevel;
+		int i;
+
 		pipe_cnt = dc->res_pool->funcs->populate_dml_pipes(dc, context, pipes, fast_validate);
 
 		/* repopulate_pipes = 1 means the pipes were either split or merged. In this case
@@ -1741,10 +1779,28 @@ bool dcn32_internal_validate_bw(struct dc *dc,
 		 * ensure all the params are calculated correctly. We do not need to run the
 		 * pipe split check again after this call (pipes are already split / merged).
 		 * */
-		if (!fast_validate) {
-			context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
-						dm_prefetch_support_uclk_fclk_and_stutter_if_possible;
-			vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
+		context->bw_ctx.dml.soc.allow_for_pstate_or_stutter_in_vblank_final =
+					dm_prefetch_support_uclk_fclk_and_stutter_if_possible;
+		vlevel = dml_get_voltage_level(&context->bw_ctx.dml, pipes, pipe_cnt);
+		if (vlevel == context->bw_ctx.dml.soc.num_states) {
+			/* failed after DET size changes */
+			goto validate_fail;
+		} else if (flag_max_mpc_comb == 0 &&
+				flag_max_mpc_comb != context->bw_ctx.dml.vba.maxMpcComb) {
+			/* check the context constructed with pipe split flags is still valid*/
+			bool flags_valid = false;
+			for (i = flag_vlevel; i < context->bw_ctx.dml.soc.num_states; i++) {
+				if (vba->ModeSupport[i][flag_max_mpc_comb]) {
+					vba->maxMpcComb = flag_max_mpc_comb;
+					vba->VoltageLevel = i;
+					vlevel = i;
+					flags_valid = true;
+				}
+			}
+
+			/* this should never happen */
+			if (!flags_valid)
+				goto validate_fail;
 		}
 	}
 	*vlevel_out = vlevel;
@@ -1775,14 +1831,38 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 	unsigned int dummy_latency_index = 0;
 	int maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
 	unsigned int min_dram_speed_mts = context->bw_ctx.dml.vba.DRAMSpeed;
+	bool subvp_in_use = dcn32_subvp_in_use(dc, context);
 	unsigned int min_dram_speed_mts_margin;
+	bool need_fclk_lat_as_dummy = false;
+	bool is_subvp_p_drr = true;
 
 	dc_assert_fp_enabled();
 
-	// Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK
-	if (!pstate_en && dcn32_subvp_in_use(dc, context)) {
-		context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp;
-		pstate_en = true;
+	/* need to find dummy latency index for subvp */
+	if (subvp_in_use) {
+		/* Override DRAMClockChangeSupport for SubVP + DRR case where the DRR cannot switch without stretching it's VBLANK */
+		if (!pstate_en) {
+			context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp;
+			pstate_en = true;
+			is_subvp_p_drr = true;
+		}
+		dummy_latency_index = dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(dc,
+						context, pipes, pipe_cnt, vlevel);
+
+		/* For DCN32/321 need to validate with fclk pstate change latency equal to dummy so prefetch is
+		 * scheduled correctly to account for dummy pstate.
+		 */
+		if (context->bw_ctx.dml.soc.fclk_change_latency_us < dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us) {
+			need_fclk_lat_as_dummy = true;
+			context->bw_ctx.dml.soc.fclk_change_latency_us =
+					dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+		}
+		context->bw_ctx.dml.soc.dram_clock_change_latency_us =
+							dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+		dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
+		if (is_subvp_p_drr) {
+			context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][context->bw_ctx.dml.vba.maxMpcComb] = dm_dram_clock_change_vblank_w_mall_sub_vp;
+		}
 	}
 
 	context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching = false;
@@ -1803,6 +1883,14 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 			 */
 			context->bw_ctx.dml.soc.dram_clock_change_latency_us =
 					dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
+			/* For DCN32/321 need to validate with fclk pstate change latency equal to dummy so
+			 * prefetch is scheduled correctly to account for dummy pstate.
+			 */
+			if (context->bw_ctx.dml.soc.fclk_change_latency_us < dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us) {
+				need_fclk_lat_as_dummy = true;
+				context->bw_ctx.dml.soc.fclk_change_latency_us =
+						dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+			}
 			dcn32_internal_validate_bw(dc, context, pipes, &pipe_cnt, &vlevel, false);
 			maxMpcComb = context->bw_ctx.dml.vba.maxMpcComb;
 			dcfclk_from_fw_based_mclk_switching = context->bw_ctx.dml.vba.DCFCLKState[vlevel][context->bw_ctx.dml.vba.maxMpcComb];
@@ -1904,13 +1992,13 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 
 		if (context->bw_ctx.dml.vba.DRAMClockChangeSupport[vlevel][maxMpcComb] ==
 			dm_dram_clock_change_unsupported) {
-			int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries - 1;
+			int min_dram_speed_mts_offset = dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels - 1;
 
 			min_dram_speed_mts =
 				dc->clk_mgr->bw_params->clk_table.entries[min_dram_speed_mts_offset].memclk_mhz * 16;
 		}
 
-		if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
+		if (!context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching && !subvp_in_use) {
 			/* find largest table entry that is lower than dram speed,
 			 * but lower than DPM0 still uses DPM0
 			 */
@@ -1990,6 +2078,11 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 
 	context->perf_params.stutter_period_us = context->bw_ctx.dml.vba.StutterPeriod;
 
+	/* for proper prefetch calculations, if dummy lat > fclk lat, use fclk lat = dummy lat */
+	if (need_fclk_lat_as_dummy)
+		context->bw_ctx.dml.soc.fclk_change_latency_us =
+				dc->clk_mgr->bw_params->dummy_pstate_table[dummy_latency_index].dummy_pstate_latency_us;
+
 	dcn32_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
 
 	if (!pstate_en)
@@ -1997,8 +2090,14 @@ void dcn32_calculate_wm_and_dlg_fpu(struct dc *dc, struct dc_state *context,
 		context->bw_ctx.dml.soc.dram_clock_change_latency_us =
 				dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.pstate_latency_us;
 
-	if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching)
+	if (context->bw_ctx.bw.dcn.clk.fw_based_mclk_switching) {
 		dcn30_setup_mclk_switch_using_fw_based_vblank_stretch(dc, context);
+	}
+
+	/* revert fclk lat changes if required */
+	if (need_fclk_lat_as_dummy)
+		context->bw_ctx.dml.soc.fclk_change_latency_us =
+				dc->clk_mgr->bw_params->wm_table.nv_entries[WM_A].dml_input.fclk_change_latency_us;
 }
 
 static void dcn32_get_optimal_dcfclk_fclk_for_uclk(unsigned int uclk_mts,
@@ -2145,9 +2244,7 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
 		entry.fabricclk_mhz = 0;
 		entry.dram_speed_mts = 0;
 
-		DC_FP_START();
 		insert_entry_into_table_sorted(table, num_entries, &entry);
-		DC_FP_END();
 	}
 
 	// Insert the max DCFCLK
@@ -2155,9 +2252,7 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
 	entry.fabricclk_mhz = 0;
 	entry.dram_speed_mts = 0;
 
-	DC_FP_START();
 	insert_entry_into_table_sorted(table, num_entries, &entry);
-	DC_FP_END();
 
 	// Insert the UCLK DPMS
 	for (i = 0; i < num_uclk_dpms; i++) {
@@ -2165,9 +2260,7 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
 		entry.fabricclk_mhz = 0;
 		entry.dram_speed_mts = bw_params->clk_table.entries[i].memclk_mhz * 16;
 
-		DC_FP_START();
 		insert_entry_into_table_sorted(table, num_entries, &entry);
-		DC_FP_END();
 	}
 
 	// If FCLK is coarse grained, insert individual DPMs.
@@ -2177,9 +2270,7 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
 			entry.fabricclk_mhz = bw_params->clk_table.entries[i].fclk_mhz;
 			entry.dram_speed_mts = 0;
 
-			DC_FP_START();
 			insert_entry_into_table_sorted(table, num_entries, &entry);
-			DC_FP_END();
 		}
 	}
 	// If FCLK fine grained, only insert max
@@ -2188,9 +2279,7 @@ static int build_synthetic_soc_states(struct clk_bw_params *bw_params,
 		entry.fabricclk_mhz = max_fclk_mhz;
 		entry.dram_speed_mts = 0;
 
-		DC_FP_START();
 		insert_entry_into_table_sorted(table, num_entries, &entry);
-		DC_FP_END();
 	}
 
 	// At this point, the table contains all "points of interest" based on
@@ -2359,9 +2448,13 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
 
 		if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
 			dcn3_2_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
-
 	}
 
+	/* DML DSC delay factor workaround */
+	dcn3_2_ip.dsc_delay_factor_wa = dc->debug.dsc_delay_factor_wa_x1000 / 1000.0;
+
+	dcn3_2_ip.min_prefetch_in_strobe_us = dc->debug.min_prefetch_in_strobe_ns / 1000.0;
+
 	/* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */
 	dcn3_2_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
 	dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
@@ -2521,3 +2614,11 @@ void dcn32_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_pa
 	}
 }
 
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+				  int pipe_cnt)
+{
+	dc_assert_fp_enabled();
+
+	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_luma = 0;
+	pipes[pipe_cnt].pipe.src.dcc_fraction_of_zs_req_chroma = 0;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
index 3a3dc2ce4c73..ab010e7e840b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/dcn32_fpu.h
@@ -73,4 +73,7 @@ int dcn32_find_dummy_latency_index_for_fw_based_mclk_switch(struct dc *dc,
 
 void dcn32_patch_dpm_table(struct clk_bw_params *bw_params);
 
+void dcn32_zero_pipe_dcc_fraction(display_e2e_pipe_params_st *pipes,
+				  int pipe_cnt);
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
index 5b91660a6496..4b8f5fa0f0ad 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.c
@@ -364,10 +364,11 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
 	for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
 		v->DSCDelay[k] = dml32_DSCDelayRequirement(mode_lib->vba.DSCEnabled[k],
 				mode_lib->vba.ODMCombineEnabled[k], mode_lib->vba.DSCInputBitPerComponent[k],
-				mode_lib->vba.OutputBpp[k], mode_lib->vba.HActive[k], mode_lib->vba.HTotal[k],
+				mode_lib->vba.OutputBppPerState[mode_lib->vba.VoltageLevel][k],
+				mode_lib->vba.HActive[k], mode_lib->vba.HTotal[k],
 				mode_lib->vba.NumberOfDSCSlices[k], mode_lib->vba.OutputFormat[k],
 				mode_lib->vba.Output[k], mode_lib->vba.PixelClock[k],
-				mode_lib->vba.PixelClockBackEnd[k]);
+				mode_lib->vba.PixelClockBackEnd[k], mode_lib->vba.ip.dsc_delay_factor_wa);
 	}
 
 	for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k)
@@ -669,6 +670,25 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
 		v->cursor_bw[k] = mode_lib->vba.NumberOfCursors[k] * mode_lib->vba.CursorWidth[k][0] * mode_lib->vba.CursorBPP[k][0] / 8 / (mode_lib->vba.HTotal[k] / mode_lib->vba.PixelClock[k]) * mode_lib->vba.VRatio[k];
 	}
 
+	v->NotEnoughDETSwathFillLatencyHiding = dml32_CalculateDETSwathFillLatencyHiding(
+						mode_lib->vba.NumberOfActiveSurfaces,
+						mode_lib->vba.ReturnBW,
+						v->UrgentLatency,
+						mode_lib->vba.SwathHeightY,
+						mode_lib->vba.SwathHeightC,
+						v->swath_width_luma_ub,
+						v->swath_width_chroma_ub,
+						v->BytePerPixelDETY,
+						v->BytePerPixelDETC,
+						mode_lib->vba.DETBufferSizeY,
+						mode_lib->vba.DETBufferSizeC,
+						mode_lib->vba.DPPPerPlane,
+						mode_lib->vba.HTotal,
+						mode_lib->vba.PixelClock,
+						mode_lib->vba.VRatio,
+						mode_lib->vba.VRatioChroma,
+						mode_lib->vba.UsesMALLForPStateChange);
+
 	for (k = 0; k < mode_lib->vba.NumberOfActiveSurfaces; ++k) {
 		v->MaxVStartupLines[k] = ((mode_lib->vba.Interlace[k] &&
 				!mode_lib->vba.ProgressiveToInterlaceUnitInOPP) ?
@@ -717,6 +737,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
 
 	do {
 		MaxTotalRDBandwidth = 0;
+		DestinationLineTimesForPrefetchLessThan2 = false;
+		VRatioPrefetchMoreThanMax = false;
 #ifdef __DML_VBA_DEBUG__
 		dml_print("DML::%s: Start loop: VStartup = %d\n", __func__, mode_lib->vba.VStartupLines);
 #endif
@@ -785,6 +807,8 @@ static void DISPCLKDPPCLKDCFCLKDeepSleepPrefetchParametersWatermarksAndPerforman
 					v->SwathHeightY[k],
 					v->SwathHeightC[k],
 					TWait,
+					v->DRAMSpeedPerState[mode_lib->vba.VoltageLevel] <= MEM_STROBE_FREQ_MHZ ?
+							mode_lib->vba.ip.min_prefetch_in_strobe_us : 0,
 					/* Output */
 					&v->DSTXAfterScaler[k],
 					&v->DSTYAfterScaler[k],
@@ -1627,7 +1651,7 @@ static void mode_support_configuration(struct vba_vars_st *v,
 				&& !mode_lib->vba.MSOOrODMSplitWithNonDPLink
 				&& !mode_lib->vba.NotEnoughLanesForMSO
 				&& mode_lib->vba.LinkCapacitySupport[i] == true && !mode_lib->vba.P2IWith420
-				&& !mode_lib->vba.DSCOnlyIfNecessaryWithBPP
+				//&& !mode_lib->vba.DSCOnlyIfNecessaryWithBPP
 				&& !mode_lib->vba.DSC422NativeNotSupported
 				&& !mode_lib->vba.MPCCombineMethodIncompatible
 				&& mode_lib->vba.ODMCombine2To1SupportCheckOK[i] == true
@@ -1660,6 +1684,8 @@ static void mode_support_configuration(struct vba_vars_st *v,
 				&& mode_lib->vba.DCCMetaBufferSizeNotExceeded[i][j] == true
 				&& mode_lib->vba.NonsupportedDSCInputBPC == false
 				&& !mode_lib->vba.ExceededMALLSize
+				&& (mode_lib->vba.NotEnoughDETSwathFillLatencyHidingPerState[i][j] == false
+				|| i == v->soc.num_states - 1)
 				&& ((mode_lib->vba.HostVMEnable == false
 				&& !mode_lib->vba.ImmediateFlipRequiredFinal)
 				|| mode_lib->vba.ImmediateFlipSupportedForState[i][j])
@@ -2475,7 +2501,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					mode_lib->vba.OutputBppPerState[i][k], mode_lib->vba.HActive[k],
 					mode_lib->vba.HTotal[k], mode_lib->vba.NumberOfDSCSlices[k],
 					mode_lib->vba.OutputFormat[k], mode_lib->vba.Output[k],
-					mode_lib->vba.PixelClock[k], mode_lib->vba.PixelClockBackEnd[k]);
+					mode_lib->vba.PixelClock[k], mode_lib->vba.PixelClockBackEnd[k],
+					mode_lib->vba.ip.dsc_delay_factor_wa);
 		}
 
 		for (k = 0; k <= mode_lib->vba.NumberOfActiveSurfaces - 1; k++) {
@@ -3152,6 +3179,25 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 					mode_lib->vba.UrgentBurstFactorChroma,
 					mode_lib->vba.UrgentBurstFactorCursor);
 
+			mode_lib->vba.NotEnoughDETSwathFillLatencyHidingPerState[i][j] = dml32_CalculateDETSwathFillLatencyHiding(
+					mode_lib->vba.NumberOfActiveSurfaces,
+					mode_lib->vba.ReturnBWPerState[i][j],
+					mode_lib->vba.UrgLatency[i],
+					mode_lib->vba.SwathHeightYThisState,
+					mode_lib->vba.SwathHeightCThisState,
+					mode_lib->vba.swath_width_luma_ub_this_state,
+					mode_lib->vba.swath_width_chroma_ub_this_state,
+					mode_lib->vba.BytePerPixelInDETY,
+					mode_lib->vba.BytePerPixelInDETC,
+					mode_lib->vba.DETBufferSizeYThisState,
+					mode_lib->vba.DETBufferSizeCThisState,
+					mode_lib->vba.NoOfDPPThisState,
+					mode_lib->vba.HTotal,
+					mode_lib->vba.PixelClock,
+					mode_lib->vba.VRatio,
+					mode_lib->vba.VRatioChroma,
+					mode_lib->vba.UsesMALLForPStateChange);
+
 			v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.VMDataOnlyReturnBWPerState = dml32_get_return_bw_mbps_vm_only(&mode_lib->vba.soc, i,
 					mode_lib->vba.DCFCLKState[i][j], mode_lib->vba.FabricClockPerState[i],
 					mode_lib->vba.DRAMSpeedPerState[i]);
@@ -3190,6 +3236,7 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							mode_lib->vba.FCLKChangeLatency, mode_lib->vba.UrgLatency[i],
 							mode_lib->vba.SREnterPlusExitTime);
 
+					memset(&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull, 0, sizeof(DmlPipe));
 					v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dppclk = mode_lib->vba.RequiredDPPCLK[i][j][k];
 					v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.Dispclk = mode_lib->vba.RequiredDISPCLK[i][j];
 					v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.myPipe.PixelClock = mode_lib->vba.PixelClock[k];
@@ -3242,6 +3289,8 @@ void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_l
 							v->swath_width_chroma_ub_this_state[k],
 							v->SwathHeightYThisState[k],
 							v->SwathHeightCThisState[k], v->TWait,
+							v->DRAMSpeedPerState[i] <= MEM_STROBE_FREQ_MHZ ?
+									mode_lib->vba.ip.min_prefetch_in_strobe_us : 0,
 
 							/* Output */
 							&v->dummy_vars.dml32_ModeSupportAndSystemConfigurationFull.DSTXAfterScaler[k],
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h
index c62e0991358b..c8b28c83ddf4 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_32.h
@@ -46,9 +46,14 @@
 // Prefetch schedule max vratio
 #define __DML_MAX_VRATIO_PRE__ 4.0
 
+#define __DML_VBA_MAX_DST_Y_PRE__    63.75
+
 #define BPP_INVALID 0
 #define BPP_BLENDED_PIPE 0xffffffff
 
+#define MEM_STROBE_FREQ_MHZ 1600
+#define MEM_STROBE_MAX_DELIVERY_TIME_US 60.0
+
 struct display_mode_lib;
 
 void dml32_ModeSupportAndSystemConfigurationFull(struct display_mode_lib *mode_lib);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
index ad66e241f9ae..5af601cff1a0 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.c
@@ -1726,7 +1726,8 @@ unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
 		enum output_format_class  OutputFormat,
 		enum output_encoder_class Output,
 		double PixelClock,
-		double PixelClockBackEnd)
+		double PixelClockBackEnd,
+		double dsc_delay_factor_wa)
 {
 	unsigned int DSCDelayRequirement_val;
 
@@ -1746,7 +1747,7 @@ unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
 		}
 
 		DSCDelayRequirement_val = DSCDelayRequirement_val + (HTotal - HActive) *
-				dml_ceil(DSCDelayRequirement_val / HActive, 1);
+				dml_ceil((double)DSCDelayRequirement_val / HActive, 1);
 
 		DSCDelayRequirement_val = DSCDelayRequirement_val * PixelClock / PixelClockBackEnd;
 
@@ -1764,7 +1765,7 @@ unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
 	dml_print("DML::%s: DSCDelayRequirement_val = %d\n", __func__, DSCDelayRequirement_val);
 #endif
 
-	return DSCDelayRequirement_val;
+	return dml_ceil(DSCDelayRequirement_val * dsc_delay_factor_wa, 1);
 }
 
 void dml32_CalculateSurfaceSizeInMall(
@@ -3416,6 +3417,7 @@ bool dml32_CalculatePrefetchSchedule(
 		unsigned int SwathHeightY,
 		unsigned int SwathHeightC,
 		double TWait,
+		double TPreReq,
 		/* Output */
 		double   *DSTXAfterScaler,
 		double   *DSTYAfterScaler,
@@ -3666,6 +3668,7 @@ bool dml32_CalculatePrefetchSchedule(
 	dst_y_prefetch_equ = VStartup - (*TSetup + dml_max(TWait + TCalc, *Tdmdl)) / LineTime -
 			(*DSTYAfterScaler + (double) *DSTXAfterScaler / (double) myPipe->HTotal);
 
+	dst_y_prefetch_equ = dml_min(dst_y_prefetch_equ, __DML_VBA_MAX_DST_Y_PRE__);
 #ifdef __DML_VBA_DEBUG__
 	dml_print("DML::%s: HTotal = %d\n", __func__, myPipe->HTotal);
 	dml_print("DML::%s: min_Lsw = %f\n", __func__, min_Lsw);
@@ -3725,7 +3728,8 @@ bool dml32_CalculatePrefetchSchedule(
 	*VRatioPrefetchY = 0;
 	*VRatioPrefetchC = 0;
 	*RequiredPrefetchPixDataBWLuma = 0;
-	if (dst_y_prefetch_equ > 1) {
+	if (dst_y_prefetch_equ > 1 &&
+			(Tpre_rounded >= TPreReq || dst_y_prefetch_equ == __DML_VBA_MAX_DST_Y_PRE__)) {
 		double PrefetchBandwidth1;
 		double PrefetchBandwidth2;
 		double PrefetchBandwidth3;
@@ -3871,7 +3875,11 @@ bool dml32_CalculatePrefetchSchedule(
 		}
 
 		if (dst_y_prefetch_oto < dst_y_prefetch_equ) {
-			*DestinationLinesForPrefetch = dst_y_prefetch_oto;
+			if (dst_y_prefetch_oto * LineTime < TPreReq) {
+				*DestinationLinesForPrefetch = dst_y_prefetch_equ;
+			} else {
+				*DestinationLinesForPrefetch = dst_y_prefetch_oto;
+			}
 			TimeForFetchingMetaPTE = Tvm_oto;
 			TimeForFetchingRowInVBlank = Tr0_oto;
 			*PrefetchBandwidth = prefetch_bw_oto;
@@ -4396,7 +4404,7 @@ void dml32_CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
 
 		if (v->NumberOfActiveSurfaces > 1) {
 			ActiveClockChangeLatencyHidingY = ActiveClockChangeLatencyHidingY
-					- (1 - 1 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
+					- (1.0 - 1.0 / v->NumberOfActiveSurfaces) * SwathHeightY[k] * v->HTotal[k]
 							/ v->PixelClock[k] / v->VRatio[k];
 		}
 
@@ -6220,3 +6228,72 @@ void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurf
 	*ImmediateFlipBandwidthSupport = (*TotalBandwidth <= ReturnBW);
 	*FractionOfUrgentBandwidth = *TotalBandwidth / ReturnBW;
 }
+
+bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
+		double ReturnBW,
+		double UrgentLatency,
+		unsigned int SwathHeightY[],
+		unsigned int SwathHeightC[],
+		unsigned int SwathWidthY[],
+		unsigned int SwathWidthC[],
+		double  BytePerPixelInDETY[],
+		double  BytePerPixelInDETC[],
+		unsigned int    DETBufferSizeY[],
+		unsigned int    DETBufferSizeC[],
+		unsigned int	NumOfDPP[],
+		unsigned int	HTotal[],
+		double	PixelClock[],
+		double	VRatioY[],
+		double	VRatioC[],
+		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX])
+{
+	int k;
+	double SwathSizeAllSurfaces = 0;
+	double SwathSizeAllSurfacesInFetchTimeUs;
+	double DETSwathLatencyHidingUs;
+	double DETSwathLatencyHidingYUs;
+	double DETSwathLatencyHidingCUs;
+	double SwathSizePerSurfaceY[DC__NUM_DPP__MAX];
+	double SwathSizePerSurfaceC[DC__NUM_DPP__MAX];
+	bool NotEnoughDETSwathFillLatencyHiding = false;
+
+	/* calculate sum of single swath size for all pipes in bytes*/
+	for (k = 0; k < NumberOfActiveSurfaces; k++) {
+		SwathSizePerSurfaceY[k] += SwathHeightY[k] * SwathWidthY[k] * BytePerPixelInDETY[k] * NumOfDPP[k];
+
+		if (SwathHeightC[k] != 0)
+			SwathSizePerSurfaceC[k] += SwathHeightC[k] * SwathWidthC[k] * BytePerPixelInDETC[k] * NumOfDPP[k];
+		else
+			SwathSizePerSurfaceC[k] = 0;
+
+		SwathSizeAllSurfaces += SwathSizePerSurfaceY[k] + SwathSizePerSurfaceC[k];
+	}
+
+	SwathSizeAllSurfacesInFetchTimeUs = SwathSizeAllSurfaces / ReturnBW + UrgentLatency;
+
+	/* ensure all DET - 1 swath can hide a fetch for all surfaces */
+	for (k = 0; k < NumberOfActiveSurfaces; k++) {
+		double LineTime = HTotal[k] / PixelClock[k];
+
+		/* only care if surface is not phantom */
+		if (UsesMALLForPStateChange[k] != dm_use_mall_pstate_change_phantom_pipe) {
+			DETSwathLatencyHidingYUs = (dml_floor(DETBufferSizeY[k] / BytePerPixelInDETY[k] / SwathWidthY[k], 1.0) - SwathHeightY[k]) / VRatioY[k] * LineTime;
+
+			if (SwathHeightC[k] != 0) {
+				DETSwathLatencyHidingCUs = (dml_floor(DETBufferSizeC[k] / BytePerPixelInDETC[k] / SwathWidthC[k], 1.0) - SwathHeightC[k]) / VRatioC[k] * LineTime;
+
+				DETSwathLatencyHidingUs = dml_min(DETSwathLatencyHidingYUs, DETSwathLatencyHidingCUs);
+			} else {
+				DETSwathLatencyHidingUs = DETSwathLatencyHidingYUs;
+			}
+
+			/* DET must be able to hide time to fetch 1 swath for each surface */
+			if (DETSwathLatencyHidingUs < SwathSizeAllSurfacesInFetchTimeUs) {
+				NotEnoughDETSwathFillLatencyHiding = true;
+				break;
+			}
+		}
+	}
+
+	return NotEnoughDETSwathFillLatencyHiding;
+}
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
index 55cead0d4237..779c6805f599 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_mode_vba_util_32.h
@@ -30,7 +30,7 @@
 #include "os_types.h"
 #include "../dc_features.h"
 #include "../display_mode_structs.h"
-#include "dml/display_mode_vba.h"
+#include "../display_mode_vba.h"
 
 unsigned int dml32_dscceComputeDelay(
 		unsigned int bpc,
@@ -327,7 +327,8 @@ unsigned int dml32_DSCDelayRequirement(bool DSCEnabled,
 		enum output_format_class  OutputFormat,
 		enum output_encoder_class Output,
 		double PixelClock,
-		double PixelClockBackEnd);
+		double PixelClockBackEnd,
+		double dsc_delay_factor_wa);
 
 void dml32_CalculateSurfaceSizeInMall(
 		unsigned int NumberOfActiveSurfaces,
@@ -742,6 +743,7 @@ bool dml32_CalculatePrefetchSchedule(
 		unsigned int SwathHeightY,
 		unsigned int SwathHeightC,
 		double TWait,
+		double TPreReq,
 		/* Output */
 		double   *DSTXAfterScaler,
 		double   *DSTYAfterScaler,
@@ -1139,4 +1141,22 @@ void dml32_CalculateImmediateFlipBandwithSupport(unsigned int NumberOfActiveSurf
 		double  *FractionOfUrgentBandwidth,
 		bool *ImmediateFlipBandwidthSupport);
 
+bool dml32_CalculateDETSwathFillLatencyHiding(unsigned int NumberOfActiveSurfaces,
+		double ReturnBW,
+		double UrgentLatency,
+		unsigned int SwathHeightY[],
+		unsigned int SwathHeightC[],
+		unsigned int SwathWidthY[],
+		unsigned int SwathWidthC[],
+		double  BytePerPixelInDETY[],
+		double  BytePerPixelInDETC[],
+		unsigned int    DETBufferSizeY[],
+		unsigned int    DETBufferSizeC[],
+		unsigned int	NumOfDPP[],
+		unsigned int	HTotal[],
+		double	PixelClock[],
+		double	VRatioY[],
+		double	VRatioC[],
+		enum dm_use_mall_for_pstate_change_mode UsesMALLForPStateChange[DC__NUM_DPP__MAX]);
+
 #endif
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c
index a1276f6b9581..395ae8761980 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn32/display_rq_dlg_calc_32.c
@@ -291,8 +291,8 @@ void dml32_rq_dlg_get_dlg_reg(struct display_mode_lib *mode_lib,
 
 	dml_print("DML_DLG: %s: vready_after_vcount0 = %d\n", __func__, dlg_regs->vready_after_vcount0);
 
-	dst_x_after_scaler = get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
-	dst_y_after_scaler = get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx);
+	dst_x_after_scaler = dml_ceil(get_dst_x_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx), 1);
+	dst_y_after_scaler = dml_ceil(get_dst_y_after_scaler(mode_lib, e2e_pipe_param, num_pipes, pipe_idx), 1);
 
 	// do some adjustment on the dst_after scaler to account for odm combine mode
 	dml_print("DML_DLG: %s: input dst_x_after_scaler   = %d\n", __func__, dst_x_after_scaler);
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
index dd90f241e906..f4b176599be7 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn321/dcn321_fpu.c
@@ -29,6 +29,7 @@
 #include "dcn321_fpu.h"
 #include "dcn32/dcn32_resource.h"
 #include "dcn321/dcn321_resource.h"
+#include "dml/dcn32/display_mode_vba_util_32.h"
 
 #define DCN3_2_DEFAULT_DET_SIZE 256
 
@@ -119,15 +120,15 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = {
 		},
 	},
 	.num_states = 1,
-	.sr_exit_time_us = 12.36,
-	.sr_enter_plus_exit_time_us = 16.72,
+	.sr_exit_time_us = 19.95,
+	.sr_enter_plus_exit_time_us = 24.36,
 	.sr_exit_z8_time_us = 285.0,
 	.sr_enter_plus_exit_z8_time_us = 320,
 	.writeback_latency_us = 12.0,
 	.round_trip_ping_latency_dcfclk_cycles = 263,
-	.urgent_latency_pixel_data_only_us = 4.0,
-	.urgent_latency_pixel_mixed_with_vm_data_us = 4.0,
-	.urgent_latency_vm_data_only_us = 4.0,
+	.urgent_latency_pixel_data_only_us = 4,
+	.urgent_latency_pixel_mixed_with_vm_data_us = 4,
+	.urgent_latency_vm_data_only_us = 4,
 	.fclk_change_latency_us = 20,
 	.usr_retraining_latency_us = 2,
 	.smn_latency_us = 2,
@@ -155,7 +156,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_21_soc = {
 	.dispclk_dppclk_vco_speed_mhz = 4300.0,
 	.do_urgent_latency_adjustment = true,
 	.urgent_latency_adjustment_fabric_clock_component_us = 1.0,
-	.urgent_latency_adjustment_fabric_clock_reference_mhz = 1000,
+	.urgent_latency_adjustment_fabric_clock_reference_mhz = 3000,
 };
 
 static void get_optimal_ntuple(struct _vcs_dpi_voltage_scaling_st *entry)
@@ -538,9 +539,13 @@ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_p
 
 		if (dc->ctx->dc_bios->vram_info.dram_channel_width_bytes)
 			dcn3_21_soc.dram_channel_width_bytes = dc->ctx->dc_bios->vram_info.dram_channel_width_bytes;
-
 	}
 
+	/* DML DSC delay factor workaround */
+	dcn3_21_ip.dsc_delay_factor_wa = dc->debug.dsc_delay_factor_wa_x1000 / 1000.0;
+
+	dcn3_21_ip.min_prefetch_in_strobe_us = dc->debug.min_prefetch_in_strobe_ns / 1000.0;
+
 	/* Override dispclk_dppclk_vco_speed_mhz from Clk Mgr */
 	dcn3_21_soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
 	dc->dml.soc.dispclk_dppclk_vco_speed_mhz = dc->clk_mgr->dentist_vco_freq_khz / 1000.0;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
index f394b3f3922a..0bffae95f3a2 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_enums.h
@@ -105,14 +105,39 @@ enum source_macro_tile_size {
 enum cursor_bpp {
 	dm_cur_2bit = 0, dm_cur_32bit = 1, dm_cur_64bit = 2
 };
+
+/**
+ * @enum clock_change_support - It represents possible reasons to change the DRAM clock.
+ *
+ * DC may change the DRAM clock during its execution, and this enum tracks all
+ * the available methods. Note that every ASIC has their specific way to deal
+ * with these clock switch.
+ */
 enum clock_change_support {
+	/**
+	 * @dm_dram_clock_change_uninitialized: If you see this, we might have
+	 * a code initialization issue
+	 */
 	dm_dram_clock_change_uninitialized = 0,
+
+	/**
+	 * @dm_dram_clock_change_vactive: Support DRAM switch in VActive
+	 */
 	dm_dram_clock_change_vactive,
+
+	/**
+	 * @dm_dram_clock_change_vblank: Support DRAM switch in VBlank
+	 */
 	dm_dram_clock_change_vblank,
+
 	dm_dram_clock_change_vactive_w_mall_full_frame,
 	dm_dram_clock_change_vactive_w_mall_sub_vp,
 	dm_dram_clock_change_vblank_w_mall_full_frame,
 	dm_dram_clock_change_vblank_w_mall_sub_vp,
+
+	/**
+	 * @dm_dram_clock_change_unsupported: Do not support DRAM switch
+	 */
 	dm_dram_clock_change_unsupported
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
index f33a8879b05a..64d602e6412f 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_structs.h
@@ -364,6 +364,10 @@ struct _vcs_dpi_ip_params_st {
 	unsigned int max_num_dp2p0_outputs;
 	unsigned int max_num_dp2p0_streams;
 	unsigned int VBlankNomDefaultUS;
+
+	/* DM workarounds */
+	double dsc_delay_factor_wa; // TODO: Remove after implementing root cause fix
+	double min_prefetch_in_strobe_us;
 };
 
 struct _vcs_dpi_display_xfc_params_st {
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
index 03924aed8d5c..8e6585dab20e 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.c
@@ -625,7 +625,7 @@ static void fetch_pipe_params(struct display_mode_lib *mode_lib)
 		mode_lib->vba.skip_dio_check[mode_lib->vba.NumberOfActivePlanes] =
 				dout->is_virtual;
 
-		if (!dout->dsc_enable)
+		if (dout->dsc_enable)
 			mode_lib->vba.ForcedOutputLinkBPP[mode_lib->vba.NumberOfActivePlanes] = dout->output_bpp;
 		else
 			mode_lib->vba.ForcedOutputLinkBPP[mode_lib->vba.NumberOfActivePlanes] = 0.0;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
index 630f3395e90a..81e53e67cd0b 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/display_mode_vba.h
@@ -419,6 +419,15 @@ struct vba_vars_st {
 	double MinPixelChunkSizeBytes;
 	unsigned int DCCMetaBufferSizeBytes;
 	// Pipe/Plane Parameters
+
+	/** @VoltageLevel:
+	 * Every ASIC has a fixed number of DPM states, and some devices might
+	 * have some particular voltage configuration that does not map
+	 * directly to the DPM states. This field tells how many states the
+	 * target device supports; even though this field combines the DPM and
+	 * special SOC voltages, it mostly matches the total number of DPM
+	 * states.
+	 */
 	int VoltageLevel;
 	double FabricClock;
 	double DRAMSpeed;
@@ -1041,6 +1050,7 @@ struct vba_vars_st {
 	double MinFullDETBufferingTime;
 	double AverageReadBandwidthGBytePerSecond;
 	bool   FirstMainPlane;
+	bool NotEnoughDETSwathFillLatencyHiding;
 
 	unsigned int ViewportWidthChroma[DC__NUM_DPP__MAX];
 	unsigned int ViewportHeightChroma[DC__NUM_DPP__MAX];
@@ -1153,7 +1163,7 @@ struct vba_vars_st {
 	double UrgBurstFactorLumaPre[DC__NUM_DPP__MAX];
 	double UrgBurstFactorChromaPre[DC__NUM_DPP__MAX];
 	bool NotUrgentLatencyHidingPre[DC__NUM_DPP__MAX];
-	bool LinkCapacitySupport[DC__NUM_DPP__MAX];
+	bool LinkCapacitySupport[DC__VOLTAGE_STATES];
 	bool VREADY_AT_OR_AFTER_VSYNC[DC__NUM_DPP__MAX];
 	unsigned int MIN_DST_Y_NEXT_START[DC__NUM_DPP__MAX];
 	unsigned int VFrontPorch[DC__NUM_DPP__MAX];
@@ -1224,6 +1234,7 @@ struct vba_vars_st {
 	unsigned int BlockWidthC[DC__NUM_DPP__MAX];
 	unsigned int SubViewportLinesNeededInMALL[DC__NUM_DPP__MAX];
 	bool VActiveBandwithSupport[DC__VOLTAGE_STATES][2];
+	bool NotEnoughDETSwathFillLatencyHidingPerState[DC__VOLTAGE_STATES][2];
 	struct dummy_vars dummy_vars;
 };
 
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h b/drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h
index e5fac9f4181d..dcff0dd2b6a1 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h
+++ b/drivers/gpu/drm/amd/display/dc/dml/dsc/qp_tables.h
@@ -25,7 +25,7 @@
  */
 
 
-const qp_table   qp_table_422_10bpc_min = {
+static const qp_table   qp_table_422_10bpc_min = {
 	{   6, { 0, 4, 5, 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 12, 16} },
 	{ 6.5, { 0, 4, 5, 6, 6, 6, 6, 7, 7, 8, 9, 9, 9, 12, 16} },
 	{   7, { 0, 4, 5, 6, 6, 6, 6, 7, 7, 7, 9, 9, 9, 11, 15} },
@@ -58,7 +58,7 @@ const qp_table   qp_table_422_10bpc_min = {
 };
 
 
-const qp_table   qp_table_444_8bpc_max = {
+static const qp_table   qp_table_444_8bpc_max = {
 	{   6, { 4, 6, 8, 8, 9, 9, 9, 10, 11, 12, 12, 12, 12, 13, 15} },
 	{ 6.5, { 4, 6, 7, 8, 8, 8, 9, 10, 11, 11, 12, 12, 12, 13, 15} },
 	{   7, { 4, 5, 7, 7, 8, 8, 8, 9, 10, 11, 11, 12, 12, 13, 14} },
@@ -99,7 +99,7 @@ const qp_table   qp_table_444_8bpc_max = {
 };
 
 
-const qp_table   qp_table_420_12bpc_max = {
+static const qp_table   qp_table_420_12bpc_max = {
 	{   4, {11, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 21, 22} },
 	{ 4.5, {10, 11, 12, 13, 14, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} },
 	{   5, { 9, 11, 12, 13, 14, 15, 15, 16, 17, 17, 18, 18, 19, 20, 21} },
@@ -132,7 +132,7 @@ const qp_table   qp_table_420_12bpc_max = {
 };
 
 
-const qp_table   qp_table_444_10bpc_min = {
+static const qp_table   qp_table_444_10bpc_min = {
 	{   6, { 0, 4, 7, 7, 9, 9, 9, 9, 9, 10, 10, 10, 10, 12, 18} },
 	{ 6.5, { 0, 4, 6, 7, 8, 8, 9, 9, 9, 9, 10, 10, 10, 12, 18} },
 	{   7, { 0, 4, 6, 6, 8, 8, 8, 8, 8, 9, 9, 10, 10, 12, 17} },
@@ -185,7 +185,7 @@ const qp_table   qp_table_444_10bpc_min = {
 };
 
 
-const qp_table   qp_table_420_8bpc_max = {
+static const qp_table   qp_table_420_8bpc_max = {
 	{   4, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 13, 14} },
 	{ 4.5, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} },
 	{   5, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 12, 13} },
@@ -206,7 +206,7 @@ const qp_table   qp_table_420_8bpc_max = {
 };
 
 
-const qp_table   qp_table_444_8bpc_min = {
+static const qp_table   qp_table_444_8bpc_min = {
 	{   6, { 0, 1, 3, 3, 5, 5, 5, 5, 5, 6, 6, 6, 6, 9, 14} },
 	{ 6.5, { 0, 1, 2, 3, 4, 4, 5, 5, 5, 5, 6, 6, 6, 9, 14} },
 	{   7, { 0, 0, 2, 2, 4, 4, 4, 4, 4, 5, 5, 6, 6, 9, 13} },
@@ -247,7 +247,7 @@ const qp_table   qp_table_444_8bpc_min = {
 };
 
 
-const qp_table   qp_table_444_12bpc_min = {
+static const qp_table   qp_table_444_12bpc_min = {
 	{   6, { 0, 5, 11, 11, 13, 13, 13, 13, 13, 14, 14, 14, 14, 17, 22} },
 	{ 6.5, { 0, 5, 10, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, 17, 22} },
 	{   7, { 0, 5, 10, 10, 12, 12, 12, 12, 12, 13, 13, 14, 14, 17, 21} },
@@ -312,7 +312,7 @@ const qp_table   qp_table_444_12bpc_min = {
 };
 
 
-const qp_table   qp_table_420_12bpc_min = {
+static const qp_table   qp_table_420_12bpc_min = {
 	{   4, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 21} },
 	{ 4.5, { 0, 4, 8, 9, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 20} },
 	{   5, { 0, 4, 8, 9, 10, 11, 11, 11, 11, 11, 13, 13, 13, 15, 20} },
@@ -345,7 +345,7 @@ const qp_table   qp_table_420_12bpc_min = {
 };
 
 
-const qp_table   qp_table_422_12bpc_min = {
+static const qp_table   qp_table_422_12bpc_min = {
 	{   6, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 16, 20} },
 	{ 6.5, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 16, 20} },
 	{   7, { 0, 4, 9, 10, 11, 11, 11, 11, 11, 11, 13, 13, 13, 15, 19} },
@@ -386,7 +386,7 @@ const qp_table   qp_table_422_12bpc_min = {
 };
 
 
-const qp_table   qp_table_422_12bpc_max = {
+static const qp_table   qp_table_422_12bpc_max = {
 	{   6, {12, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} },
 	{ 6.5, {12, 12, 13, 14, 15, 15, 15, 16, 17, 18, 18, 19, 19, 20, 21} },
 	{   7, {11, 12, 13, 14, 15, 15, 15, 16, 17, 17, 18, 18, 19, 19, 20} },
@@ -427,7 +427,7 @@ const qp_table   qp_table_422_12bpc_max = {
 };
 
 
-const qp_table   qp_table_444_12bpc_max = {
+static const qp_table   qp_table_444_12bpc_max = {
 	{   6, {12, 14, 16, 16, 17, 17, 17, 18, 19, 20, 20, 20, 20, 21, 23} },
 	{ 6.5, {12, 14, 15, 16, 16, 16, 17, 18, 19, 19, 20, 20, 20, 21, 23} },
 	{   7, {12, 13, 15, 15, 16, 16, 16, 17, 18, 19, 19, 20, 20, 21, 22} },
@@ -492,7 +492,7 @@ const qp_table   qp_table_444_12bpc_max = {
 };
 
 
-const qp_table   qp_table_420_8bpc_min = {
+static const qp_table   qp_table_420_8bpc_min = {
 	{   4, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 9, 13} },
 	{ 4.5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} },
 	{   5, { 0, 0, 1, 1, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} },
@@ -513,7 +513,7 @@ const qp_table   qp_table_420_8bpc_min = {
 };
 
 
-const qp_table   qp_table_422_8bpc_min = {
+static const qp_table   qp_table_422_8bpc_min = {
 	{   6, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} },
 	{ 6.5, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 8, 12} },
 	{   7, { 0, 0, 1, 2, 3, 3, 3, 3, 3, 3, 5, 5, 5, 7, 11} },
@@ -538,7 +538,7 @@ const qp_table   qp_table_422_8bpc_min = {
 };
 
 
-const qp_table   qp_table_422_10bpc_max = {
+static const qp_table   qp_table_422_10bpc_max = {
 	{   6, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} },
 	{ 6.5, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} },
 	{   7, { 7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 15, 16} },
@@ -571,7 +571,7 @@ const qp_table   qp_table_422_10bpc_max = {
 };
 
 
-const qp_table qp_table_420_10bpc_max = {
+static const qp_table qp_table_420_10bpc_max = {
 	{   4, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 17, 18} },
 	{ 4.5, { 8, 8, 9, 10, 11, 11, 11, 12, 13, 14, 14, 15, 15, 16, 17} },
 	{   5, { 7, 8, 9, 10, 11, 11, 11, 12, 13, 13, 14, 14, 15, 16, 17} },
@@ -598,7 +598,7 @@ const qp_table qp_table_420_10bpc_max = {
 };
 
 
-const qp_table   qp_table_420_10bpc_min = {
+static const qp_table   qp_table_420_10bpc_min = {
 	{   4, { 0, 4, 4, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 13, 17} },
 	{ 4.5, { 0, 4, 4, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 16} },
 	{   5, { 0, 4, 4, 5, 7, 7, 7, 7, 7, 7, 9, 9, 9, 12, 16} },
@@ -625,7 +625,7 @@ const qp_table   qp_table_420_10bpc_min = {
 };
 
 
-const qp_table   qp_table_444_10bpc_max = {
+static const qp_table   qp_table_444_10bpc_max = {
 	{   6, { 8, 10, 12, 12, 13, 13, 13, 14, 15, 16, 16, 16, 16, 17, 19} },
 	{ 6.5, { 8, 10, 11, 12, 12, 12, 13, 14, 15, 15, 16, 16, 16, 17, 19} },
 	{   7, { 8, 9, 11, 11, 12, 12, 12, 13, 14, 15, 15, 16, 16, 17, 18} },
@@ -678,7 +678,7 @@ const qp_table   qp_table_444_10bpc_max = {
 };
 
 
-const qp_table   qp_table_422_8bpc_max = {
+static const qp_table   qp_table_422_8bpc_max = {
 	{   6, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} },
 	{ 6.5, { 4, 4, 5, 6, 7, 7, 7, 8, 9, 10, 10, 11, 11, 12, 13} },
 	{   7, { 3, 4, 5, 6, 7, 7, 7, 8, 9, 9, 10, 10, 11, 11, 12} },
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c b/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c
index d635b73af46f..9fd8b269dd79 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/dcn32/hw_factory_dcn32.c
@@ -108,6 +108,13 @@ static const struct ddc_registers ddc_data_regs_dcn[] = {
 	ddc_data_regs_dcn2(4),
 	ddc_data_regs_dcn2(5),
 	{
+		// add a dummy entry for cases no such port
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
+		.ddc_setup = 0,
+		.phy_aux_cntl = 0,
+		.dc_gpio_aux_ctrl_5 = 0
+	},
+	{
 			DDC_GPIO_VGA_REG_LIST(DATA),
 			.ddc_setup = 0,
 			.phy_aux_cntl = 0,
@@ -122,6 +129,13 @@ static const struct ddc_registers ddc_clk_regs_dcn[] = {
 	ddc_clk_regs_dcn2(4),
 	ddc_clk_regs_dcn2(5),
 	{
+		// add a dummy entry for cases no such port
+		{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,},
+		.ddc_setup = 0,
+		.phy_aux_cntl = 0,
+		.dc_gpio_aux_ctrl_5 = 0
+	},
+	{
 			DDC_GPIO_VGA_REG_LIST(CLK),
 			.ddc_setup = 0,
 			.phy_aux_cntl = 0,
@@ -242,8 +256,8 @@ static const struct hw_factory_funcs funcs = {
  */
 void dal_hw_factory_dcn32_init(struct hw_factory *factory)
 {
-	factory->number_of_pins[GPIO_ID_DDC_DATA] = 6;
-	factory->number_of_pins[GPIO_ID_DDC_CLOCK] = 6;
+	factory->number_of_pins[GPIO_ID_DDC_DATA] = 8;
+	factory->number_of_pins[GPIO_ID_DDC_CLOCK] = 8;
 	factory->number_of_pins[GPIO_ID_GENERIC] = 4;
 	factory->number_of_pins[GPIO_ID_HPD] = 5;
 	factory->number_of_pins[GPIO_ID_GPIO_PAD] = 28;
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c
index 6fd38cdd68c0..525bc8881950 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_ddc.c
@@ -94,11 +94,14 @@ static enum gpio_result set_config(
 		 * is required for detection of AUX mode */
 		if (hw_gpio->base.en != GPIO_DDC_LINE_VIP_PAD) {
 			if (!ddc_data_pd_en || !ddc_clk_pd_en) {
-
-				REG_SET_2(gpio.MASK_reg, regval,
+				if (hw_gpio->base.en == GPIO_DDC_LINE_DDC_VGA) {
+					// bit 4 of mask has different usage in some cases
+					REG_SET(gpio.MASK_reg, regval, DC_GPIO_DDC1DATA_PD_EN, 1);
+				} else {
+					REG_SET_2(gpio.MASK_reg, regval,
 						DC_GPIO_DDC1DATA_PD_EN, 1,
 						DC_GPIO_DDC1CLK_PD_EN, 1);
-
+				}
 				if (config_data->type ==
 						GPIO_CONFIG_TYPE_I2C_AUX_DUAL_MODE)
 					msleep(3);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index 9498105c98ab..525f8f0b8732 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -115,6 +115,13 @@ struct resource_funcs {
 				int vlevel);
 	void (*update_soc_for_wm_a)(
 				struct dc *dc, struct dc_state *context);
+
+	/**
+	 * @populate_dml_pipes - Populate pipe data struct
+	 *
+	 * Returns:
+	 * Total of pipes available in the specific ASIC.
+	 */
 	int (*populate_dml_pipes)(
 		struct dc *dc,
 		struct dc_state *context,
@@ -233,8 +240,11 @@ struct resource_funcs {
 			unsigned int pipe_cnt,
             unsigned int index);
 
-	bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context);
+	bool (*remove_phantom_pipes)(struct dc *dc, struct dc_state *context, bool fast_update);
+	void (*retain_phantom_pipes)(struct dc *dc, struct dc_state *context);
 	void (*get_panel_config_defaults)(struct dc_panel_config *panel_config);
+	void (*save_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
+	void (*restore_mall_state)(struct dc *dc, struct dc_state *context, struct mall_temp_config *temp_config);
 };
 
 struct audio_support{
@@ -413,7 +423,10 @@ struct pipe_ctx {
 
 	struct pll_settings pll_settings;
 
-	/* link config records software decision for what link config should be
+	/**
+	 * @link_config:
+	 *
+	 * link config records software decision for what link config should be
 	 * enabled given current link capability and stream during hw resource
 	 * mapping. This is to decouple the dependency on link capability during
 	 * dc commit or update.
@@ -507,33 +520,62 @@ struct bw_context {
 	union bw_output bw;
 	struct display_mode_lib dml;
 };
+
 /**
- * struct dc_state - The full description of a state requested by a user
- *
- * @streams: Stream properties
- * @stream_status: The planes on a given stream
- * @res_ctx: Persistent state of resources
- * @bw_ctx: The output from bandwidth and watermark calculations and the DML
- * @pp_display_cfg: PowerPlay clocks and settings
- * @dcn_bw_vars: non-stack memory to support bandwidth calculations
- *
+ * struct dc_state - The full description of a state requested by users
  */
 struct dc_state {
+	/**
+	 * @streams: Stream state properties
+	 */
 	struct dc_stream_state *streams[MAX_PIPES];
+
+	/**
+	 * @stream_status: Planes status on a given stream
+	 */
 	struct dc_stream_status stream_status[MAX_PIPES];
+
+	/**
+	 * @stream_count: Total of streams in use
+	 */
 	uint8_t stream_count;
 	uint8_t stream_mask;
 
+	/**
+	 * @res_ctx: Persistent state of resources
+	 */
 	struct resource_context res_ctx;
 
+	/**
+	 * @bw_ctx: The output from bandwidth and watermark calculations and the DML
+	 *
+	 * Each context must have its own instance of VBA, and in order to
+	 * initialize and obtain IP and SOC, the base DML instance from DC is
+	 * initially copied into every context.
+	 */
 	struct bw_context bw_ctx;
 
-	/* Note: these are big structures, do *not* put on stack! */
+	/**
+	 * @pp_display_cfg: PowerPlay clocks and settings
+	 * Note: this is a big struct, do *not* put on stack!
+	 */
 	struct dm_pp_display_configuration pp_display_cfg;
+
+	/**
+	 * @dcn_bw_vars: non-stack memory to support bandwidth calculations
+	 * Note: this is a big struct, do *not* put on stack!
+	 */
 	struct dcn_bw_internal_vars dcn_bw_vars;
 
 	struct clk_mgr *clk_mgr;
 
+	/**
+	 * @refcount: refcount reference
+	 *
+	 * Notice that dc_state is used around the code to capture the current
+	 * context, so we need to pass it everywhere. That's why we want to use
+	 * kref in this struct.
+	 */
 	struct kref refcount;
 
 	struct {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
index b304d450b038..e8d8c5cb1309 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/dc_link_dp.h
@@ -193,7 +193,7 @@ enum dc_status dpcd_configure_lttpr_mode(
 		struct link_training_settings *lt_settings);
 
 enum dp_link_encoding dp_get_link_encoding_format(const struct dc_link_settings *link_settings);
-bool dp_retrieve_lttpr_cap(struct dc_link *link);
+enum dc_status dp_retrieve_lttpr_cap(struct dc_link *link);
 bool dp_is_lttpr_present(struct dc_link *link);
 enum lttpr_mode dp_decide_lttpr_mode(struct dc_link *link, struct dc_link_settings *link_setting);
 void dp_get_lttpr_mode_override(struct dc_link *link, enum lttpr_mode *override);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
index e7571c6f5ead..5b0265c0df61 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dchubbub.h
@@ -46,7 +46,7 @@ struct dcn_hubbub_wm_set {
 	uint32_t pte_meta_urgent;
 	uint32_t sr_enter;
 	uint32_t sr_exit;
-	uint32_t dram_clk_chanage;
+	uint32_t dram_clk_change;
 	uint32_t usr_retrain;
 	uint32_t fclk_pstate_change;
 };
@@ -167,10 +167,26 @@ struct hubbub_funcs {
 	void (*force_pstate_change_control)(struct hubbub *hubbub, bool force, bool allow);
 
 	void (*init_watermarks)(struct hubbub *hubbub);
+
+	/**
+	 * @program_det_size:
+	 *
+	 * DE-Tile buffers (DET) is a memory that is used to convert the tiled
+	 * data into linear, which the rest of the display can use to generate
+	 * the graphics output. One of the main features of this component is
+	 * that each pipe has a configurable DET buffer which means that when a
+	 * pipe is not enabled, the device can assign the memory to other
+	 * enabled pipes to try to be more efficient.
+	 *
+	 * DET logic is handled by dchubbub. Some ASICs provide a feature named
+	 * Configurable Return Buffer (CRB) segments which can be allocated to
+	 * compressed or detiled buffers.
+	 */
 	void (*program_det_size)(struct hubbub *hubbub, int hubp_inst, unsigned det_buffer_size_in_kbyte);
 	void (*program_compbuf_size)(struct hubbub *hubbub, unsigned compbuf_size_kb, bool safe_to_increase);
 	void (*init_crb)(struct hubbub *hubbub);
 	void (*force_usr_retraining_allow)(struct hubbub *hubbub, bool allow);
+	void (*set_request_limit)(struct hubbub *hubbub, int memory_channel_count, int words_per_channel);
 };
 
 struct hubbub {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h
index 8df2765cce78..de3113ecbc77 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dmcu.h
@@ -56,20 +56,6 @@ struct dmcu {
 	bool auto_load_dmcu;
 };
 
-#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
-struct crc_region {
-	uint16_t x_start;
-	uint16_t y_start;
-	uint16_t x_end;
-	uint16_t y_end;
-};
-
-struct otg_phy_mux {
-	uint8_t phy_output_num;
-	uint8_t otg_output_num;
-};
-#endif
-
 struct dmcu_funcs {
 	bool (*dmcu_init)(struct dmcu *dmcu);
 	bool (*load_iram)(struct dmcu *dmcu,
@@ -100,7 +86,7 @@ struct dmcu_funcs {
 	bool (*recv_edid_cea_ack)(struct dmcu *dmcu, int *offset);
 #if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
 	void (*forward_crc_window)(struct dmcu *dmcu,
-			struct crc_region *crc_win,
+			struct rect *rect,
 			struct otg_phy_mux *mux_mapping);
 	void (*stop_crc_win_update)(struct dmcu *dmcu,
 			struct otg_phy_mux *mux_mapping);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
index dcb80c4747b0..131fcfa28bca 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/dpp.h
@@ -83,10 +83,15 @@ static const struct dpp_input_csc_matrix __maybe_unused dpp_input_csc_matrix[] =
 	{COLOR_SPACE_YCBCR709,
 		{0x3265, 0x2000, 0, 0xe6ce, 0xf105, 0x2000, 0xfa01, 0xa7d, 0,
 						0x2000, 0x3b61, 0xe24f} },
-
 	{COLOR_SPACE_YCBCR709_LIMITED,
 		{0x39a6, 0x2568, 0, 0xe0d6, 0xeedd, 0x2568, 0xf925, 0x9a8, 0,
-						0x2568, 0x43ee, 0xdbb2} }
+						0x2568, 0x43ee, 0xdbb2} },
+	{COLOR_SPACE_2020_YCBCR,
+		{0x2F30, 0x2000, 0, 0xE869, 0xEDB7, 0x2000, 0xFABC, 0xBC6, 0,
+						0x2000, 0x3C34, 0xE1E6} },
+	{COLOR_SPACE_2020_RGB_LIMITEDRANGE,
+		{0x35E0, 0x255F, 0, 0xE2B3, 0xEB20, 0x255F, 0xF9FD, 0xB1E, 0,
+						0x255F, 0x44BD, 0xDB43} }
 };
 
 struct dpp_grph_csc_adjustment {
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
index cd2be729846b..a819f0f97c5f 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/hw_shared.h
@@ -35,6 +35,13 @@
  ******************************************************************************/
 
 #define MAX_AUDIOS 7
+
+/**
+ * @MAX_PIPES:
+ *
+ * Every ASIC support a fixed number of pipes; MAX_PIPES defines a large number
+ * to be used inside loops and for determining array sizes.
+ */
 #define MAX_PIPES 6
 #define MAX_DIG_LINK_ENCODERS 7
 #define MAX_DWB_PIPES	1
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
index 42afa1952890..42db4b7b79fd 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/stream_encoder.h
@@ -243,6 +243,9 @@ struct stream_encoder_funcs {
 			uint32_t hubp_requestor_id,
 			enum dynamic_metadata_mode dmdata_mode);
 
+	/**
+	 * @dp_set_odm_combine: Sets up DP stream encoder for ODM.
+	 */
 	void (*dp_set_odm_combine)(
 		struct stream_encoder *enc,
 		bool odm_combine);
@@ -317,9 +320,6 @@ struct hpo_dp_stream_encoder_funcs {
 			uint32_t stream_enc_inst,
 			uint32_t link_enc_inst);
 
-	void (*audio_mute_control)(
-			struct hpo_dp_stream_encoder *enc, bool mute);
-
 	void (*dp_audio_setup)(
 			struct hpo_dp_stream_encoder *enc,
 			unsigned int az_inst,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
index 25a1df45b264..0e42e721dd15 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw/timing_generator.h
@@ -185,6 +185,7 @@ struct timing_generator_funcs {
 #ifdef CONFIG_DRM_AMD_DC_DCN
 	void (*phantom_crtc_post_enable)(struct timing_generator *tg);
 #endif
+	void (*disable_phantom_crtc)(struct timing_generator *tg);
 	bool (*immediate_disable_crtc)(struct timing_generator *tg);
 	bool (*is_counter_moving)(struct timing_generator *tg);
 	void (*get_position)(struct timing_generator *tg,
@@ -301,6 +302,11 @@ struct timing_generator_funcs {
 	void (*get_dsc_status)(struct timing_generator *optc,
 					uint32_t *dsc_mode);
 	void (*set_odm_bypass)(struct timing_generator *optc, const struct dc_crtc_timing *dc_crtc_timing);
+
+	/**
+	 * @set_odm_combine: Set up the ODM block to read from the correct
+	 * OPP(s) and turn on/off ODM memory.
+	 */
 	void (*set_odm_combine)(struct timing_generator *optc, int *opp_id, int opp_cnt,
 			struct dc_crtc_timing *timing);
 	void (*set_h_timing_div_manual_mode)(struct timing_generator *optc, bool manual_mode);
diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
index d04b68dad413..c43523f9ff6d 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/hw_sequencer.h
@@ -263,6 +263,7 @@ struct hw_sequencer_funcs {
 	void (*update_phantom_vp_position)(struct dc *dc,
 			struct dc_state *context,
 			struct pipe_ctx *phantom_pipe);
+	void (*apply_update_flags_for_phantom)(struct pipe_ctx *phantom_pipe);
 
 	void (*commit_subvp_config)(struct dc *dc, struct dc_state *context);
 	void (*subvp_pipe_control_lock)(struct dc *dc,
diff --git a/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h b/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h
index 89964c980b87..0f69946cce9f 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/link_hwss.h
@@ -38,6 +38,7 @@ struct link_resource;
 struct pipe_ctx;
 struct encoder_set_dp_phy_pattern_param;
 struct link_mst_stream_allocation_table;
+struct audio_output;
 
 struct link_hwss_ext {
 	/* function pointers below may require to check for NULL if caller
@@ -79,6 +80,10 @@ struct link_hwss {
 	void (*disable_link_output)(struct dc_link *link,
 			const struct link_resource *link_res,
 			enum signal_type signal);
+	void (*setup_audio_output)(struct pipe_ctx *pipe_ctx,
+			struct audio_output *audio_output, uint32_t audio_inst);
+	void (*enable_audio_packet)(struct pipe_ctx *pipe_ctx);
+	void (*disable_audio_packet)(struct pipe_ctx *pipe_ctx);
 };
 #endif /* __DC_LINK_HWSS_H__ */
 
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c
index 45f99351a0ab..5f4f6dd79511 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.c
@@ -28,20 +28,19 @@
 #include "include/logger_interface.h"
 
 #include "../dce110/irq_service_dce110.h"
+#include "irq_service_dcn201.h"
 
 #include "dcn/dcn_2_0_3_offset.h"
 #include "dcn/dcn_2_0_3_sh_mask.h"
 
 #include "cyan_skillfish_ip_offset.h"
 #include "soc15_hw_ip.h"
-
-#include "irq_service_dcn201.h"
-
 #include "ivsrcid/dcn/irqsrcs_dcn_1_0.h"
 
-static enum dc_irq_source to_dal_irq_source_dcn201(struct irq_service *irq_service,
-						   uint32_t src_id,
-						   uint32_t ext_id)
+enum dc_irq_source to_dal_irq_source_dcn201(
+		struct irq_service *irq_service,
+		uint32_t src_id,
+		uint32_t ext_id)
 {
 	switch (src_id) {
 	case DCN_1_0__SRCID__DC_D1_OTG_VSTARTUP:
@@ -79,7 +78,6 @@ static enum dc_irq_source to_dal_irq_source_dcn201(struct irq_service *irq_servi
 	default:
 		return DC_IRQ_SOURCE_INVALID;
 	}
-	return DC_IRQ_SOURCE_INVALID;
 }
 
 static bool hpd_ack(
@@ -138,6 +136,11 @@ static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
 	.ack = NULL
 };
 
+static const struct irq_source_info_funcs dmub_outbox_irq_info_funcs = {
+	.set = NULL,
+	.ack = NULL
+};
+
 #undef BASE_INNER
 #define BASE_INNER(seg) DMU_BASE__INST0_SEG ## seg
 
diff --git a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.h b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.h
index 8e27c5e219a3..0cfd2f2d62e8 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.h
+++ b/drivers/gpu/drm/amd/display/dc/irq/dcn201/irq_service_dcn201.h
@@ -1,5 +1,5 @@
 /*
- * Copyright 2018 Advanced Micro Devices, Inc.
+ * Copyright 2022 Advanced Micro Devices, Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
diff --git a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
index 7bad39bba86b..d100edaedbbb 100644
--- a/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
+++ b/drivers/gpu/drm/amd/display/dc/irq/irq_service.c
@@ -112,8 +112,15 @@ bool dal_irq_service_set(
 
 	dal_irq_service_ack(irq_service, source);
 
-	if (info->funcs && info->funcs->set)
+	if (info->funcs && info->funcs->set) {
+		if (info->funcs->set == dal_irq_service_dummy_set) {
+			DC_LOG_WARNING("%s: src: %d, st: %d\n", __func__,
+				       source, enable);
+			ASSERT(0);
+		}
+
 		return info->funcs->set(irq_service, info, enable);
+	}
 
 	dal_irq_service_set_generic(irq_service, info, enable);
 
@@ -146,8 +153,14 @@ bool dal_irq_service_ack(
 		return false;
 	}
 
-	if (info->funcs && info->funcs->ack)
+	if (info->funcs && info->funcs->ack) {
+		if (info->funcs->ack == dal_irq_service_dummy_ack) {
+			DC_LOG_WARNING("%s: src: %d\n", __func__, source);
+			ASSERT(0);
+		}
+
 		return info->funcs->ack(irq_service, info);
+	}
 
 	dal_irq_service_ack_generic(irq_service, info);
 
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.c b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.c
new file mode 100644
index 000000000000..801a95b34e8c
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.c
@@ -0,0 +1,28 @@
+
+/*
+ * Copyright 2022 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+/*********************************************************************/
+//				USB4 DPIA BANDWIDTH ALLOCATION LOGIC
+/*********************************************************************/
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h
new file mode 100644
index 000000000000..669e995f825f
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dp_dpia_bw.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#ifndef DC_INC_LINK_DP_DPIA_BW_H_
+#define DC_INC_LINK_DP_DPIA_BW_H_
+
+// XXX: TODO: Re-add for Phase 2
+/* Number of Host Routers per motherboard is 2 and 2 DPIA per host router */
+#define MAX_HR_NUM 2
+
+struct dc_host_router_bw_alloc {
+	int max_bw[MAX_HR_NUM];             // The Max BW that each Host Router has available to be shared btw DPIAs
+	int total_estimated_bw[MAX_HR_NUM]; // The Total Verified and available BW that Host Router has
+};
+
+/*
+ * Enable BW Allocation Mode Support from the DP-Tx side
+ *
+ * @link: pointer to the dc_link struct instance
+ *
+ * return: SUCCESS or FAILURE
+ */
+bool set_dptx_usb4_bw_alloc_support(struct dc_link *link);
+
+/*
+ * Send a request from DP-Tx requesting to allocate BW remotely after
+ * allocating it locally. This will get processed by CM and a CB function
+ * will be called.
+ *
+ * @link: pointer to the dc_link struct instance
+ * @req_bw: The requested bw in Kbyte to allocated
+ *
+ * return: none
+ */
+void set_usb4_req_bw_req(struct dc_link *link, int req_bw);
+
+/*
+ * CB function for when the status of the Req above is complete. We will
+ * find out the result of allocating on CM and update structs accordingly
+ *
+ * @link: pointer to the dc_link struct instance
+ *
+ * return: none
+ */
+void get_usb4_req_bw_resp(struct dc_link *link);
+
+#endif /* DC_INC_LINK_DP_DPIA_BW_H_ */
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c
index 4227adbc646a..33148b753c03 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.c
@@ -170,11 +170,63 @@ static void update_dio_stream_allocation_table(struct dc_link *link,
 	link_enc->funcs->update_mst_stream_allocation_table(link_enc, table);
 }
 
+void setup_dio_audio_output(struct pipe_ctx *pipe_ctx,
+		struct audio_output *audio_output, uint32_t audio_inst)
+{
+	if (dc_is_dp_signal(pipe_ctx->stream->signal))
+		pipe_ctx->stream_res.stream_enc->funcs->dp_audio_setup(
+				pipe_ctx->stream_res.stream_enc,
+				audio_inst,
+				&pipe_ctx->stream->audio_info);
+	else
+		pipe_ctx->stream_res.stream_enc->funcs->hdmi_audio_setup(
+				pipe_ctx->stream_res.stream_enc,
+				audio_inst,
+				&pipe_ctx->stream->audio_info,
+				&audio_output->crtc_info);
+}
+
+void enable_dio_audio_packet(struct pipe_ctx *pipe_ctx)
+{
+	if (dc_is_dp_signal(pipe_ctx->stream->signal))
+		pipe_ctx->stream_res.stream_enc->funcs->dp_audio_enable(
+				pipe_ctx->stream_res.stream_enc);
+
+	pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control(
+			pipe_ctx->stream_res.stream_enc, false);
+
+	if (dc_is_dp_signal(pipe_ctx->stream->signal))
+		dp_source_sequence_trace(pipe_ctx->stream->link,
+				DPCD_SOURCE_SEQ_AFTER_ENABLE_AUDIO_STREAM);
+}
+
+void disable_dio_audio_packet(struct pipe_ctx *pipe_ctx)
+{
+	pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control(
+			pipe_ctx->stream_res.stream_enc, true);
+
+	if (pipe_ctx->stream_res.audio) {
+		if (dc_is_dp_signal(pipe_ctx->stream->signal))
+			pipe_ctx->stream_res.stream_enc->funcs->dp_audio_disable(
+					pipe_ctx->stream_res.stream_enc);
+		else
+			pipe_ctx->stream_res.stream_enc->funcs->hdmi_audio_disable(
+					pipe_ctx->stream_res.stream_enc);
+	}
+
+	if (dc_is_dp_signal(pipe_ctx->stream->signal))
+		dp_source_sequence_trace(pipe_ctx->stream->link,
+				DPCD_SOURCE_SEQ_AFTER_DISABLE_AUDIO_STREAM);
+}
+
 static const struct link_hwss dio_link_hwss = {
 	.setup_stream_encoder = setup_dio_stream_encoder,
 	.reset_stream_encoder = reset_dio_stream_encoder,
 	.setup_stream_attribute = setup_dio_stream_attribute,
 	.disable_link_output = disable_dio_link_output,
+	.setup_audio_output = setup_dio_audio_output,
+	.enable_audio_packet = enable_dio_audio_packet,
+	.disable_audio_packet = disable_dio_audio_packet,
 	.ext = {
 		.set_throttled_vcp_size = set_dio_throttled_vcp_size,
 		.enable_dp_link_output = enable_dio_dp_link_output,
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.h b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.h
index 126d37f847a1..9a108c3d7831 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.h
+++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dio.h
@@ -50,5 +50,9 @@ void set_dio_dp_lane_settings(struct dc_link *link,
 		const struct link_resource *link_res,
 		const struct dc_link_settings *link_settings,
 		const struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX]);
+void setup_dio_audio_output(struct pipe_ctx *pipe_ctx,
+		struct audio_output *audio_output, uint32_t audio_inst);
+void enable_dio_audio_packet(struct pipe_ctx *pipe_ctx);
+void disable_dio_audio_packet(struct pipe_ctx *pipe_ctx);
 
 #endif /* __LINK_HWSS_DIO_H__ */
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dpia.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dpia.c
index 64f7ea6a9aa3..861f3cd5b356 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_dpia.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_dpia.c
@@ -57,6 +57,9 @@ static const struct link_hwss dpia_link_hwss = {
 	.reset_stream_encoder = reset_dio_stream_encoder,
 	.setup_stream_attribute = setup_dio_stream_attribute,
 	.disable_link_output = disable_dio_link_output,
+	.setup_audio_output = setup_dio_audio_output,
+	.enable_audio_packet = enable_dio_audio_packet,
+	.disable_audio_packet = disable_dio_audio_packet,
 	.ext = {
 		.set_throttled_vcp_size = set_dio_throttled_vcp_size,
 		.enable_dp_link_output = enable_dio_dp_link_output,
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c
index 153a88381f2c..2f46e1ac4ce0 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_hwss_hpo_dp.c
@@ -262,11 +262,36 @@ static void update_hpo_dp_stream_allocation_table(struct dc_link *link,
 			table);
 }
 
+static void setup_hpo_dp_audio_output(struct pipe_ctx *pipe_ctx,
+		struct audio_output *audio_output, uint32_t audio_inst)
+{
+	pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_audio_setup(
+			pipe_ctx->stream_res.hpo_dp_stream_enc,
+			audio_inst,
+			&pipe_ctx->stream->audio_info);
+}
+
+static void enable_hpo_dp_audio_packet(struct pipe_ctx *pipe_ctx)
+{
+	pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_audio_enable(
+			pipe_ctx->stream_res.hpo_dp_stream_enc);
+}
+
+static void disable_hpo_dp_audio_packet(struct pipe_ctx *pipe_ctx)
+{
+	if (pipe_ctx->stream_res.audio)
+		pipe_ctx->stream_res.hpo_dp_stream_enc->funcs->dp_audio_disable(
+				pipe_ctx->stream_res.hpo_dp_stream_enc);
+}
+
 static const struct link_hwss hpo_dp_link_hwss = {
 	.setup_stream_encoder = setup_hpo_dp_stream_encoder,
 	.reset_stream_encoder = reset_hpo_dp_stream_encoder,
 	.setup_stream_attribute = setup_hpo_dp_stream_attribute,
 	.disable_link_output = disable_hpo_dp_link_output,
+	.setup_audio_output = setup_hpo_dp_audio_output,
+	.enable_audio_packet = enable_hpo_dp_audio_packet,
+	.disable_audio_packet = disable_hpo_dp_audio_packet,
 	.ext = {
 		.set_throttled_vcp_size = set_hpo_dp_throttled_vcp_size,
 		.set_hblank_min_symbol_width = set_hpo_dp_hblank_min_symbol_width,
diff --git a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
index 7a8f61517424..33907feefebb 100644
--- a/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
+++ b/drivers/gpu/drm/amd/display/dmub/inc/dmub_cmd.h
@@ -225,6 +225,12 @@ union dmub_psr_debug_flags {
 		 * Use TPS3 signal when restore main link.
 		 */
 		uint32_t force_wakeup_by_tps3 : 1;
+
+		/**
+		 * Back to back flip, therefore cannot power down PHY
+		 */
+		uint32_t back_to_back_flip : 1;
+
 	} bitfields;
 
 	/**
@@ -401,8 +407,9 @@ union dmub_fw_boot_options {
 		uint32_t gpint_scratch8: 1; /* 1 if GPINT is in scratch8*/
 		uint32_t usb4_cm_version: 1; /**< 1 CM support */
 		uint32_t dpia_hpd_int_enable_supported: 1; /* 1 if dpia hpd int enable supported */
+		uint32_t usb4_dpia_bw_alloc_supported: 1; /* 1 if USB4 dpia BW allocation supported */
 
-		uint32_t reserved : 16; /**< reserved */
+		uint32_t reserved : 15; /**< reserved */
 	} bits; /**< boot bits */
 	uint32_t all; /**< 32-bit access to bits */
 };
@@ -731,6 +738,11 @@ enum dmub_cmd_type {
 	 */
 
 	/**
+	 * Command type used for all SECURE_DISPLAY commands.
+	 */
+	DMUB_CMD__SECURE_DISPLAY = 85,
+
+	/**
 	 * Command type used to set DPIA HPD interrupt state
 	 */
 	DMUB_CMD__DPIA_HPD_INT_ENABLE = 86,
@@ -1017,13 +1029,14 @@ struct dmub_cmd_fw_assisted_mclk_switch_pipe_data_v2 {
 			uint16_t vtotal;
 			uint16_t htotal;
 			uint8_t vblank_pipe_index;
-			uint8_t padding[2];
+			uint8_t padding[1];
 			struct {
 				uint8_t drr_in_use;
 				uint8_t drr_window_size_ms;	// Indicates largest VMIN/VMAX adjustment per frame
 				uint16_t min_vtotal_supported;	// Min VTOTAL that supports switching in VBLANK
 				uint16_t max_vtotal_supported;	// Max VTOTAL that can support SubVP static scheduling
 				uint8_t use_ramping;		// Use ramping or not
+				uint8_t drr_vblank_start_margin;
 			} drr_info;				// DRR considered as part of SubVP + VBLANK case
 		} vblank_data;
 	} pipe_config;
@@ -1866,9 +1879,13 @@ struct dmub_cmd_psr_copy_settings_data {
 	 */
 	uint8_t use_phy_fsm;
 	/**
+	 * frame delay for frame re-lock
+	 */
+	uint8_t relock_delay_frame_cnt;
+	/**
 	 * Explicit padding to 2 byte boundary.
 	 */
-	uint8_t pad3[2];
+	uint8_t pad3;
 };
 
 /**
@@ -3144,6 +3161,33 @@ struct dmub_rb_cmd_get_usbc_cable_id {
 };
 
 /**
+ * Command type of a DMUB_CMD__SECURE_DISPLAY command
+ */
+enum dmub_cmd_secure_display_type {
+	DMUB_CMD__SECURE_DISPLAY_TEST_CMD = 0,		/* test command to only check if inbox message works */
+	DMUB_CMD__SECURE_DISPLAY_CRC_STOP_UPDATE,
+	DMUB_CMD__SECURE_DISPLAY_CRC_WIN_NOTIFY
+};
+
+/**
+ * Definition of a DMUB_CMD__SECURE_DISPLAY command
+ */
+struct dmub_rb_cmd_secure_display {
+	struct dmub_cmd_header header;
+	/**
+	 * Data passed from driver to dmub firmware.
+	 */
+	struct dmub_cmd_roi_info {
+		uint16_t x_start;
+		uint16_t x_end;
+		uint16_t y_start;
+		uint16_t y_end;
+		uint8_t otg_id;
+		uint8_t phy_id;
+	} roi_info;
+};
+
+/**
  * union dmub_rb_cmd - DMUB inbox command.
  */
 union dmub_rb_cmd {
@@ -3348,6 +3392,11 @@ union dmub_rb_cmd {
 	 */
 	struct dmub_rb_cmd_query_hpd_state query_hpd;
 	/**
+	 * Definition of a DMUB_CMD__SECURE_DISPLAY command.
+	 */
+	struct dmub_rb_cmd_secure_display secure_display;
+
+	/**
 	 * Definition of a DMUB_CMD__DPIA_HPD_INT_ENABLE command.
 	 */
 	struct dmub_rb_cmd_dpia_hpd_int_enable dpia_hpd_int_enable;
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index 447a0ec9cbe2..f6034213c700 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -61,7 +61,7 @@ static const int32_t numerator01[] = { 31308,   180000, 0,  0,  0};
 static const int32_t numerator02[] = { 12920,   4500,   0,  0,  0};
 static const int32_t numerator03[] = { 55,      99,     0,  0,  0};
 static const int32_t numerator04[] = { 55,      99,     0,  0,  0};
-static const int32_t numerator05[] = { 2400,    2200,   2200, 2400, 2600};
+static const int32_t numerator05[] = { 2400,    2222,   2200, 2400, 2600};
 
 /* one-time setup of X points */
 void setup_x_points_distribution(void)
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index 0f39ab9dc5b4..c2e00f7b8381 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -688,10 +688,10 @@ static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf,
 	if (app_tf != TRANSFER_FUNC_UNKNOWN) {
 		infopacket->valid = true;
 
-		infopacket->sb[6] |= 0x08;  // PB6 = [Bit 3 = Native Color Active]
-
-		if (app_tf == TRANSFER_FUNC_GAMMA_22) {
-			infopacket->sb[9] |= 0x04;  // PB6 = [Bit 2 = Gamma 2.2 EOTF Active]
+		if (app_tf != TRANSFER_FUNC_PQ2084) {
+			infopacket->sb[6] |= 0x08;  // PB6 = [Bit 3 = Native Color Active]
+			if (app_tf == TRANSFER_FUNC_GAMMA_22)
+				infopacket->sb[9] |= 0x04;  // PB6 = [Bit 2 = Gamma 2.2 EOTF Active]
 		}
 	}
 }
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h
index 1d8b746b02f2..edf5845f6a1f 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h
@@ -35,7 +35,8 @@ struct mod_vrr_params;
 
 void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
 		struct dc_info_packet *info_packet,
-		enum dc_color_space cs);
+		enum dc_color_space cs,
+		enum color_transfer_func tf);
 
 void mod_build_hf_vsif_infopacket(const struct dc_stream_state *stream,
 		struct dc_info_packet *info_packet);
diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
index 27ceba9d6d65..69691058ab89 100644
--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
@@ -132,7 +132,8 @@ enum ColorimetryYCCDP {
 
 void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
 		struct dc_info_packet *info_packet,
-		enum dc_color_space cs)
+		enum dc_color_space cs,
+		enum color_transfer_func tf)
 {
 	unsigned int vsc_packet_revision = vsc_packet_undefined;
 	unsigned int i;
@@ -382,6 +383,9 @@ void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
 				colorimetryFormat = ColorimetryYCC_DP_AdobeYCC;
 			else if (cs == COLOR_SPACE_2020_YCBCR)
 				colorimetryFormat = ColorimetryYCC_DP_ITU2020YCbCr;
+
+			if (cs == COLOR_SPACE_2020_YCBCR && tf == TRANSFER_FUNC_GAMMA_22)
+				colorimetryFormat = ColorimetryYCC_DP_ITU709;
 			break;
 
 		default:
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index 235259d6c5a1..9b5d9b2c9a6a 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -102,9 +102,18 @@ static const struct abm_parameters abm_settings_config1[abm_defines_max_level] =
 	{0x82,   0x4d,    0x20,       0x00,     0x00,        0xff,     0xb3, 0x70,     0x70,     0xcccc,  0xcccc},
 };
 
+static const struct abm_parameters abm_settings_config2[abm_defines_max_level] = {
+//  min_red  max_red  bright_pos  dark_pos  bright_gain  contrast  dev   min_knee  max_knee  blRed    blStart
+	{0xf0,   0xbf,    0x20,       0x00,     0x88,        0x99,     0xb3, 0x40,     0xe0,    0x0000,  0xcccc},
+	{0xd8,   0x85,    0x20,       0x00,     0x70,        0x90,     0xa8, 0x40,     0xc8,    0x0700,  0xb333},
+	{0xb8,   0x58,    0x20,       0x00,     0x64,        0x88,     0x78, 0x70,     0xa0,    0x7000,  0x9999},
+	{0x82,   0x40,    0x20,       0x00,     0x00,        0xb8,     0xb3, 0x70,     0x70,    0xc333,  0xb333},
+};
+
 static const struct abm_parameters * const abm_settings[] = {
 	abm_settings_config0,
 	abm_settings_config1,
+	abm_settings_config2,
 };
 
 #define NUM_AMBI_LEVEL    5
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h
index 483769fb1736..537aee0536d3 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_offset.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
 #ifndef _dcn_3_0_0_OFFSET_HEADER
 #define _dcn_3_0_0_OFFSET_HEADER
 
diff --git a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h
index b79be3a25a80..f9d90b098519 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/dcn/dcn_3_0_0_sh_mask.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
 #ifndef _dcn_3_0_0_SH_MASK_HEADER
 #define _dcn_3_0_0_SH_MASK_HEADER
 
diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h
index 363d2139cea2..db7e22720d00 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_offset.h
@@ -993,7 +993,8 @@
 #define mmUVD_RAS_VCPU_VCODEC_STATUS_BASE_IDX                  1
 #define mmUVD_RAS_MMSCH_FATAL_ERROR                            0x0058
 #define mmUVD_RAS_MMSCH_FATAL_ERROR_BASE_IDX                   1
-
+#define mmVCN_RAS_CNTL                                                                                 0x04b9
+#define mmVCN_RAS_CNTL_BASE_IDX                                                                        1
 
 /* JPEG 2_6_0 regs */
 #define mmUVD_RAS_JPEG0_STATUS                                 0x0059
diff --git a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h
index 8de883b76d90..874a8b7e1feb 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/vcn/vcn_2_5_sh_mask.h
@@ -3618,6 +3618,33 @@
 #define UVD_RAS_MMSCH_FATAL_ERROR__POISONED_VF_MASK             0x7FFFFFFFL
 #define UVD_RAS_MMSCH_FATAL_ERROR__POISONED_PF_MASK             0x80000000L
 
+//VCN 2_6_0 VCN_RAS_CNTL
+#define VCN_RAS_CNTL__VCPU_VCODEC_IH_EN__SHIFT                                                                0x0
+#define VCN_RAS_CNTL__MMSCH_FATAL_ERROR_EN__SHIFT                                                             0x1
+#define VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN__SHIFT                                                               0x4
+#define VCN_RAS_CNTL__MMSCH_PMI_EN__SHIFT                                                                     0x5
+#define VCN_RAS_CNTL__VCPU_VCODEC_REARM__SHIFT                                                                0x8
+#define VCN_RAS_CNTL__MMSCH_REARM__SHIFT                                                                      0x9
+#define VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN__SHIFT                                                             0xc
+#define VCN_RAS_CNTL__VCPU_VCODEC_READY__SHIFT                                                                0x10
+#define VCN_RAS_CNTL__MMSCH_READY__SHIFT                                                                      0x11
+#define VCN_RAS_CNTL__VCPU_VCODEC_IH_EN_MASK                                                                  0x00000001L
+#define VCN_RAS_CNTL__MMSCH_FATAL_ERROR_EN_MASK                                                               0x00000002L
+#define VCN_RAS_CNTL__VCPU_VCODEC_PMI_EN_MASK                                                                 0x00000010L
+#define VCN_RAS_CNTL__MMSCH_PMI_EN_MASK                                                                       0x00000020L
+#define VCN_RAS_CNTL__VCPU_VCODEC_REARM_MASK                                                                  0x00000100L
+#define VCN_RAS_CNTL__MMSCH_REARM_MASK                                                                        0x00000200L
+#define VCN_RAS_CNTL__VCPU_VCODEC_STALL_EN_MASK                                                               0x00001000L
+#define VCN_RAS_CNTL__VCPU_VCODEC_READY_MASK                                                                  0x00010000L
+#define VCN_RAS_CNTL__MMSCH_READY_MASK                                                                        0x00020000L
+
+//VCN 2_6_0 UVD_VCPU_INT_EN
+#define UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN__SHIFT                                                        0x16
+#define UVD_VCPU_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK                                                          0x00400000L
+
+//VCN 2_6_0 UVD_SYS_INT_EN
+#define UVD_SYS_INT_EN__RASCNTL_VCPU_VCODEC_EN_MASK                                                           0x04000000L
+
 /* JPEG 2_6_0 UVD_RAS_JPEG0_STATUS */
 #define UVD_RAS_JPEG0_STATUS__POISONED_VF__SHIFT                0x0
 #define UVD_RAS_JPEG0_STATUS__POISONED_PF__SHIFT                0x1f
diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h
index 15943bc21bc5..b78360a71bc9 100644
--- a/drivers/gpu/drm/amd/include/atombios.h
+++ b/drivers/gpu/drm/amd/include/atombios.h
@@ -4107,7 +4107,7 @@ typedef struct _ATOM_FAKE_EDID_PATCH_RECORD
 {
   UCHAR ucRecordType;
   UCHAR ucFakeEDIDLength;       // = 128 means EDID length is 128 bytes, otherwise the EDID length = ucFakeEDIDLength*128
-  UCHAR ucFakeEDIDString[1];    // This actually has ucFakeEdidLength elements.
+  UCHAR ucFakeEDIDString[];     // This actually has ucFakeEdidLength elements.
 } ATOM_FAKE_EDID_PATCH_RECORD;
 
 typedef struct  _ATOM_PANEL_RESOLUTION_PATCH_RECORD
@@ -4386,7 +4386,7 @@ typedef struct _ATOM_GPIO_PIN_ASSIGNMENT
 typedef struct _ATOM_GPIO_PIN_LUT
 {
   ATOM_COMMON_TABLE_HEADER  sHeader;
-  ATOM_GPIO_PIN_ASSIGNMENT   asGPIO_Pin[1];
+  ATOM_GPIO_PIN_ASSIGNMENT   asGPIO_Pin[];
 }ATOM_GPIO_PIN_LUT;
 
 /****************************************************************************/
@@ -4513,7 +4513,7 @@ typedef struct  _ATOM_DISPLAY_OBJECT_PATH
   USHORT    usSize;                                        //the size of ATOM_DISPLAY_OBJECT_PATH
   USHORT    usConnObjectId;                                //Connector Object ID
   USHORT    usGPUObjectId;                                 //GPU ID
-  USHORT    usGraphicObjIds[1];                            //1st Encoder Obj source from GPU to last Graphic Obj destinate to connector.
+  USHORT    usGraphicObjIds[];                            //1st Encoder Obj source from GPU to last Graphic Obj destinate to connector.
 }ATOM_DISPLAY_OBJECT_PATH;
 
 typedef struct  _ATOM_DISPLAY_EXTERNAL_OBJECT_PATH
@@ -4530,7 +4530,7 @@ typedef struct _ATOM_DISPLAY_OBJECT_PATH_TABLE
   UCHAR                           ucNumOfDispPath;
   UCHAR                           ucVersion;
   UCHAR                           ucPadding[2];
-  ATOM_DISPLAY_OBJECT_PATH        asDispPath[1];
+  ATOM_DISPLAY_OBJECT_PATH        asDispPath[];
 }ATOM_DISPLAY_OBJECT_PATH_TABLE;
 
 typedef struct _ATOM_OBJECT                                //each object has this structure
@@ -4545,7 +4545,7 @@ typedef struct _ATOM_OBJECT_TABLE                         //Above 4 object table
 {
   UCHAR               ucNumberOfObjects;
   UCHAR               ucPadding[3];
-  ATOM_OBJECT         asObjects[1];
+  ATOM_OBJECT         asObjects[];
 }ATOM_OBJECT_TABLE;
 
 typedef struct _ATOM_SRC_DST_TABLE_FOR_ONE_OBJECT         //usSrcDstTableOffset pointing to this structure
@@ -4733,7 +4733,7 @@ typedef struct  _ATOM_CONNECTOR_DEVICE_TAG_RECORD
   ATOM_COMMON_RECORD_HEADER   sheader;
   UCHAR                       ucNumberOfDevice;
   UCHAR                       ucReserved;
-  ATOM_CONNECTOR_DEVICE_TAG   asDeviceTag[1];         //This Id is same as "ATOM_DEVICE_XXX_SUPPORT", 1 is only for allocation
+  ATOM_CONNECTOR_DEVICE_TAG   asDeviceTag[];	       //This Id is same as "ATOM_DEVICE_XXX_SUPPORT"
 }ATOM_CONNECTOR_DEVICE_TAG_RECORD;
 
 
@@ -4793,7 +4793,7 @@ typedef struct  _ATOM_OBJECT_GPIO_CNTL_RECORD
   ATOM_COMMON_RECORD_HEADER   sheader;
   UCHAR                       ucFlags;                // Future expnadibility
   UCHAR                       ucNumberOfPins;         // Number of GPIO pins used to control the object
-  ATOM_GPIO_PIN_CONTROL_PAIR  asGpio[1];              // the real gpio pin pair determined by number of pins ucNumberOfPins
+  ATOM_GPIO_PIN_CONTROL_PAIR  asGpio[];               // the real gpio pin pair determined by number of pins ucNumberOfPins
 }ATOM_OBJECT_GPIO_CNTL_RECORD;
 
 //Definitions for GPIO pin state
@@ -4982,7 +4982,7 @@ typedef struct  _ATOM_BRACKET_LAYOUT_RECORD
   UCHAR                       ucWidth;
   UCHAR                       ucConnNum;
   UCHAR                       ucReserved;
-  ATOM_CONNECTOR_LAYOUT_INFO  asConnInfo[1];
+  ATOM_CONNECTOR_LAYOUT_INFO  asConnInfo[];
 }ATOM_BRACKET_LAYOUT_RECORD;
 
 
@@ -5146,7 +5146,7 @@ typedef struct  _ATOM_I2C_VOLTAGE_OBJECT_V3
    UCHAR  ucVoltageControlOffset;
    UCHAR  ucVoltageControlFlag;              // Bit0: 0 - One byte data; 1 - Two byte data
    UCHAR  ulReserved[3];
-   VOLTAGE_LUT_ENTRY asVolI2cLut[1];         // end with 0xff
+   VOLTAGE_LUT_ENTRY asVolI2cLut[];         // end with 0xff
 }ATOM_I2C_VOLTAGE_OBJECT_V3;
 
 // ATOM_I2C_VOLTAGE_OBJECT_V3.ucVoltageControlFlag
@@ -5161,7 +5161,7 @@ typedef struct  _ATOM_GPIO_VOLTAGE_OBJECT_V3
    UCHAR  ucPhaseDelay;                      // phase delay in unit of micro second
    UCHAR  ucReserved;
    ULONG  ulGpioMaskVal;                     // GPIO Mask value
-   VOLTAGE_LUT_ENTRY_V2 asVolGpioLut[1];
+   VOLTAGE_LUT_ENTRY_V2 asVolGpioLut[];
 }ATOM_GPIO_VOLTAGE_OBJECT_V3;
 
 typedef struct  _ATOM_LEAKAGE_VOLTAGE_OBJECT_V3
@@ -5171,7 +5171,7 @@ typedef struct  _ATOM_LEAKAGE_VOLTAGE_OBJECT_V3
    UCHAR    ucLeakageEntryNum;               // indicate the entry number of LeakageId/Voltage Lut table
    UCHAR    ucReserved[2];
    ULONG    ulMaxVoltageLevel;
-   LEAKAGE_VOLTAGE_LUT_ENTRY_V2 asLeakageIdLut[1];
+   LEAKAGE_VOLTAGE_LUT_ENTRY_V2 asLeakageIdLut[];
 }ATOM_LEAKAGE_VOLTAGE_OBJECT_V3;
 
 
@@ -6599,7 +6599,7 @@ typedef struct _ATOM_FUSION_SYSTEM_INFO_V3
 typedef struct _ATOM_I2C_DATA_RECORD
 {
   UCHAR         ucNunberOfBytes;                                              //Indicates how many bytes SW needs to write to the external ASIC for one block, besides to "Start" and "Stop"
-  UCHAR         ucI2CData[1];                                                 //I2C data in bytes, should be less than 16 bytes usually
+  UCHAR         ucI2CData[];                                                  //I2C data in bytes, should be less than 16 bytes usually
 }ATOM_I2C_DATA_RECORD;
 
 
@@ -6610,14 +6610,14 @@ typedef struct _ATOM_I2C_DEVICE_SETUP_INFO
   UCHAR                              ucSSChipID;             //SS chip being used
   UCHAR                              ucSSChipSlaveAddr;      //Slave Address to set up this SS chip
   UCHAR                           ucNumOfI2CDataRecords;  //number of data block
-  ATOM_I2C_DATA_RECORD            asI2CData[1];
+  ATOM_I2C_DATA_RECORD            asI2CData[];
 }ATOM_I2C_DEVICE_SETUP_INFO;
 
 //==========================================================================================
 typedef struct  _ATOM_ASIC_MVDD_INFO
 {
   ATOM_COMMON_TABLE_HEADER         sHeader;
-  ATOM_I2C_DEVICE_SETUP_INFO      asI2CSetup[1];
+  ATOM_I2C_DEVICE_SETUP_INFO      asI2CSetup[];
 }ATOM_ASIC_MVDD_INFO;
 
 //==========================================================================================
@@ -6679,7 +6679,7 @@ typedef struct _ATOM_ASIC_INTERNAL_SS_INFO
 typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V2
 {
   ATOM_COMMON_TABLE_HEADER         sHeader;
-  ATOM_ASIC_SS_ASSIGNMENT_V2        asSpreadSpectrum[1];      //this is point only.
+  ATOM_ASIC_SS_ASSIGNMENT_V2        asSpreadSpectrum[];      //this is point only.
 }ATOM_ASIC_INTERNAL_SS_INFO_V2;
 
 typedef struct _ATOM_ASIC_SS_ASSIGNMENT_V3
@@ -6701,7 +6701,7 @@ typedef struct _ATOM_ASIC_SS_ASSIGNMENT_V3
 typedef struct _ATOM_ASIC_INTERNAL_SS_INFO_V3
 {
   ATOM_COMMON_TABLE_HEADER         sHeader;
-  ATOM_ASIC_SS_ASSIGNMENT_V3        asSpreadSpectrum[1];      //this is pointer only.
+  ATOM_ASIC_SS_ASSIGNMENT_V3        asSpreadSpectrum[];      //this is pointer only.
 }ATOM_ASIC_INTERNAL_SS_INFO_V3;
 
 
@@ -9292,7 +9292,7 @@ typedef struct {
 
 typedef struct {
   VFCT_IMAGE_HEADER   VbiosHeader;
-  UCHAR   VbiosContent[1];
+  UCHAR   VbiosContent[];
 }GOP_VBIOS_CONTENT;
 
 typedef struct {
diff --git a/drivers/gpu/drm/amd/include/atomfirmware.h b/drivers/gpu/drm/amd/include/atomfirmware.h
index ff855cb21d3f..bbe1337a8cee 100644
--- a/drivers/gpu/drm/amd/include/atomfirmware.h
+++ b/drivers/gpu/drm/amd/include/atomfirmware.h
@@ -705,20 +705,65 @@ struct atom_gpio_pin_lut_v2_1
 };
 
 
-/* 
-  ***************************************************************************
-    Data Table vram_usagebyfirmware  structure
-  ***************************************************************************
-*/
+/*
+ * VBIOS/PRE-OS always reserve a FB region at the top of frame buffer. driver should not write
+ * access that region. driver can allocate their own reservation region as long as it does not
+ * overlap firwmare's reservation region.
+ * if (pre-NV1X) atom data table firmwareInfoTable version < 3.3:
+ * in this case, atom data table vram_usagebyfirmwareTable version always <= 2.1
+ *   if VBIOS/UEFI GOP is posted:
+ *     VBIOS/UEFIGOP update used_by_firmware_in_kb = total reserved size by VBIOS
+ *     update start_address_in_kb = total_mem_size_in_kb - used_by_firmware_in_kb;
+ *     ( total_mem_size_in_kb = reg(CONFIG_MEMSIZE)<<10)
+ *     driver can allocate driver reservation region under firmware reservation,
+ *     used_by_driver_in_kb = driver reservation size
+ *     driver reservation start address =  (start_address_in_kb - used_by_driver_in_kb)
+ *     Comment1[hchan]: There is only one reservation at the beginning of the FB reserved by
+ *     host driver. Host driver would overwrite the table with the following
+ *     used_by_firmware_in_kb = total reserved size for pf-vf info exchange and
+ *     set SRIOV_MSG_SHARE_RESERVATION mask start_address_in_kb = 0
+ *   else there is no VBIOS reservation region:
+ *     driver must allocate driver reservation region at top of FB.
+ *     driver set used_by_driver_in_kb = driver reservation size
+ *     driver reservation start address =  (total_mem_size_in_kb - used_by_driver_in_kb)
+ *     same as Comment1
+ * else (NV1X and after):
+ *   if VBIOS/UEFI GOP is posted:
+ *     VBIOS/UEFIGOP update:
+ *       used_by_firmware_in_kb = atom_firmware_Info_v3_3.fw_reserved_size_in_kb;
+ *       start_address_in_kb = total_mem_size_in_kb - used_by_firmware_in_kb;
+ *       (total_mem_size_in_kb = reg(CONFIG_MEMSIZE)<<10)
+ *   if vram_usagebyfirmwareTable version <= 2.1:
+ *     driver can allocate driver reservation region under firmware reservation,
+ *     driver set used_by_driver_in_kb = driver reservation size
+ *     driver reservation start address = start_address_in_kb - used_by_driver_in_kb
+ *     same as Comment1
+ *   else driver can:
+ *     allocate it reservation any place as long as it does overlap pre-OS FW reservation area
+ *     set used_by_driver_region0_in_kb = driver reservation size
+ *     set driver_region0_start_address_in_kb =  driver reservation region start address
+ *     Comment2[hchan]: Host driver can set used_by_firmware_in_kb and start_address_in_kb to
+ *     zero as the reservation for VF as it doesn’t exist.  And Host driver should also
+ *     update atom_firmware_Info table to remove the same VBIOS reservation as well.
+ */
 
 struct vram_usagebyfirmware_v2_1
 {
-  struct  atom_common_table_header  table_header;
-  uint32_t  start_address_in_kb;
-  uint16_t  used_by_firmware_in_kb;
-  uint16_t  used_by_driver_in_kb; 
+	struct  atom_common_table_header  table_header;
+	uint32_t  start_address_in_kb;
+	uint16_t  used_by_firmware_in_kb;
+	uint16_t  used_by_driver_in_kb;
 };
 
+struct vram_usagebyfirmware_v2_2 {
+	struct  atom_common_table_header  table_header;
+	uint32_t  fw_region_start_address_in_kb;
+	uint16_t  used_by_firmware_in_kb;
+	uint16_t  reserved;
+	uint32_t  driver_region0_start_address_in_kb;
+	uint32_t  used_by_driver_region0_in_kb;
+	uint32_t  reserved32[7];
+};
 
 /* 
   ***************************************************************************
diff --git a/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_4_0.h b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_4_0.h
index a81138c9e491..03cfa0517df2 100644
--- a/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_4_0.h
+++ b/drivers/gpu/drm/amd/include/ivsrcid/vcn/irqsrcs_vcn_4_0.h
@@ -38,4 +38,7 @@
 #define VCN_4_0__SRCID__JPEG6_DECODE					174		// 0xae JRBC6 Decode interrupt
 #define VCN_4_0__SRCID__JPEG7_DECODE					175		// 0xaf JRBC7 Decode interrupt
 
+#define VCN_4_0__SRCID_UVD_POISON					160
+#define VCN_4_0__SRCID_DJPEG0_POISON					161
+#define VCN_4_0__SRCID_EJPEG0_POISON					162
 #endif
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index a40ead44778a..d18162e9ed1d 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -354,7 +354,8 @@ struct amd_pm_funcs {
 	int (*get_power_profile_mode)(void *handle, char *buf);
 	int (*set_power_profile_mode)(void *handle, long *input, uint32_t size);
 	int (*set_fine_grain_clk_vol)(void *handle, uint32_t type, long *input, uint32_t size);
-	int (*odn_edit_dpm_table)(void *handle, uint32_t type, long *input, uint32_t size);
+	int (*odn_edit_dpm_table)(void *handle, enum PP_OD_DPM_TABLE_COMMAND type,
+				  long *input, uint32_t size);
 	int (*set_mp1_state)(void *handle, enum pp_mp1_state mp1_state);
 	int (*smu_i2c_bus_access)(void *handle, bool acquire);
 	int (*gfx_state_change_set)(void *handle, uint32_t state);
diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
index 7e85cdc5bd34..dc694cb246d9 100644
--- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h
+++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h
@@ -222,7 +222,11 @@ union MESAPI_SET_HW_RESOURCES {
 				uint32_t apply_grbm_remote_register_dummy_read_wa : 1;
 				uint32_t second_gfx_pipe_enabled : 1;
 				uint32_t enable_level_process_quantum_check : 1;
-				uint32_t reserved	: 25;
+				uint32_t legacy_sch_mode : 1;
+				uint32_t disable_add_queue_wptr_mc_addr : 1;
+				uint32_t enable_mes_event_int_logging : 1;
+				uint32_t enable_reg_active_poll : 1;
+				uint32_t reserved	: 21;
 			};
 			uint32_t	uint32_t_all;
 		};
diff --git a/drivers/gpu/drm/amd/include/yellow_carp_offset.h b/drivers/gpu/drm/amd/include/yellow_carp_offset.h
index 28a56b56bcaf..0fea6a746611 100644
--- a/drivers/gpu/drm/amd/include/yellow_carp_offset.h
+++ b/drivers/gpu/drm/amd/include/yellow_carp_offset.h
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: MIT
 #ifndef YELLOW_CARP_OFFSET_H
 #define YELLOW_CARP_OFFSET_H
 
diff --git a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
index ec055858eb95..304190d5c9d2 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
@@ -838,7 +838,8 @@ static int pp_set_fine_grain_clk_vol(void *handle, uint32_t type, long *input, u
 	return hwmgr->hwmgr_func->set_fine_grain_clk_vol(hwmgr, type, input, size);
 }
 
-static int pp_odn_edit_dpm_table(void *handle, uint32_t type, long *input, uint32_t size)
+static int pp_odn_edit_dpm_table(void *handle, enum PP_OD_DPM_TABLE_COMMAND type,
+				 long *input, uint32_t size)
 {
 	struct pp_hwmgr *hwmgr = handle;
 
@@ -1507,7 +1508,7 @@ static void pp_pm_compute_clocks(void *handle)
 	struct pp_hwmgr *hwmgr = handle;
 	struct amdgpu_device *adev = hwmgr->adev;
 
-	if (!amdgpu_device_has_dc_support(adev)) {
+	if (!adev->dc_enabled) {
 		amdgpu_dpm_get_active_displays(adev);
 		adev->pm.pm_display_cfg.num_display = adev->pm.dpm.new_active_crtc_count;
 		adev->pm.pm_display_cfg.vrefresh = amdgpu_dpm_get_vrefresh(adev);
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c
index 67d7da0b6fed..1d829402cd2e 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/pp_psm.c
@@ -75,8 +75,10 @@ int psm_init_power_state_table(struct pp_hwmgr *hwmgr)
 	for (i = 0; i < table_entries; i++) {
 		result = hwmgr->hwmgr_func->get_pp_table_entry(hwmgr, i, state);
 		if (result) {
+			kfree(hwmgr->current_ps);
 			kfree(hwmgr->request_ps);
 			kfree(hwmgr->ps);
+			hwmgr->current_ps = NULL;
 			hwmgr->request_ps = NULL;
 			hwmgr->ps = NULL;
 			return -EINVAL;
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
index dad3e3741a4e..190af79f3236 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_thermal.c
@@ -67,22 +67,21 @@ int vega10_fan_ctrl_get_fan_speed_info(struct pp_hwmgr *hwmgr,
 int vega10_fan_ctrl_get_fan_speed_pwm(struct pp_hwmgr *hwmgr,
 		uint32_t *speed)
 {
-	uint32_t current_rpm;
-	uint32_t percent = 0;
-
-	if (hwmgr->thermal_controller.fanInfo.bNoFan)
-		return 0;
+	struct amdgpu_device *adev = hwmgr->adev;
+	uint32_t duty100, duty;
+	uint64_t tmp64;
 
-	if (vega10_get_current_rpm(hwmgr, &current_rpm))
-		return -1;
+	duty100 = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_FDO_CTRL1),
+				CG_FDO_CTRL1, FMAX_DUTY100);
+	duty = REG_GET_FIELD(RREG32_SOC15(THM, 0, mmCG_THERMAL_STATUS),
+				CG_THERMAL_STATUS, FDO_PWM_DUTY);
 
-	if (hwmgr->thermal_controller.
-			advanceFanControlParameters.usMaxFanRPM != 0)
-		percent = current_rpm * 255 /
-			hwmgr->thermal_controller.
-			advanceFanControlParameters.usMaxFanRPM;
+	if (!duty100)
+		return -EINVAL;
 
-	*speed = MIN(percent, 255);
+	tmp64 = (uint64_t)duty * 255;
+	do_div(tmp64, duty100);
+	*speed = MIN((uint32_t)tmp64, 255);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
index 97b3ad369046..b30684c84e20 100644
--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
@@ -2961,7 +2961,8 @@ static int vega20_odn_edit_dpm_table(struct pp_hwmgr *hwmgr,
 			data->od8_settings.od8_settings_array;
 	OverDriveTable_t *od_table =
 			&(data->smc_state_table.overdrive_table);
-	int32_t input_index, input_clk, input_vol, i;
+	int32_t input_clk, input_vol, i;
+	uint32_t input_index;
 	int od8_id;
 	int ret;
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
index 4fe75dd2b329..ca3beb5d8f27 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
@@ -161,7 +161,7 @@ int smu_get_dpm_freq_range(struct smu_context *smu,
 
 int smu_set_gfx_power_up_by_imu(struct smu_context *smu)
 {
-	if (!smu->ppt_funcs && !smu->ppt_funcs->set_gfx_power_up_by_imu)
+	if (!smu->ppt_funcs || !smu->ppt_funcs->set_gfx_power_up_by_imu)
 		return -EOPNOTSUPP;
 
 	return smu->ppt_funcs->set_gfx_power_up_by_imu(smu);
@@ -585,6 +585,7 @@ static int smu_set_funcs(struct amdgpu_device *adev)
 		yellow_carp_set_ppt_funcs(smu);
 		break;
 	case IP_VERSION(13, 0, 4):
+	case IP_VERSION(13, 0, 11):
 		smu_v13_0_4_set_ppt_funcs(smu);
 		break;
 	case IP_VERSION(13, 0, 5):
@@ -1156,22 +1157,21 @@ static int smu_smc_hw_setup(struct smu_context *smu)
 	uint64_t features_supported;
 	int ret = 0;
 
-	if (adev->in_suspend && smu_is_dpm_running(smu)) {
-		dev_info(adev->dev, "dpm has been enabled\n");
-		/* this is needed specifically */
-		switch (adev->ip_versions[MP1_HWIP][0]) {
-		case IP_VERSION(11, 0, 7):
-		case IP_VERSION(11, 0, 11):
-		case IP_VERSION(11, 5, 0):
-		case IP_VERSION(11, 0, 12):
+	switch (adev->ip_versions[MP1_HWIP][0]) {
+	case IP_VERSION(11, 0, 7):
+	case IP_VERSION(11, 0, 11):
+	case IP_VERSION(11, 5, 0):
+	case IP_VERSION(11, 0, 12):
+		if (adev->in_suspend && smu_is_dpm_running(smu)) {
+			dev_info(adev->dev, "dpm has been enabled\n");
 			ret = smu_system_features_control(smu, true);
 			if (ret)
 				dev_err(adev->dev, "Failed system features control!\n");
-			break;
-		default:
-			break;
+			return ret;
 		}
-		return ret;
+		break;
+	default:
+		break;
 	}
 
 	ret = smu_init_display_count(smu, 0);
@@ -1449,6 +1449,7 @@ static int smu_disable_dpms(struct smu_context *smu)
 	switch (adev->ip_versions[MP1_HWIP][0]) {
 	case IP_VERSION(13, 0, 0):
 	case IP_VERSION(13, 0, 7):
+	case IP_VERSION(13, 0, 10):
 		return 0;
 	default:
 		break;
@@ -1517,7 +1518,7 @@ static int smu_disable_dpms(struct smu_context *smu)
 	}
 
 	if (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(9, 4, 2) &&
-	    adev->gfx.rlc.funcs->stop)
+	    !amdgpu_sriov_vf(adev) && adev->gfx.rlc.funcs->stop)
 		adev->gfx.rlc.funcs->stop(adev);
 
 	return ret;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
index e2fa3b066b96..3bc4128a22ac 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
@@ -168,6 +168,7 @@ struct smu_temperature_range {
 	int mem_crit_max;
 	int mem_emergency_max;
 	int software_shutdown_temp;
+	int software_shutdown_temp_offset;
 };
 
 struct smu_state_validation_block {
@@ -568,6 +569,10 @@ struct smu_context
 	u32 param_reg;
 	u32 msg_reg;
 	u32 resp_reg;
+
+	u32 debug_param_reg;
+	u32 debug_msg_reg;
+	u32 debug_resp_reg;
 };
 
 struct i2c_adapter;
@@ -1388,6 +1393,14 @@ enum smu_cmn2asic_mapping_type {
 	CMN2ASIC_MAPPING_WORKLOAD,
 };
 
+enum smu_baco_seq {
+	BACO_SEQ_BACO = 0,
+	BACO_SEQ_MSR,
+	BACO_SEQ_BAMACO,
+	BACO_SEQ_ULPS,
+	BACO_SEQ_COUNT,
+};
+
 #define MSG_MAP(msg, index, valid_in_vf) \
 	[SMU_MSG_##msg] = {1, (index), (valid_in_vf)}
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
index 063f4a737605..d6b964cf73bd 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_0.h
@@ -25,7 +25,7 @@
 #define SMU13_DRIVER_IF_V13_0_0_H
 
 //Increment this version if SkuTable_t or BoardTable_t change
-#define PPTABLE_VERSION 0x24
+#define PPTABLE_VERSION 0x26
 
 #define NUM_GFXCLK_DPM_LEVELS    16
 #define NUM_SOCCLK_DPM_LEVELS    8
@@ -109,6 +109,22 @@
 #define FEATURE_SPARE_63_BIT                  63
 #define NUM_FEATURES                          64
 
+#define ALLOWED_FEATURE_CTRL_DEFAULT 0xFFFFFFFFFFFFFFFFULL
+#define ALLOWED_FEATURE_CTRL_SCPM	((1 << FEATURE_DPM_GFXCLK_BIT) | \
+									(1 << FEATURE_DPM_GFX_POWER_OPTIMIZER_BIT) | \
+									(1 << FEATURE_DPM_UCLK_BIT) | \
+									(1 << FEATURE_DPM_FCLK_BIT) | \
+									(1 << FEATURE_DPM_SOCCLK_BIT) | \
+									(1 << FEATURE_DPM_MP0CLK_BIT) | \
+									(1 << FEATURE_DPM_LINK_BIT) | \
+									(1 << FEATURE_DPM_DCN_BIT) | \
+									(1 << FEATURE_DS_GFXCLK_BIT) | \
+									(1 << FEATURE_DS_SOCCLK_BIT) | \
+									(1 << FEATURE_DS_FCLK_BIT) | \
+									(1 << FEATURE_DS_LCLK_BIT) | \
+									(1 << FEATURE_DS_DCFCLK_BIT) | \
+									(1 << FEATURE_DS_UCLK_BIT))
+
 //For use with feature control messages
 typedef enum {
   FEATURE_PWR_ALL,
@@ -133,6 +149,7 @@ typedef enum {
 #define DEBUG_OVERRIDE_DISABLE_DFLL                    0x00000200
 #define DEBUG_OVERRIDE_ENABLE_RLC_VF_BRINGUP_MODE      0x00000400
 #define DEBUG_OVERRIDE_DFLL_MASTER_MODE                0x00000800
+#define DEBUG_OVERRIDE_ENABLE_PROFILING_MODE           0x00001000
 
 // VR Mapping Bit Defines
 #define VR_MAPPING_VR_SELECT_MASK  0x01
@@ -262,15 +279,15 @@ typedef enum {
 } I2cControllerPort_e;
 
 typedef enum {
-  I2C_CONTROLLER_NAME_VR_GFX = 0,
-  I2C_CONTROLLER_NAME_VR_SOC,
-  I2C_CONTROLLER_NAME_VR_VMEMP,
-  I2C_CONTROLLER_NAME_VR_VDDIO,
-  I2C_CONTROLLER_NAME_LIQUID0,
-  I2C_CONTROLLER_NAME_LIQUID1,
-  I2C_CONTROLLER_NAME_PLX,
-  I2C_CONTROLLER_NAME_OTHER,
-  I2C_CONTROLLER_NAME_COUNT,
+	I2C_CONTROLLER_NAME_VR_GFX = 0,
+	I2C_CONTROLLER_NAME_VR_SOC,
+	I2C_CONTROLLER_NAME_VR_VMEMP,
+	I2C_CONTROLLER_NAME_VR_VDDIO,
+	I2C_CONTROLLER_NAME_LIQUID0,
+	I2C_CONTROLLER_NAME_LIQUID1,
+	I2C_CONTROLLER_NAME_PLX,
+	I2C_CONTROLLER_NAME_FAN_INTAKE,
+	I2C_CONTROLLER_NAME_COUNT,
 } I2cControllerName_e;
 
 typedef enum {
@@ -282,16 +299,17 @@ typedef enum {
   I2C_CONTROLLER_THROTTLER_LIQUID0,
   I2C_CONTROLLER_THROTTLER_LIQUID1,
   I2C_CONTROLLER_THROTTLER_PLX,
+  I2C_CONTROLLER_THROTTLER_FAN_INTAKE,
   I2C_CONTROLLER_THROTTLER_INA3221,
   I2C_CONTROLLER_THROTTLER_COUNT,
 } I2cControllerThrottler_e;
 
 typedef enum {
-  I2C_CONTROLLER_PROTOCOL_VR_XPDE132G5,
-  I2C_CONTROLLER_PROTOCOL_VR_IR35217,
-  I2C_CONTROLLER_PROTOCOL_TMP_TMP102A,
-  I2C_CONTROLLER_PROTOCOL_INA3221,
-  I2C_CONTROLLER_PROTOCOL_COUNT,
+	I2C_CONTROLLER_PROTOCOL_VR_XPDE132G5,
+	I2C_CONTROLLER_PROTOCOL_VR_IR35217,
+	I2C_CONTROLLER_PROTOCOL_TMP_MAX31875,
+	I2C_CONTROLLER_PROTOCOL_INA3221,
+	I2C_CONTROLLER_PROTOCOL_COUNT,
 } I2cControllerProtocol_e;
 
 typedef struct {
@@ -504,9 +522,9 @@ typedef enum  {
   TEMP_HOTSPOT_M,
   TEMP_MEM,
   TEMP_VR_GFX,
+  TEMP_VR_SOC,
   TEMP_VR_MEM0,
   TEMP_VR_MEM1,
-  TEMP_VR_SOC,
   TEMP_VR_U,
   TEMP_LIQUID0,
   TEMP_LIQUID1,
@@ -658,13 +676,20 @@ typedef struct {
 
 #define PP_NUM_OD_VF_CURVE_POINTS PP_NUM_RTAVFS_PWL_ZONES + 1
 
+typedef enum {
+	FAN_MODE_AUTO = 0,
+	FAN_MODE_MANUAL_LINEAR,
+} FanMode_e;
 
 typedef struct {
   uint32_t FeatureCtrlMask;
 
   //Voltage control
   int16_t                VoltageOffsetPerZoneBoundary[PP_NUM_OD_VF_CURVE_POINTS];
-  uint16_t               reserved[2];
+  uint16_t               VddGfxVmax;         // in mV
+
+  uint8_t                IdlePwrSavingFeaturesCtrl;
+  uint8_t                RuntimePwrSavingFeaturesCtrl;
 
   //Frequency changes
   int16_t                GfxclkFmin;           // MHz
@@ -674,7 +699,7 @@ typedef struct {
 
   //PPT
   int16_t                Ppt;         // %
-  int16_t                reserved1;
+  int16_t                Tdc;
 
   //Fan control
   uint8_t                FanLinearPwmPoints[NUM_OD_FAN_MAX_POINTS];
@@ -701,16 +726,19 @@ typedef struct {
   uint32_t FeatureCtrlMask;
 
   int16_t VoltageOffsetPerZoneBoundary;
-  uint16_t               reserved[2];
+  uint16_t               VddGfxVmax;         // in mV
 
-  uint16_t               GfxclkFmin;           // MHz
-  uint16_t               GfxclkFmax;           // MHz
+  uint8_t                IdlePwrSavingFeaturesCtrl;
+  uint8_t                RuntimePwrSavingFeaturesCtrl;
+
+  int16_t               GfxclkFmin;           // MHz
+  int16_t               GfxclkFmax;           // MHz
   uint16_t               UclkFmin;             // MHz
   uint16_t               UclkFmax;             // MHz
 
   //PPT
   int16_t                Ppt;         // %
-  int16_t                reserved1;
+  int16_t                Tdc;
 
   uint8_t                FanLinearPwmPoints;
   uint8_t                FanLinearTempPoints;
@@ -857,7 +885,8 @@ typedef struct {
   uint16_t  FanStartTempMin;
   uint16_t  FanStartTempMax;
 
-  uint32_t Spare[12];
+  uint16_t  PowerMinPpt0[POWER_SOURCE_COUNT];
+  uint32_t Spare[11];
 
 } MsgLimits_t;
 
@@ -1041,7 +1070,17 @@ typedef struct {
   uint32_t        GfxoffSpare[15];
 
   // GFX GPO
-  uint32_t        GfxGpoSpare[16];
+  uint32_t        DfllBtcMasterScalerM;
+  int32_t         DfllBtcMasterScalerB;
+  uint32_t        DfllBtcSlaveScalerM;
+  int32_t         DfllBtcSlaveScalerB;
+
+  uint32_t        DfllPccAsWaitCtrl; //GDFLL_AS_WAIT_CTRL_PCC register value to be passed to RLC msg
+  uint32_t        DfllPccAsStepCtrl; //GDFLL_AS_STEP_CTRL_PCC register value to be passed to RLC msg
+
+  uint32_t        DfllL2FrequencyBoostM; //Unitless (float)
+  uint32_t        DfllL2FrequencyBoostB; //In MHz (integer)
+  uint32_t        GfxGpoSpare[8];
 
   // GFX DCS
 
@@ -1114,12 +1153,14 @@ typedef struct {
   uint16_t IntakeTempHighIntakeAcousticLimit;
   uint16_t IntakeTempAcouticLimitReleaseRate;
 
-  uint16_t FanStalledTempLimitOffset;
+  int16_t FanAbnormalTempLimitOffset;
   uint16_t FanStalledTriggerRpm;
-  uint16_t FanAbnormalTriggerRpm;
-  uint16_t FanPadding;
+  uint16_t FanAbnormalTriggerRpmCoeff;
+  uint16_t FanAbnormalDetectionEnable;
 
-  uint32_t     FanSpare[14];
+  uint8_t      FanIntakeSensorSupport;
+  uint8_t      FanIntakePadding[3];
+  uint32_t     FanSpare[13];
 
   // SECTION: VDD_GFX AVFS
 
@@ -1198,8 +1239,13 @@ typedef struct {
   int16_t     TotalBoardPowerM;
   int16_t     TotalBoardPowerB;
 
+  //PMFW-11158
+  QuadraticInt_t qFeffCoeffGameClock[POWER_SOURCE_COUNT];
+  QuadraticInt_t qFeffCoeffBaseClock[POWER_SOURCE_COUNT];
+  QuadraticInt_t qFeffCoeffBoostClock[POWER_SOURCE_COUNT];
+
   // SECTION: Sku Reserved
-  uint32_t         Spare[61];
+  uint32_t         Spare[43];
 
   // Padding for MMHUB - do not modify this
   uint32_t     MmHubPadding[8];
@@ -1288,8 +1334,11 @@ typedef struct {
   uint32_t    PostVoltageSetBacoDelay; // in microseconds. Amount of time FW will wait after power good is established or PSI0 command is issued
   uint32_t    BacoEntryDelay; // in milliseconds. Amount of time FW will wait to trigger BACO entry after receiving entry notification from OS
 
+  uint8_t     FuseWritePowerMuxPresent;
+  uint8_t     FuseWritePadding[3];
+
   // SECTION: Board Reserved
-  uint32_t     BoardSpare[64];
+  uint32_t     BoardSpare[63];
 
   // SECTION: Structure Padding
 
@@ -1381,7 +1430,7 @@ typedef struct {
   uint16_t AverageTotalBoardPower;
 
   uint16_t AvgTemperature[TEMP_COUNT];
-  uint16_t TempPadding;
+  uint16_t AvgTemperatureFanIntake;
 
   uint8_t  PcieRate               ;
   uint8_t  PcieWidth              ;
@@ -1550,5 +1599,7 @@ typedef struct {
 #define IH_INTERRUPT_CONTEXT_ID_AUDIO_D0            0x5
 #define IH_INTERRUPT_CONTEXT_ID_AUDIO_D3            0x6
 #define IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING  0x7
+#define IH_INTERRUPT_CONTEXT_ID_FAN_ABNORMAL        0x8
+#define IH_INTERRUPT_CONTEXT_ID_FAN_RECOVERY        0x9
 
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
index 25c08f963f49..d6b13933a98f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu13_driver_if_v13_0_7.h
@@ -25,10 +25,10 @@
 
 // *** IMPORTANT ***
 // PMFW TEAM: Always increment the interface version on any change to this file
-#define SMU13_DRIVER_IF_VERSION  0x2C
+#define SMU13_DRIVER_IF_VERSION  0x35
 
 //Increment this version if SkuTable_t or BoardTable_t change
-#define PPTABLE_VERSION 0x20
+#define PPTABLE_VERSION 0x27
 
 #define NUM_GFXCLK_DPM_LEVELS    16
 #define NUM_SOCCLK_DPM_LEVELS    8
@@ -96,7 +96,7 @@
 #define FEATURE_MEM_TEMP_READ_BIT             47
 #define FEATURE_ATHUB_MMHUB_PG_BIT            48
 #define FEATURE_SOC_PCC_BIT                   49
-#define FEATURE_SPARE_50_BIT                  50
+#define FEATURE_EDC_PWRBRK_BIT                50
 #define FEATURE_SPARE_51_BIT                  51
 #define FEATURE_SPARE_52_BIT                  52
 #define FEATURE_SPARE_53_BIT                  53
@@ -282,15 +282,15 @@ typedef enum {
 } I2cControllerPort_e;
 
 typedef enum {
-  I2C_CONTROLLER_NAME_VR_GFX = 0,
-  I2C_CONTROLLER_NAME_VR_SOC,
-  I2C_CONTROLLER_NAME_VR_VMEMP,
-  I2C_CONTROLLER_NAME_VR_VDDIO,
-  I2C_CONTROLLER_NAME_LIQUID0,
-  I2C_CONTROLLER_NAME_LIQUID1,
-  I2C_CONTROLLER_NAME_PLX,
-  I2C_CONTROLLER_NAME_OTHER,
-  I2C_CONTROLLER_NAME_COUNT,
+	I2C_CONTROLLER_NAME_VR_GFX = 0,
+	I2C_CONTROLLER_NAME_VR_SOC,
+	I2C_CONTROLLER_NAME_VR_VMEMP,
+	I2C_CONTROLLER_NAME_VR_VDDIO,
+	I2C_CONTROLLER_NAME_LIQUID0,
+	I2C_CONTROLLER_NAME_LIQUID1,
+	I2C_CONTROLLER_NAME_PLX,
+	I2C_CONTROLLER_NAME_FAN_INTAKE,
+	I2C_CONTROLLER_NAME_COUNT,
 } I2cControllerName_e;
 
 typedef enum {
@@ -302,6 +302,7 @@ typedef enum {
   I2C_CONTROLLER_THROTTLER_LIQUID0,
   I2C_CONTROLLER_THROTTLER_LIQUID1,
   I2C_CONTROLLER_THROTTLER_PLX,
+  I2C_CONTROLLER_THROTTLER_FAN_INTAKE,
   I2C_CONTROLLER_THROTTLER_INA3221,
   I2C_CONTROLLER_THROTTLER_COUNT,
 } I2cControllerThrottler_e;
@@ -309,8 +310,9 @@ typedef enum {
 typedef enum {
   I2C_CONTROLLER_PROTOCOL_VR_XPDE132G5,
   I2C_CONTROLLER_PROTOCOL_VR_IR35217,
-  I2C_CONTROLLER_PROTOCOL_TMP_TMP102A,
+  I2C_CONTROLLER_PROTOCOL_TMP_MAX31875,
   I2C_CONTROLLER_PROTOCOL_INA3221,
+  I2C_CONTROLLER_PROTOCOL_TMP_MAX6604,
   I2C_CONTROLLER_PROTOCOL_COUNT,
 } I2cControllerProtocol_e;
 
@@ -690,6 +692,9 @@ typedef struct {
 #define PP_OD_FEATURE_UCLK_BIT      8
 #define PP_OD_FEATURE_ZERO_FAN_BIT      9
 #define PP_OD_FEATURE_TEMPERATURE_BIT 10
+#define PP_OD_FEATURE_POWER_FEATURE_CTRL_BIT 11
+#define PP_OD_FEATURE_ASIC_TDC_BIT 12
+#define PP_OD_FEATURE_COUNT 13
 
 typedef enum {
   PP_OD_POWER_FEATURE_ALWAYS_ENABLED,
@@ -697,6 +702,11 @@ typedef enum {
   PP_OD_POWER_FEATURE_ALWAYS_DISABLED,
 } PP_OD_POWER_FEATURE_e;
 
+typedef enum {
+  FAN_MODE_AUTO = 0,
+  FAN_MODE_MANUAL_LINEAR,
+} FanMode_e;
+
 typedef struct {
   uint32_t FeatureCtrlMask;
 
@@ -708,8 +718,8 @@ typedef struct {
   uint8_t                RuntimePwrSavingFeaturesCtrl;
 
   //Frequency changes
-  int16_t               GfxclkFmin;           // MHz
-  int16_t               GfxclkFmax;           // MHz
+  int16_t                GfxclkFmin;           // MHz
+  int16_t                GfxclkFmax;           // MHz
   uint16_t               UclkFmin;             // MHz
   uint16_t               UclkFmax;             // MHz
 
@@ -730,7 +740,12 @@ typedef struct {
   uint8_t                MaxOpTemp;
   uint8_t                Padding[4];
 
-  uint32_t               Spare[12];
+  uint16_t               GfxVoltageFullCtrlMode;
+  uint16_t               GfxclkFullCtrlMode;
+  uint16_t               UclkFullCtrlMode;
+  int16_t                AsicTdc;
+
+  uint32_t               Spare[10];
   uint32_t               MmHubPadding[8]; // SMU internal use. Adding here instead of external as a workaround
 } OverDriveTable_t;
 
@@ -748,8 +763,8 @@ typedef struct {
   uint8_t                IdlePwrSavingFeaturesCtrl;
   uint8_t                RuntimePwrSavingFeaturesCtrl;
 
-  uint16_t               GfxclkFmin;           // MHz
-  uint16_t               GfxclkFmax;           // MHz
+  int16_t                GfxclkFmin;           // MHz
+  int16_t                GfxclkFmax;           // MHz
   uint16_t               UclkFmin;             // MHz
   uint16_t               UclkFmax;             // MHz
 
@@ -769,7 +784,12 @@ typedef struct {
   uint8_t                MaxOpTemp;
   uint8_t                Padding[4];
 
-  uint32_t               Spare[12];
+  uint16_t               GfxVoltageFullCtrlMode;
+  uint16_t               GfxclkFullCtrlMode;
+  uint16_t               UclkFullCtrlMode;
+  int16_t                AsicTdc;
+
+  uint32_t               Spare[10];
 
 } OverDriveLimits_t;
 
@@ -903,7 +923,8 @@ typedef struct {
   uint16_t  FanStartTempMin;
   uint16_t  FanStartTempMax;
 
-  uint32_t Spare[12];
+  uint16_t  PowerMinPpt0[POWER_SOURCE_COUNT];
+  uint32_t  Spare[11];
 
 } MsgLimits_t;
 
@@ -1086,11 +1107,13 @@ typedef struct {
   uint32_t        GfxoffSpare[15];
 
   // GFX GPO
-  float           DfllBtcMasterScalerM;
+  uint32_t        DfllBtcMasterScalerM;
   int32_t         DfllBtcMasterScalerB;
-  float           DfllBtcSlaveScalerM;
+  uint32_t        DfllBtcSlaveScalerM;
   int32_t         DfllBtcSlaveScalerB;
-  uint32_t        GfxGpoSpare[12];
+  uint32_t        DfllPccAsWaitCtrl; //GDFLL_AS_WAIT_CTRL_PCC register value to be passed to RLC msg
+  uint32_t        DfllPccAsStepCtrl; //GDFLL_AS_STEP_CTRL_PCC register value to be passed to RLC msg
+  uint32_t        GfxGpoSpare[10];
 
   // GFX DCS
 
@@ -1106,7 +1129,10 @@ typedef struct {
   uint16_t        DcsTimeout;           //This is the amount of time SMU FW waits for RLC to put GFX into GFXOFF before reverting to the fallback mechanism of throttling GFXCLK to Fmin.
 
 
-  uint32_t        DcsSpare[16];
+  uint32_t        DcsSpare[14];
+
+  // UCLK section
+  uint16_t     ShadowFreqTableUclk[NUM_UCLK_DPM_LEVELS];     // In MHz
 
   // UCLK section
   uint8_t      UseStrobeModeOptimizations; //Set to indicate that FW should use strobe mode optimizations
@@ -1163,13 +1189,14 @@ typedef struct {
   uint16_t IntakeTempHighIntakeAcousticLimit;
   uint16_t IntakeTempAcouticLimitReleaseRate;
 
-  uint16_t FanStalledTempLimitOffset;
+  int16_t FanAbnormalTempLimitOffset;
   uint16_t FanStalledTriggerRpm;
-  uint16_t FanAbnormalTriggerRpm;
-  uint16_t FanPadding;
-
-  uint32_t     FanSpare[14];
+  uint16_t FanAbnormalTriggerRpmCoeff;
+  uint16_t FanAbnormalDetectionEnable;
 
+  uint8_t      FanIntakeSensorSupport;
+  uint8_t      FanIntakePadding[3];
+  uint32_t     FanSpare[13];
   // SECTION: VDD_GFX AVFS
 
   uint8_t      OverrideGfxAvfsFuses;
@@ -1193,7 +1220,6 @@ typedef struct {
   uint32_t   dGbV_dT_vmin;
   uint32_t   dGbV_dT_vmax;
 
-  //Unused: PMFW-9370
   uint32_t   V2F_vmin_range_low;
   uint32_t   V2F_vmin_range_high;
   uint32_t   V2F_vmax_range_low;
@@ -1238,8 +1264,21 @@ typedef struct {
   // SECTION: Advanced Options
   uint32_t          DebugOverrides;
 
+  // Section: Total Board Power idle vs active coefficients
+  uint8_t     TotalBoardPowerSupport;
+  uint8_t     TotalBoardPowerPadding[3];
+
+  int16_t     TotalIdleBoardPowerM;
+  int16_t     TotalIdleBoardPowerB;
+  int16_t     TotalBoardPowerM;
+  int16_t     TotalBoardPowerB;
+
+  QuadraticInt_t qFeffCoeffGameClock[POWER_SOURCE_COUNT];
+  QuadraticInt_t qFeffCoeffBaseClock[POWER_SOURCE_COUNT];
+  QuadraticInt_t qFeffCoeffBoostClock[POWER_SOURCE_COUNT];
+
   // SECTION: Sku Reserved
-  uint32_t         Spare[64];
+  uint32_t         Spare[43];
 
   // Padding for MMHUB - do not modify this
   uint32_t     MmHubPadding[8];
@@ -1304,7 +1343,8 @@ typedef struct {
   // SECTION: Clock Spread Spectrum
 
   // UCLK Spread Spectrum
-  uint16_t     UclkSpreadPadding;
+  uint8_t      UclkTrainingModeSpreadPercent; // Q4.4
+  uint8_t      UclkSpreadPadding;
   uint16_t     UclkSpreadFreq;      // kHz
 
   // UCLK Spread Spectrum
@@ -1317,11 +1357,7 @@ typedef struct {
 
   // Section: Memory Config
   uint8_t      DramWidth; // Width of interface to the channel for each DRAM module. See DRAM_BIT_WIDTH_TYPE_e
-  uint8_t      PaddingMem1[3];
-
-  // Section: Total Board Power
-  uint16_t     TotalBoardPower;     //Only needed for TCP Estimated case, where TCP = TGP+Total Board Power
-  uint16_t     BoardPowerPadding;
+  uint8_t      PaddingMem1[7];
 
   // SECTION: UMC feature flags
   uint8_t      HsrEnabled;
@@ -1423,8 +1459,11 @@ typedef struct {
   uint16_t Vcn1ActivityPercentage  ;
 
   uint32_t EnergyAccumulator;
-  uint16_t AverageSocketPower    ;
+  uint16_t AverageSocketPower;
+  uint16_t AverageTotalBoardPower;
+
   uint16_t AvgTemperature[TEMP_COUNT];
+  uint16_t AvgTemperatureFanIntake;
 
   uint8_t  PcieRate               ;
   uint8_t  PcieWidth              ;
@@ -1592,5 +1631,7 @@ typedef struct {
 #define IH_INTERRUPT_CONTEXT_ID_AUDIO_D0            0x5
 #define IH_INTERRUPT_CONTEXT_ID_AUDIO_D3            0x6
 #define IH_INTERRUPT_CONTEXT_ID_THERMAL_THROTTLING  0x7
+#define IH_INTERRUPT_CONTEXT_ID_FAN_ABNORMAL        0x8
+#define IH_INTERRUPT_CONTEXT_ID_FAN_RECOVERY        0x9
 
 #endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
index 9ebb8f39732a..8b8266890a10 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_0_ppsmc.h
@@ -131,7 +131,13 @@
 #define PPSMC_MSG_EnableAudioStutterWA           0x44
 #define PPSMC_MSG_PowerUpUmsch                   0x45
 #define PPSMC_MSG_PowerDownUmsch                 0x46
-#define PPSMC_Message_Count                      0x47
+#define PPSMC_MSG_SetDcsArch                     0x47
+#define PPSMC_MSG_TriggerVFFLR                   0x48
+#define PPSMC_MSG_SetNumBadMemoryPagesRetired    0x49
+#define PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel 0x4A
+#define PPSMC_MSG_SetPriorityDeltaGain           0x4B
+#define PPSMC_MSG_AllowIHHostInterrupt           0x4C
+#define PPSMC_Message_Count                      0x4D
 
 //Debug Dump Message
 #define DEBUGSMC_MSG_TestMessage                    0x1
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_4_ppsmc.h b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_4_ppsmc.h
index d9b0cd752200..f4d6c07b56ea 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_4_ppsmc.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/pmfw_if/smu_v13_0_4_ppsmc.h
@@ -54,14 +54,14 @@
 #define PPSMC_MSG_TestMessage                   0x01 ///< To check if PMFW is alive and responding. Requirement specified by PMFW team
 #define PPSMC_MSG_GetPmfwVersion                0x02 ///< Get PMFW version
 #define PPSMC_MSG_GetDriverIfVersion            0x03 ///< Get PMFW_DRIVER_IF version
-#define PPSMC_MSG_EnableGfxOff                  0x04 ///< Enable GFXOFF
-#define PPSMC_MSG_DisableGfxOff                 0x05 ///< Disable GFXOFF
+#define PPSMC_MSG_SPARE0                        0x04 ///< SPARE
+#define PPSMC_MSG_SPARE1                        0x05 ///< SPARE
 #define PPSMC_MSG_PowerDownVcn                  0x06 ///< Power down VCN
 #define PPSMC_MSG_PowerUpVcn                    0x07 ///< Power up VCN; VCN is power gated by default
 #define PPSMC_MSG_SetHardMinVcn                 0x08 ///< For wireless display
 #define PPSMC_MSG_SetSoftMinGfxclk              0x09 ///< Set SoftMin for GFXCLK, argument is frequency in MHz
-#define PPSMC_MSG_ActiveProcessNotify           0x0A ///< Needs update
-#define PPSMC_MSG_ForcePowerDownGfx             0x0B ///< Force power down GFX, i.e. enter GFXOFF
+#define PPSMC_MSG_SPARE2                        0x0A ///< SPARE
+#define PPSMC_MSG_SPARE3                        0x0B ///< SPARE
 #define PPSMC_MSG_PrepareMp1ForUnload           0x0C ///< Prepare PMFW for GFX driver unload
 #define PPSMC_MSG_SetDriverDramAddrHigh         0x0D ///< Set high 32 bits of DRAM address for Driver table transfer
 #define PPSMC_MSG_SetDriverDramAddrLow          0x0E ///< Set low 32 bits of DRAM address for Driver table transfer
@@ -73,8 +73,7 @@
 #define PPSMC_MSG_SetSoftMinFclk                0x14 ///< Set hard min for FCLK
 #define PPSMC_MSG_SetSoftMinVcn                 0x15 ///< Set soft min for VCN clocks (VCLK and DCLK)
 
-
-#define PPSMC_MSG_EnableGfxImu                  0x16 ///< Needs update
+#define PPSMC_MSG_EnableGfxImu                  0x16 ///< Enable GFX IMU
 
 #define PPSMC_MSG_GetGfxclkFrequency            0x17 ///< Get GFX clock frequency
 #define PPSMC_MSG_GetFclkFrequency              0x18 ///< Get FCLK frequency
@@ -102,8 +101,8 @@
 #define PPSMC_MSG_SetHardMinIspxclkByFreq       0x2C ///< Set HardMin by frequency for ISPXCLK
 #define PPSMC_MSG_PowerDownUmsch                0x2D ///< Power down VCN.UMSCH (aka VSCH) scheduler
 #define PPSMC_MSG_PowerUpUmsch                  0x2E ///< Power up VCN.UMSCH (aka VSCH) scheduler
-#define PPSMC_Message_IspStutterOn_MmhubPgDis   0x2F ///< ISP StutterOn mmHub PgDis
-#define PPSMC_Message_IspStutterOff_MmhubPgEn   0x30 ///< ISP StufferOff mmHub PgEn
+#define PPSMC_MSG_IspStutterOn_MmhubPgDis       0x2F ///< ISP StutterOn mmHub PgDis
+#define PPSMC_MSG_IspStutterOff_MmhubPgEn       0x30 ///< ISP StufferOff mmHub PgEn
 
 #define PPSMC_Message_Count                     0x31 ///< Total number of PPSMC messages
 /** @}*/
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
index 58098b82df66..4180c71d930f 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_types.h
@@ -239,7 +239,10 @@
 	__SMU_DUMMY_MAP(DriverMode2Reset), \
 	__SMU_DUMMY_MAP(GetGfxOffStatus),		 \
 	__SMU_DUMMY_MAP(GetGfxOffEntryCount),		 \
-	__SMU_DUMMY_MAP(LogGfxOffResidency),
+	__SMU_DUMMY_MAP(LogGfxOffResidency),			\
+	__SMU_DUMMY_MAP(SetNumBadMemoryPagesRetired),		\
+	__SMU_DUMMY_MAP(SetBadMemoryPagesRetiredFlagsPerChannel), \
+	__SMU_DUMMY_MAP(AllowGpo),
 
 #undef __SMU_DUMMY_MAP
 #define __SMU_DUMMY_MAP(type)	SMU_MSG_##type
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h
index a9215494dcdd..d466db6f0ad4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v11_0.h
@@ -147,14 +147,6 @@ struct smu_11_5_power_context {
 	uint32_t	max_fast_ppt_limit;
 };
 
-enum smu_v11_0_baco_seq {
-	BACO_SEQ_BACO = 0,
-	BACO_SEQ_MSR,
-	BACO_SEQ_BAMACO,
-	BACO_SEQ_ULPS,
-	BACO_SEQ_COUNT,
-};
-
 #if defined(SWSMU_CODE_LAYER_L2) || defined(SWSMU_CODE_LAYER_L3)
 
 int smu_v11_0_init_microcode(struct smu_context *smu);
@@ -257,7 +249,7 @@ int smu_v11_0_baco_enter(struct smu_context *smu);
 int smu_v11_0_baco_exit(struct smu_context *smu);
 
 int smu_v11_0_baco_set_armd3_sequence(struct smu_context *smu,
-				      enum smu_v11_0_baco_seq baco_seq);
+				      enum smu_baco_seq baco_seq);
 
 int smu_v11_0_mode1_reset(struct smu_context *smu);
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
index 8f72202aea8e..e8c6febb8b64 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/inc/smu_v13_0.h
@@ -28,10 +28,11 @@
 #define SMU13_DRIVER_IF_VERSION_INV 0xFFFFFFFF
 #define SMU13_DRIVER_IF_VERSION_YELLOW_CARP 0x04
 #define SMU13_DRIVER_IF_VERSION_ALDE 0x08
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0 0x34
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_4 0x07
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_5 0x04
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0 0x30
-#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x2C
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10 0x32
+#define SMU13_DRIVER_IF_VERSION_SMU_V13_0_7 0x35
 #define SMU13_DRIVER_IF_VERSION_SMU_V13_0_10 0x1D
 
 #define SMU13_MODE1_RESET_WAIT_TIME_IN_MS 500  //500ms
@@ -124,14 +125,6 @@ struct smu_13_0_power_context {
 	enum smu_13_0_power_state power_state;
 };
 
-enum smu_v13_0_baco_seq {
-	BACO_SEQ_BACO = 0,
-	BACO_SEQ_MSR,
-	BACO_SEQ_BAMACO,
-	BACO_SEQ_ULPS,
-	BACO_SEQ_COUNT,
-};
-
 #if defined(SWSMU_CODE_LAYER_L2) || defined(SWSMU_CODE_LAYER_L3)
 
 int smu_v13_0_init_microcode(struct smu_context *smu);
@@ -218,6 +211,9 @@ int smu_v13_0_set_azalia_d3_pme(struct smu_context *smu);
 int smu_v13_0_get_max_sustainable_clocks_by_dc(struct smu_context *smu,
 					       struct pp_smu_nv_clock_table *max_clocks);
 
+int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu,
+				      enum smu_baco_seq baco_seq);
+
 bool smu_v13_0_baco_is_support(struct smu_context *smu);
 
 enum smu_baco_state smu_v13_0_baco_get_state(struct smu_context *smu);
@@ -277,6 +273,9 @@ int smu_v13_0_init_pptable_microcode(struct smu_context *smu);
 
 int smu_v13_0_run_btc(struct smu_context *smu);
 
+int smu_v13_0_gpo_control(struct smu_context *smu,
+			  bool enablement);
+
 int smu_v13_0_deep_sleep_control(struct smu_context *smu,
 				 bool enablement);
 
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
index 74996a8fb671..697e98a0a20a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
@@ -377,7 +377,13 @@ static void sienna_cichlid_check_bxco_support(struct smu_context *smu)
 		if (((adev->pdev->device == 0x73A1) &&
 		    (adev->pdev->revision == 0x00)) ||
 		    ((adev->pdev->device == 0x73BF) &&
-		    (adev->pdev->revision == 0xCF)))
+		    (adev->pdev->revision == 0xCF)) ||
+		    ((adev->pdev->device == 0x7422) &&
+		    (adev->pdev->revision == 0x00)) ||
+		    ((adev->pdev->device == 0x73A3) &&
+		    (adev->pdev->revision == 0x00)) ||
+		    ((adev->pdev->device == 0x73E3) &&
+		    (adev->pdev->revision == 0x00)))
 			smu_baco->platform_support = false;
 
 	}
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
index dccbd9f70723..ad66d57aa102 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
@@ -79,6 +79,17 @@ MODULE_FIRMWARE("amdgpu/beige_goby_smc.bin");
 #define mmTHM_BACO_CNTL_ARCT			0xA7
 #define mmTHM_BACO_CNTL_ARCT_BASE_IDX		0
 
+static void smu_v11_0_poll_baco_exit(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+	uint32_t data, loop = 0;
+
+	do {
+		usleep_range(1000, 1100);
+		data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
+	} while ((data & 0x100) && (++loop < 100));
+}
+
 int smu_v11_0_init_microcode(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
@@ -1576,7 +1587,7 @@ int smu_v11_0_set_azalia_d3_pme(struct smu_context *smu)
 }
 
 int smu_v11_0_baco_set_armd3_sequence(struct smu_context *smu,
-				      enum smu_v11_0_baco_seq baco_seq)
+				      enum smu_baco_seq baco_seq)
 {
 	return smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ArmD3, baco_seq, NULL);
 }
@@ -1588,6 +1599,10 @@ bool smu_v11_0_baco_is_support(struct smu_context *smu)
 	if (amdgpu_sriov_vf(smu->adev) || !smu_baco->platform_support)
 		return false;
 
+	/* return true if ASIC is in BACO state already */
+	if (smu_v11_0_baco_get_state(smu) == SMU_BACO_STATE_ENTER)
+		return true;
+
 	/* Arcturus does not support this bit mask */
 	if (smu_cmn_feature_is_supported(smu, SMU_FEATURE_BACO_BIT) &&
 	   !smu_cmn_feature_is_enabled(smu, SMU_FEATURE_BACO_BIT))
@@ -1685,7 +1700,18 @@ int smu_v11_0_baco_enter(struct smu_context *smu)
 
 int smu_v11_0_baco_exit(struct smu_context *smu)
 {
-	return smu_v11_0_baco_set_state(smu, SMU_BACO_STATE_EXIT);
+	int ret;
+
+	ret = smu_v11_0_baco_set_state(smu, SMU_BACO_STATE_EXIT);
+	if (!ret) {
+		/*
+		 * Poll BACO exit status to ensure FW has completed
+		 * BACO exit process to avoid timing issues.
+		 */
+		smu_v11_0_poll_baco_exit(smu);
+	}
+
+	return ret;
 }
 
 int smu_v11_0_mode1_reset(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
index c4552ade8d44..e54b760b875b 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
@@ -250,6 +250,7 @@ int smu_v13_0_check_fw_status(struct smu_context *smu)
 
 	switch (adev->ip_versions[MP1_HWIP][0]) {
 	case IP_VERSION(13, 0, 4):
+	case IP_VERSION(13, 0, 11):
 		mp1_fw_flags = RREG32_PCIE(MP1_Public |
 					   (smnMP1_V13_0_4_FIRMWARE_FLAGS & 0xffffffff));
 		break;
@@ -289,7 +290,10 @@ int smu_v13_0_check_fw_version(struct smu_context *smu)
 		smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_ALDE;
 		break;
 	case IP_VERSION(13, 0, 0):
-		smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_0;
+		smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_0;
+		break;
+	case IP_VERSION(13, 0, 10):
+		smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_0_10;
 		break;
 	case IP_VERSION(13, 0, 7):
 		smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_7;
@@ -300,14 +304,12 @@ int smu_v13_0_check_fw_version(struct smu_context *smu)
 		smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_YELLOW_CARP;
 		break;
 	case IP_VERSION(13, 0, 4):
+	case IP_VERSION(13, 0, 11):
 		smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_4;
 		break;
 	case IP_VERSION(13, 0, 5):
 		smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_5;
 		break;
-	case IP_VERSION(13, 0, 10):
-		smu->smc_driver_if_version = SMU13_DRIVER_IF_VERSION_SMU_V13_0_10;
-		break;
 	default:
 		dev_err(adev->dev, "smu unsupported IP version: 0x%x.\n",
 			adev->ip_versions[MP1_HWIP][0]);
@@ -842,6 +844,8 @@ int smu_v13_0_gfx_off_control(struct smu_context *smu, bool enable)
 	case IP_VERSION(13, 0, 5):
 	case IP_VERSION(13, 0, 7):
 	case IP_VERSION(13, 0, 8):
+	case IP_VERSION(13, 0, 10):
+	case IP_VERSION(13, 0, 11):
 		if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
 			return 0;
 		if (enable)
@@ -1377,6 +1381,7 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev,
 	 */
 	uint32_t ctxid = entry->src_data[0];
 	uint32_t data;
+	uint32_t high;
 
 	if (client_id == SOC15_IH_CLIENTID_THM) {
 		switch (src_id) {
@@ -1433,6 +1438,36 @@ static int smu_v13_0_irq_process(struct amdgpu_device *adev,
 					schedule_work(&smu->throttling_logging_work);
 
 				break;
+			case 0x8:
+				high = smu->thermal_range.software_shutdown_temp +
+					smu->thermal_range.software_shutdown_temp_offset;
+				high = min_t(typeof(high),
+					     SMU_THERMAL_MAXIMUM_ALERT_TEMP,
+					     high);
+				dev_emerg(adev->dev, "Reduce soft CTF limit to %d (by an offset %d)\n",
+							high,
+							smu->thermal_range.software_shutdown_temp_offset);
+
+				data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL);
+				data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL,
+							DIG_THERM_INTH,
+							(high & 0xff));
+				data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK);
+				WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data);
+				break;
+			case 0x9:
+				high = min_t(typeof(high),
+					     SMU_THERMAL_MAXIMUM_ALERT_TEMP,
+					     smu->thermal_range.software_shutdown_temp);
+				dev_emerg(adev->dev, "Recover soft CTF limit to %d\n", high);
+
+				data = RREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL);
+				data = REG_SET_FIELD(data, THM_THERMAL_INT_CTRL,
+							DIG_THERM_INTH,
+							(high & 0xff));
+				data = data & (~THM_THERMAL_INT_CTRL__THERM_TRIGGER_MASK_MASK);
+				WREG32_SOC15(THM, 0, regTHM_THERMAL_INT_CTRL, data);
+				break;
 			}
 		}
 	}
@@ -2147,6 +2182,21 @@ int smu_v13_0_run_btc(struct smu_context *smu)
 	return res;
 }
 
+int smu_v13_0_gpo_control(struct smu_context *smu,
+			  bool enablement)
+{
+	int res;
+
+	res = smu_cmn_send_smc_msg_with_param(smu,
+					      SMU_MSG_AllowGpo,
+					      enablement ? 1 : 0,
+					      NULL);
+	if (res)
+		dev_err(smu->adev->dev, "SetGpoAllow %d failed!\n", enablement);
+
+	return res;
+}
+
 int smu_v13_0_deep_sleep_control(struct smu_context *smu,
 				 bool enablement)
 {
@@ -2231,6 +2281,15 @@ int smu_v13_0_gfx_ulv_control(struct smu_context *smu,
 	return ret;
 }
 
+int smu_v13_0_baco_set_armd3_sequence(struct smu_context *smu,
+				      enum smu_baco_seq baco_seq)
+{
+	return smu_cmn_send_smc_msg_with_param(smu,
+					       SMU_MSG_ArmD3,
+					       baco_seq,
+					       NULL);
+}
+
 bool smu_v13_0_baco_is_support(struct smu_context *smu)
 {
 	struct smu_baco_context *smu_baco = &smu->smu_baco;
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
index 29529328152d..9643b21c636a 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
@@ -70,6 +70,26 @@
 
 #define MP0_MP1_DATA_REGION_SIZE_COMBOPPTABLE	0x4000
 
+#define mmMP1_SMN_C2PMSG_66                                                                            0x0282
+#define mmMP1_SMN_C2PMSG_66_BASE_IDX                                                                   0
+
+#define mmMP1_SMN_C2PMSG_82                                                                            0x0292
+#define mmMP1_SMN_C2PMSG_82_BASE_IDX                                                                   0
+
+#define mmMP1_SMN_C2PMSG_90                                                                            0x029a
+#define mmMP1_SMN_C2PMSG_90_BASE_IDX                                                                   0
+
+#define mmMP1_SMN_C2PMSG_75                                                                            0x028b
+#define mmMP1_SMN_C2PMSG_75_BASE_IDX                                                                   0
+
+#define mmMP1_SMN_C2PMSG_53                                                                            0x0275
+#define mmMP1_SMN_C2PMSG_53_BASE_IDX                                                                   0
+
+#define mmMP1_SMN_C2PMSG_54                                                                            0x0276
+#define mmMP1_SMN_C2PMSG_54_BASE_IDX                                                                   0
+
+#define DEBUGSMC_MSG_Mode1Reset	2
+
 static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] = {
 	MSG_MAP(TestMessage,			PPSMC_MSG_TestMessage,                 1),
 	MSG_MAP(GetSmuVersion,			PPSMC_MSG_GetSmuVersion,               1),
@@ -120,6 +140,11 @@ static struct cmn2asic_msg_mapping smu_v13_0_0_message_map[SMU_MSG_MAX_COUNT] =
 	MSG_MAP(Mode1Reset,			PPSMC_MSG_Mode1Reset,                  0),
 	MSG_MAP(PrepareMp1ForUnload,		PPSMC_MSG_PrepareMp1ForUnload,         0),
 	MSG_MAP(DFCstateControl,		PPSMC_MSG_SetExternalClientDfCstateAllow, 0),
+	MSG_MAP(ArmD3,				PPSMC_MSG_ArmD3,                       0),
+	MSG_MAP(SetNumBadMemoryPagesRetired,	PPSMC_MSG_SetNumBadMemoryPagesRetired,   0),
+	MSG_MAP(SetBadMemoryPagesRetiredFlagsPerChannel,
+			    PPSMC_MSG_SetBadMemoryPagesRetiredFlagsPerChannel,   0),
+	MSG_MAP(AllowGpo,			PPSMC_MSG_SetGpoAllow,           0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_clk_map[SMU_CLK_COUNT] = {
@@ -186,6 +211,8 @@ static struct cmn2asic_mapping smu_v13_0_0_feature_mask_map[SMU_FEATURE_COUNT] =
 	FEA_MAP(MEM_TEMP_READ),
 	FEA_MAP(ATHUB_MMHUB_PG),
 	FEA_MAP(SOC_PCC),
+	[SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
+	[SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
 };
 
 static struct cmn2asic_mapping smu_v13_0_0_table_map[SMU_TABLE_COUNT] = {
@@ -516,6 +543,23 @@ static int smu_v13_0_0_set_default_dpm_table(struct smu_context *smu)
 						     dpm_table);
 		if (ret)
 			return ret;
+
+		/*
+		 * Update the reported maximum shader clock to the value
+		 * which can be guarded to be achieved on all cards. This
+		 * is aligned with Window setting. And considering that value
+		 * might be not the peak frequency the card can achieve, it
+		 * is normal some real-time clock frequency can overtake this
+		 * labelled maximum clock frequency(for example in pp_dpm_sclk
+		 * sysfs output).
+		 */
+		if (skutable->DriverReportedClocks.GameClockAc &&
+		    (dpm_table->dpm_levels[dpm_table->count - 1].value >
+		    skutable->DriverReportedClocks.GameClockAc)) {
+			dpm_table->dpm_levels[dpm_table->count - 1].value =
+				skutable->DriverReportedClocks.GameClockAc;
+			dpm_table->max = skutable->DriverReportedClocks.GameClockAc;
+		}
 	} else {
 		dpm_table->count = 1;
 		dpm_table->dpm_levels[0].value = smu->smu_table.boot_values.gfxclk / 100;
@@ -778,6 +822,57 @@ static int smu_v13_0_0_get_smu_metrics_data(struct smu_context *smu,
 	return ret;
 }
 
+static int smu_v13_0_0_get_dpm_ultimate_freq(struct smu_context *smu,
+					     enum smu_clk_type clk_type,
+					     uint32_t *min,
+					     uint32_t *max)
+{
+	struct smu_13_0_dpm_context *dpm_context =
+		smu->smu_dpm.dpm_context;
+	struct smu_13_0_dpm_table *dpm_table;
+
+	switch (clk_type) {
+	case SMU_MCLK:
+	case SMU_UCLK:
+		/* uclk dpm table */
+		dpm_table = &dpm_context->dpm_tables.uclk_table;
+		break;
+	case SMU_GFXCLK:
+	case SMU_SCLK:
+		/* gfxclk dpm table */
+		dpm_table = &dpm_context->dpm_tables.gfx_table;
+		break;
+	case SMU_SOCCLK:
+		/* socclk dpm table */
+		dpm_table = &dpm_context->dpm_tables.soc_table;
+		break;
+	case SMU_FCLK:
+		/* fclk dpm table */
+		dpm_table = &dpm_context->dpm_tables.fclk_table;
+		break;
+	case SMU_VCLK:
+	case SMU_VCLK1:
+		/* vclk dpm table */
+		dpm_table = &dpm_context->dpm_tables.vclk_table;
+		break;
+	case SMU_DCLK:
+	case SMU_DCLK1:
+		/* dclk dpm table */
+		dpm_table = &dpm_context->dpm_tables.dclk_table;
+		break;
+	default:
+		dev_err(smu->adev->dev, "Unsupported clock type!\n");
+		return -EINVAL;
+	}
+
+	if (min)
+		*min = dpm_table->min;
+	if (max)
+		*max = dpm_table->max;
+
+	return 0;
+}
+
 static int smu_v13_0_0_read_sensor(struct smu_context *smu,
 				   enum amd_pp_sensors sensor,
 				   void *data,
@@ -1280,9 +1375,17 @@ static int smu_v13_0_0_populate_umd_state_clk(struct smu_context *smu)
 				&dpm_context->dpm_tables.fclk_table;
 	struct smu_umd_pstate_table *pstate_table =
 				&smu->pstate_table;
+	struct smu_table_context *table_context = &smu->smu_table;
+	PPTable_t *pptable = table_context->driver_pptable;
+	DriverReportedClocks_t driver_clocks =
+			pptable->SkuTable.DriverReportedClocks;
 
 	pstate_table->gfxclk_pstate.min = gfx_table->min;
-	pstate_table->gfxclk_pstate.peak = gfx_table->max;
+	if (driver_clocks.GameClockAc &&
+	    (driver_clocks.GameClockAc < gfx_table->max))
+		pstate_table->gfxclk_pstate.peak = driver_clocks.GameClockAc;
+	else
+		pstate_table->gfxclk_pstate.peak = gfx_table->max;
 
 	pstate_table->uclk_pstate.min = mem_table->min;
 	pstate_table->uclk_pstate.peak = mem_table->max;
@@ -1299,12 +1402,12 @@ static int smu_v13_0_0_populate_umd_state_clk(struct smu_context *smu)
 	pstate_table->fclk_pstate.min = fclk_table->min;
 	pstate_table->fclk_pstate.peak = fclk_table->max;
 
-	/*
-	 * For now, just use the mininum clock frequency.
-	 * TODO: update them when the real pstate settings available
-	 */
-	pstate_table->gfxclk_pstate.standard = gfx_table->min;
-	pstate_table->uclk_pstate.standard = mem_table->min;
+	if (driver_clocks.BaseClockAc &&
+	    driver_clocks.BaseClockAc < gfx_table->max)
+		pstate_table->gfxclk_pstate.standard = driver_clocks.BaseClockAc;
+	else
+		pstate_table->gfxclk_pstate.standard = gfx_table->max;
+	pstate_table->uclk_pstate.standard = mem_table->max;
 	pstate_table->socclk_pstate.standard = soc_table->min;
 	pstate_table->vclk_pstate.standard = vclk_table->min;
 	pstate_table->dclk_pstate.standard = dclk_table->min;
@@ -1338,12 +1441,23 @@ out:
 static int smu_v13_0_0_get_fan_speed_pwm(struct smu_context *smu,
 					 uint32_t *speed)
 {
+	int ret;
+
 	if (!speed)
 		return -EINVAL;
 
-	return smu_v13_0_0_get_smu_metrics_data(smu,
-						METRICS_CURR_FANPWM,
-						speed);
+	ret = smu_v13_0_0_get_smu_metrics_data(smu,
+					       METRICS_CURR_FANPWM,
+					       speed);
+	if (ret) {
+		dev_err(smu->adev->dev, "Failed to get fan speed(PWM)!");
+		return ret;
+	}
+
+	/* Convert the PMFW output which is in percent to pwm(255) based */
+	*speed = MIN(*speed * 255 / 100, 255);
+
+	return 0;
 }
 
 static int smu_v13_0_0_get_fan_speed_rpm(struct smu_context *smu,
@@ -1566,6 +1680,31 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
 					       NULL);
 }
 
+static int smu_v13_0_0_baco_enter(struct smu_context *smu)
+{
+	struct smu_baco_context *smu_baco = &smu->smu_baco;
+	struct amdgpu_device *adev = smu->adev;
+
+	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev))
+		return smu_v13_0_baco_set_armd3_sequence(smu,
+				smu_baco->maco_support ? BACO_SEQ_BAMACO : BACO_SEQ_BACO);
+	else
+		return smu_v13_0_baco_enter(smu);
+}
+
+static int smu_v13_0_0_baco_exit(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+
+	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
+		/* Wait for PMFW handling for the Dstate change */
+		usleep_range(10000, 11000);
+		return smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
+	} else {
+		return smu_v13_0_baco_exit(smu);
+	}
+}
+
 static bool smu_v13_0_0_is_mode1_reset_supported(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
@@ -1763,6 +1902,69 @@ static int smu_v13_0_0_set_df_cstate(struct smu_context *smu,
 					       NULL);
 }
 
+static int smu_v13_0_0_mode1_reset(struct smu_context *smu)
+{
+	int ret;
+	struct amdgpu_device *adev = smu->adev;
+
+	if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 10))
+		ret = smu_cmn_send_debug_smc_msg(smu, DEBUGSMC_MSG_Mode1Reset);
+	else
+		ret = smu_cmn_send_smc_msg(smu, SMU_MSG_Mode1Reset, NULL);
+
+	if (!ret)
+		msleep(SMU13_MODE1_RESET_WAIT_TIME_IN_MS);
+
+	return ret;
+}
+
+static void smu_v13_0_0_set_smu_mailbox_registers(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+
+	smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82);
+	smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66);
+	smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
+
+	smu->debug_param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_53);
+	smu->debug_msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_75);
+	smu->debug_resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_54);
+}
+
+static int smu_v13_0_0_smu_send_bad_mem_page_num(struct smu_context *smu,
+		uint32_t size)
+{
+	int ret = 0;
+
+	/* message SMU to update the bad page number on SMUBUS */
+	ret = smu_cmn_send_smc_msg_with_param(smu,
+					  SMU_MSG_SetNumBadMemoryPagesRetired,
+					  size, NULL);
+	if (ret)
+		dev_err(smu->adev->dev,
+			  "[%s] failed to message SMU to update bad memory pages number\n",
+			  __func__);
+
+	return ret;
+}
+
+static int smu_v13_0_0_send_bad_mem_channel_flag(struct smu_context *smu,
+		uint32_t size)
+{
+	int ret = 0;
+
+	/* message SMU to update the bad channel info on SMUBUS */
+	ret = smu_cmn_send_smc_msg_with_param(smu,
+				  SMU_MSG_SetBadMemoryPagesRetiredFlagsPerChannel,
+				  size, NULL);
+	if (ret)
+		dev_err(smu->adev->dev,
+			  "[%s] failed to message SMU to update bad memory pages channel info\n",
+			  __func__);
+
+	return ret;
+}
+
 static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.get_allowed_feature_mask = smu_v13_0_0_get_allowed_feature_mask,
 	.set_default_dpm_table = smu_v13_0_0_set_default_dpm_table,
@@ -1787,7 +1989,7 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.get_enabled_mask = smu_cmn_get_enabled_mask,
 	.dpm_set_vcn_enable = smu_v13_0_set_vcn_enable,
 	.dpm_set_jpeg_enable = smu_v13_0_set_jpeg_enable,
-	.get_dpm_ultimate_freq = smu_v13_0_get_dpm_ultimate_freq,
+	.get_dpm_ultimate_freq = smu_v13_0_0_get_dpm_ultimate_freq,
 	.get_vbios_bootup_values = smu_v13_0_get_vbios_bootup_values,
 	.read_sensor = smu_v13_0_0_read_sensor,
 	.feature_is_enabled = smu_cmn_feature_is_enabled,
@@ -1827,12 +2029,15 @@ static const struct pptable_funcs smu_v13_0_0_ppt_funcs = {
 	.baco_is_support = smu_v13_0_baco_is_support,
 	.baco_get_state = smu_v13_0_baco_get_state,
 	.baco_set_state = smu_v13_0_baco_set_state,
-	.baco_enter = smu_v13_0_baco_enter,
-	.baco_exit = smu_v13_0_baco_exit,
+	.baco_enter = smu_v13_0_0_baco_enter,
+	.baco_exit = smu_v13_0_0_baco_exit,
 	.mode1_reset_is_support = smu_v13_0_0_is_mode1_reset_supported,
-	.mode1_reset = smu_v13_0_mode1_reset,
+	.mode1_reset = smu_v13_0_0_mode1_reset,
 	.set_mp1_state = smu_v13_0_0_set_mp1_state,
 	.set_df_cstate = smu_v13_0_0_set_df_cstate,
+	.send_hbm_bad_pages_num = smu_v13_0_0_smu_send_bad_mem_page_num,
+	.send_hbm_bad_channel_flag = smu_v13_0_0_send_bad_mem_channel_flag,
+	.gpo_control = smu_v13_0_gpo_control,
 };
 
 void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
@@ -1844,5 +2049,5 @@ void smu_v13_0_0_set_ppt_funcs(struct smu_context *smu)
 	smu->table_map = smu_v13_0_0_table_map;
 	smu->pwr_src_map = smu_v13_0_0_pwr_src_map;
 	smu->workload_map = smu_v13_0_0_workload_map;
-	smu_v13_0_set_smu_mailbox_registers(smu);
+	smu_v13_0_0_set_smu_mailbox_registers(smu);
 }
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
index 97e1d55dcaad..8fa9a36c38b6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_4_ppt.c
@@ -1026,6 +1026,15 @@ static const struct pptable_funcs smu_v13_0_4_ppt_funcs = {
 	.set_gfx_power_up_by_imu = smu_v13_0_set_gfx_power_up_by_imu,
 };
 
+static void smu_v13_0_4_set_smu_mailbox_registers(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+
+	smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82);
+	smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66);
+	smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
+}
+
 void smu_v13_0_4_set_ppt_funcs(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
@@ -1035,7 +1044,9 @@ void smu_v13_0_4_set_ppt_funcs(struct smu_context *smu)
 	smu->feature_map = smu_v13_0_4_feature_mask_map;
 	smu->table_map = smu_v13_0_4_table_map;
 	smu->is_apu = true;
-	smu->param_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_82);
-	smu->msg_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_66);
-	smu->resp_reg = SOC15_REG_OFFSET(MP1, 0, mmMP1_SMN_C2PMSG_90);
+
+	if (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 4))
+		smu_v13_0_4_set_smu_mailbox_registers(smu);
+	else
+		smu_v13_0_set_smu_mailbox_registers(smu);
 }
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
index c4102cfb734c..5c6c6ad011ca 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
@@ -122,6 +122,8 @@ static struct cmn2asic_msg_mapping smu_v13_0_7_message_map[SMU_MSG_MAX_COUNT] =
 	MSG_MAP(PrepareMp1ForUnload,		PPSMC_MSG_PrepareMp1ForUnload,         0),
 	MSG_MAP(SetMGpuFanBoostLimitRpm,	PPSMC_MSG_SetMGpuFanBoostLimitRpm,     0),
 	MSG_MAP(DFCstateControl,		PPSMC_MSG_SetExternalClientDfCstateAllow, 0),
+	MSG_MAP(ArmD3,				PPSMC_MSG_ArmD3,                       0),
+	MSG_MAP(AllowGpo,			PPSMC_MSG_SetGpoAllow,           0),
 };
 
 static struct cmn2asic_mapping smu_v13_0_7_clk_map[SMU_CLK_COUNT] = {
@@ -188,6 +190,8 @@ static struct cmn2asic_mapping smu_v13_0_7_feature_mask_map[SMU_FEATURE_COUNT] =
 	FEA_MAP(MEM_TEMP_READ),
 	FEA_MAP(ATHUB_MMHUB_PG),
 	FEA_MAP(SOC_PCC),
+	[SMU_FEATURE_DPM_VCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
+	[SMU_FEATURE_DPM_DCLK_BIT] = {1, FEATURE_MM_DPM_BIT},
 };
 
 static struct cmn2asic_mapping smu_v13_0_7_table_map[SMU_TABLE_COUNT] = {
@@ -1222,6 +1226,7 @@ static int smu_v13_0_7_get_thermal_temperature_range(struct smu_context *smu,
 	range->mem_emergency_max = (pptable->SkuTable.TemperatureLimit[TEMP_MEM] + CTF_OFFSET_MEM)*
 		SMU_TEMPERATURE_UNITS_PER_CENTIGRADES;
 	range->software_shutdown_temp = powerplay_table->software_shutdown_temp;
+	range->software_shutdown_temp_offset = pptable->SkuTable.FanAbnormalTempLimitOffset;
 
 	return 0;
 }
@@ -1358,12 +1363,23 @@ static int smu_v13_0_7_populate_umd_state_clk(struct smu_context *smu)
 static int smu_v13_0_7_get_fan_speed_pwm(struct smu_context *smu,
 					 uint32_t *speed)
 {
+	int ret;
+
 	if (!speed)
 		return -EINVAL;
 
-	return smu_v13_0_7_get_smu_metrics_data(smu,
-						METRICS_CURR_FANPWM,
-						speed);
+	ret = smu_v13_0_7_get_smu_metrics_data(smu,
+					       METRICS_CURR_FANPWM,
+					       speed);
+	if (ret) {
+		dev_err(smu->adev->dev, "Failed to get fan speed(PWM)!");
+		return ret;
+	}
+
+	/* Convert the PMFW output which is in percent to pwm(255) based */
+	*speed = MIN(*speed * 255 / 100, 255);
+
+	return 0;
 }
 
 static int smu_v13_0_7_get_fan_speed_rpm(struct smu_context *smu,
@@ -1435,7 +1451,7 @@ static int smu_v13_0_7_get_power_limit(struct smu_context *smu,
 
 static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf)
 {
-	DpmActivityMonitorCoeffIntExternal_t activity_monitor_external[PP_SMC_POWER_PROFILE_COUNT];
+	DpmActivityMonitorCoeffIntExternal_t *activity_monitor_external;
 	uint32_t i, j, size = 0;
 	int16_t workload_type = 0;
 	int result = 0;
@@ -1443,6 +1459,12 @@ static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf
 	if (!buf)
 		return -EINVAL;
 
+	activity_monitor_external = kcalloc(PP_SMC_POWER_PROFILE_COUNT,
+					    sizeof(*activity_monitor_external),
+					    GFP_KERNEL);
+	if (!activity_monitor_external)
+		return -ENOMEM;
+
 	size += sysfs_emit_at(buf, size, "                              ");
 	for (i = 0; i <= PP_SMC_POWER_PROFILE_WINDOW3D; i++)
 		size += sysfs_emit_at(buf, size, "%-14s%s", amdgpu_pp_profile_name[i],
@@ -1455,15 +1477,17 @@ static int smu_v13_0_7_get_power_profile_mode(struct smu_context *smu, char *buf
 		workload_type = smu_cmn_to_asic_specific_index(smu,
 							       CMN2ASIC_MAPPING_WORKLOAD,
 							       i);
-		if (workload_type < 0)
-			return -EINVAL;
+		if (workload_type < 0) {
+			result = -EINVAL;
+			goto out;
+		}
 
 		result = smu_cmn_update_table(smu,
 					  SMU_TABLE_ACTIVITY_MONITOR_COEFF, workload_type,
 					  (void *)(&activity_monitor_external[i]), false);
 		if (result) {
 			dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
-			return result;
+			goto out;
 		}
 	}
 
@@ -1491,7 +1515,10 @@ do {													\
 	PRINT_DPM_MONITOR(Fclk_BoosterFreq);
 #undef PRINT_DPM_MONITOR
 
-	return size;
+	result = size;
+out:
+	kfree(activity_monitor_external);
+	return result;
 }
 
 static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
@@ -1578,6 +1605,31 @@ static int smu_v13_0_7_set_mp1_state(struct smu_context *smu,
 	return ret;
 }
 
+static int smu_v13_0_7_baco_enter(struct smu_context *smu)
+{
+	struct smu_baco_context *smu_baco = &smu->smu_baco;
+	struct amdgpu_device *adev = smu->adev;
+
+	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev))
+		return smu_v13_0_baco_set_armd3_sequence(smu,
+				smu_baco->maco_support ? BACO_SEQ_BAMACO : BACO_SEQ_BACO);
+	else
+		return smu_v13_0_baco_enter(smu);
+}
+
+static int smu_v13_0_7_baco_exit(struct smu_context *smu)
+{
+	struct amdgpu_device *adev = smu->adev;
+
+	if (adev->in_runpm && smu_cmn_is_audio_func_enabled(adev)) {
+		/* Wait for PMFW handling for the Dstate change */
+		usleep_range(10000, 11000);
+		return smu_v13_0_baco_set_armd3_sequence(smu, BACO_SEQ_ULPS);
+	} else {
+		return smu_v13_0_baco_exit(smu);
+	}
+}
+
 static bool smu_v13_0_7_is_mode1_reset_supported(struct smu_context *smu)
 {
 	struct amdgpu_device *adev = smu->adev;
@@ -1655,12 +1707,13 @@ static const struct pptable_funcs smu_v13_0_7_ppt_funcs = {
 	.baco_is_support = smu_v13_0_baco_is_support,
 	.baco_get_state = smu_v13_0_baco_get_state,
 	.baco_set_state = smu_v13_0_baco_set_state,
-	.baco_enter = smu_v13_0_baco_enter,
-	.baco_exit = smu_v13_0_baco_exit,
+	.baco_enter = smu_v13_0_7_baco_enter,
+	.baco_exit = smu_v13_0_7_baco_exit,
 	.mode1_reset_is_support = smu_v13_0_7_is_mode1_reset_supported,
 	.mode1_reset = smu_v13_0_mode1_reset,
 	.set_mp1_state = smu_v13_0_7_set_mp1_state,
 	.set_df_cstate = smu_v13_0_7_set_df_cstate,
+	.gpo_control = smu_v13_0_gpo_control,
 };
 
 void smu_v13_0_7_set_ppt_funcs(struct smu_context *smu)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
index e4f8f90ac5aa..768b6e7dbd77 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
@@ -233,6 +233,18 @@ static void __smu_cmn_send_msg(struct smu_context *smu,
 	WREG32(smu->msg_reg, msg);
 }
 
+static int __smu_cmn_send_debug_msg(struct smu_context *smu,
+			       u32 msg,
+			       u32 param)
+{
+	struct amdgpu_device *adev = smu->adev;
+
+	WREG32(smu->debug_param_reg, param);
+	WREG32(smu->debug_msg_reg, msg);
+	WREG32(smu->debug_resp_reg, 0);
+
+	return 0;
+}
 /**
  * smu_cmn_send_msg_without_waiting -- send the message; don't wait for status
  * @smu: pointer to an SMU context
@@ -386,6 +398,12 @@ int smu_cmn_send_smc_msg(struct smu_context *smu,
 					       read_arg);
 }
 
+int smu_cmn_send_debug_smc_msg(struct smu_context *smu,
+			 uint32_t msg)
+{
+	return __smu_cmn_send_debug_msg(smu, msg, 0);
+}
+
 int smu_cmn_to_asic_specific_index(struct smu_context *smu,
 				   enum smu_cmn2asic_mapping_type type,
 				   uint32_t index)
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
index 1526ce09c399..f82cf76dd3a4 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
@@ -42,6 +42,9 @@ int smu_cmn_send_smc_msg(struct smu_context *smu,
 			 enum smu_message_type msg,
 			 uint32_t *read_arg);
 
+int smu_cmn_send_debug_smc_msg(struct smu_context *smu,
+			 uint32_t msg);
+
 int smu_cmn_wait_for_response(struct smu_context *smu);
 
 int smu_cmn_to_asic_specific_index(struct smu_context *smu,
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c
index 9fce4239d4ad..3f4e719eebd8 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_drv.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_drv.c
@@ -9,7 +9,7 @@
 #include <linux/platform_device.h>
 #include <linux/component.h>
 #include <linux/pm_runtime.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_module.h>
 #include <drm/drm_of.h>
 #include "komeda_dev.h"
diff --git a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
index 451746ebbe71..62dc64550793 100644
--- a/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
+++ b/drivers/gpu/drm/arm/display/komeda/komeda_kms.c
@@ -10,7 +10,6 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_managed.h>
@@ -59,7 +58,6 @@ static irqreturn_t komeda_kms_irq_handler(int irq, void *data)
 
 static const struct drm_driver komeda_kms_driver = {
 	.driver_features = DRIVER_GEM | DRIVER_MODESET | DRIVER_ATOMIC,
-	.lastclose			= drm_fb_helper_lastclose,
 	DRM_GEM_DMA_DRIVER_OPS_WITH_DUMB_CREATE(komeda_gem_dma_dumb_create),
 	.fops = &komeda_cma_fops,
 	.name = "komeda",
diff --git a/drivers/gpu/drm/arm/hdlcd_crtc.c b/drivers/gpu/drm/arm/hdlcd_crtc.c
index 7030339fa232..3cfefadc7c9d 100644
--- a/drivers/gpu/drm/arm/hdlcd_crtc.c
+++ b/drivers/gpu/drm/arm/hdlcd_crtc.c
@@ -19,7 +19,6 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_fb_dma_helper.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_of.h>
@@ -275,7 +274,7 @@ static void hdlcd_plane_atomic_update(struct drm_plane *plane,
 	dest_h = drm_rect_height(&new_plane_state->dst);
 	scanout_start = drm_fb_dma_get_gem_addr(fb, new_plane_state, 0);
 
-	hdlcd = plane->dev->dev_private;
+	hdlcd = drm_to_hdlcd_priv(plane->dev);
 	hdlcd_write(hdlcd, HDLCD_REG_FB_LINE_LENGTH, fb->pitches[0]);
 	hdlcd_write(hdlcd, HDLCD_REG_FB_LINE_PITCH, fb->pitches[0]);
 	hdlcd_write(hdlcd, HDLCD_REG_FB_LINE_COUNT, dest_h - 1);
@@ -290,7 +289,6 @@ static const struct drm_plane_helper_funcs hdlcd_plane_helper_funcs = {
 static const struct drm_plane_funcs hdlcd_plane_funcs = {
 	.update_plane		= drm_atomic_helper_update_plane,
 	.disable_plane		= drm_atomic_helper_disable_plane,
-	.destroy		= drm_plane_cleanup,
 	.reset			= drm_atomic_helper_plane_reset,
 	.atomic_duplicate_state = drm_atomic_helper_plane_duplicate_state,
 	.atomic_destroy_state	= drm_atomic_helper_plane_destroy_state,
@@ -298,24 +296,19 @@ static const struct drm_plane_funcs hdlcd_plane_funcs = {
 
 static struct drm_plane *hdlcd_plane_init(struct drm_device *drm)
 {
-	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+	struct hdlcd_drm_private *hdlcd = drm_to_hdlcd_priv(drm);
 	struct drm_plane *plane = NULL;
 	u32 formats[ARRAY_SIZE(supported_formats)], i;
-	int ret;
-
-	plane = devm_kzalloc(drm->dev, sizeof(*plane), GFP_KERNEL);
-	if (!plane)
-		return ERR_PTR(-ENOMEM);
 
 	for (i = 0; i < ARRAY_SIZE(supported_formats); i++)
 		formats[i] = supported_formats[i].fourcc;
 
-	ret = drm_universal_plane_init(drm, plane, 0xff, &hdlcd_plane_funcs,
-				       formats, ARRAY_SIZE(formats),
-				       NULL,
-				       DRM_PLANE_TYPE_PRIMARY, NULL);
-	if (ret)
-		return ERR_PTR(ret);
+	plane = drmm_universal_plane_alloc(drm, struct drm_plane, dev, 0xff,
+					   &hdlcd_plane_funcs,
+					   formats, ARRAY_SIZE(formats),
+					   NULL, DRM_PLANE_TYPE_PRIMARY, NULL);
+	if (IS_ERR(plane))
+		return plane;
 
 	drm_plane_helper_add(plane, &hdlcd_plane_helper_funcs);
 	hdlcd->plane = plane;
@@ -325,7 +318,7 @@ static struct drm_plane *hdlcd_plane_init(struct drm_device *drm)
 
 int hdlcd_setup_crtc(struct drm_device *drm)
 {
-	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+	struct hdlcd_drm_private *hdlcd = drm_to_hdlcd_priv(drm);
 	struct drm_plane *primary;
 	int ret;
 
diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
index a032003c340c..7043d1c9ed8f 100644
--- a/drivers/gpu/drm/arm/hdlcd_drv.c
+++ b/drivers/gpu/drm/arm/hdlcd_drv.c
@@ -26,7 +26,7 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_debugfs.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_modeset_helper.h>
@@ -98,7 +98,7 @@ static void hdlcd_irq_uninstall(struct hdlcd_drm_private *hdlcd)
 
 static int hdlcd_load(struct drm_device *drm, unsigned long flags)
 {
-	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+	struct hdlcd_drm_private *hdlcd = drm_to_hdlcd_priv(drm);
 	struct platform_device *pdev = to_platform_device(drm->dev);
 	struct resource *res;
 	u32 version;
@@ -175,14 +175,21 @@ static const struct drm_mode_config_funcs hdlcd_mode_config_funcs = {
 	.atomic_commit = drm_atomic_helper_commit,
 };
 
-static void hdlcd_setup_mode_config(struct drm_device *drm)
+static int hdlcd_setup_mode_config(struct drm_device *drm)
 {
-	drm_mode_config_init(drm);
+	int ret;
+
+	ret = drmm_mode_config_init(drm);
+	if (ret)
+		return ret;
+
 	drm->mode_config.min_width = 0;
 	drm->mode_config.min_height = 0;
 	drm->mode_config.max_width = HDLCD_MAX_XRES;
 	drm->mode_config.max_height = HDLCD_MAX_YRES;
 	drm->mode_config.funcs = &hdlcd_mode_config_funcs;
+
+	return 0;
 }
 
 #ifdef CONFIG_DEBUG_FS
@@ -190,7 +197,7 @@ static int hdlcd_show_underrun_count(struct seq_file *m, void *arg)
 {
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *drm = node->minor->dev;
-	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+	struct hdlcd_drm_private *hdlcd = drm_to_hdlcd_priv(drm);
 
 	seq_printf(m, "underrun : %d\n", atomic_read(&hdlcd->buffer_underrun_count));
 	seq_printf(m, "dma_end  : %d\n", atomic_read(&hdlcd->dma_end_count));
@@ -203,7 +210,7 @@ static int hdlcd_show_pxlclock(struct seq_file *m, void *arg)
 {
 	struct drm_info_node *node = (struct drm_info_node *)m->private;
 	struct drm_device *drm = node->minor->dev;
-	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+	struct hdlcd_drm_private *hdlcd = drm_to_hdlcd_priv(drm);
 	unsigned long clkrate = clk_get_rate(hdlcd->clk);
 	unsigned long mode_clock = hdlcd->crtc.mode.crtc_clock * 1000;
 
@@ -247,18 +254,18 @@ static int hdlcd_drm_bind(struct device *dev)
 	struct hdlcd_drm_private *hdlcd;
 	int ret;
 
-	hdlcd = devm_kzalloc(dev, sizeof(*hdlcd), GFP_KERNEL);
-	if (!hdlcd)
-		return -ENOMEM;
+	hdlcd = devm_drm_dev_alloc(dev, &hdlcd_driver, typeof(*hdlcd), base);
+	if (IS_ERR(hdlcd))
+		return PTR_ERR(hdlcd);
 
-	drm = drm_dev_alloc(&hdlcd_driver, dev);
-	if (IS_ERR(drm))
-		return PTR_ERR(drm);
+	drm = &hdlcd->base;
 
-	drm->dev_private = hdlcd;
 	dev_set_drvdata(dev, drm);
 
-	hdlcd_setup_mode_config(drm);
+	ret = hdlcd_setup_mode_config(drm);
+	if (ret)
+		goto err_free;
+
 	ret = hdlcd_load(drm, 0);
 	if (ret)
 		goto err_free;
@@ -317,17 +324,14 @@ err_unload:
 	hdlcd_irq_uninstall(hdlcd);
 	of_reserved_mem_device_release(drm->dev);
 err_free:
-	drm_mode_config_cleanup(drm);
 	dev_set_drvdata(dev, NULL);
-	drm_dev_put(drm);
-
 	return ret;
 }
 
 static void hdlcd_drm_unbind(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
-	struct hdlcd_drm_private *hdlcd = drm->dev_private;
+	struct hdlcd_drm_private *hdlcd = drm_to_hdlcd_priv(drm);
 
 	drm_dev_unregister(drm);
 	drm_kms_helper_poll_fini(drm);
@@ -341,10 +345,7 @@ static void hdlcd_drm_unbind(struct device *dev)
 	if (pm_runtime_enabled(dev))
 		pm_runtime_disable(dev);
 	of_reserved_mem_device_release(dev);
-	drm_mode_config_cleanup(drm);
-	drm->dev_private = NULL;
 	dev_set_drvdata(dev, NULL);
-	drm_dev_put(drm);
 }
 
 static const struct component_master_ops hdlcd_master_ops = {
diff --git a/drivers/gpu/drm/arm/hdlcd_drv.h b/drivers/gpu/drm/arm/hdlcd_drv.h
index 909c39c28487..f1c1da2ac2db 100644
--- a/drivers/gpu/drm/arm/hdlcd_drv.h
+++ b/drivers/gpu/drm/arm/hdlcd_drv.h
@@ -7,6 +7,7 @@
 #define __HDLCD_DRV_H__
 
 struct hdlcd_drm_private {
+	struct drm_device		base;
 	void __iomem			*mmio;
 	struct clk			*clk;
 	struct drm_crtc			crtc;
@@ -20,6 +21,7 @@ struct hdlcd_drm_private {
 #endif
 };
 
+#define drm_to_hdlcd_priv(x)	container_of(x, struct hdlcd_drm_private, base)
 #define crtc_to_hdlcd_priv(x)	container_of(x, struct hdlcd_drm_private, crtc)
 
 static inline void hdlcd_write(struct hdlcd_drm_private *hdlcd,
diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c
index 962730772b2f..dc01c43f6193 100644
--- a/drivers/gpu/drm/arm/malidp_crtc.c
+++ b/drivers/gpu/drm/arm/malidp_crtc.c
@@ -514,7 +514,6 @@ static void malidp_crtc_disable_vblank(struct drm_crtc *crtc)
 }
 
 static const struct drm_crtc_funcs malidp_crtc_funcs = {
-	.destroy = drm_crtc_cleanup,
 	.set_config = drm_atomic_helper_set_config,
 	.page_flip = drm_atomic_helper_page_flip,
 	.reset = malidp_crtc_reset,
@@ -526,7 +525,7 @@ static const struct drm_crtc_funcs malidp_crtc_funcs = {
 
 int malidp_crtc_init(struct drm_device *drm)
 {
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	struct drm_plane *primary = NULL, *plane;
 	int ret;
 
@@ -548,8 +547,8 @@ int malidp_crtc_init(struct drm_device *drm)
 		return -EINVAL;
 	}
 
-	ret = drm_crtc_init_with_planes(drm, &malidp->crtc, primary, NULL,
-					&malidp_crtc_funcs, NULL);
+	ret = drmm_crtc_init_with_planes(drm, &malidp->crtc, primary, NULL,
+					 &malidp_crtc_funcs, NULL);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c
index 1d0b0c54ccc7..589c1c66a6dc 100644
--- a/drivers/gpu/drm/arm/malidp_drv.c
+++ b/drivers/gpu/drm/arm/malidp_drv.c
@@ -19,10 +19,11 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_managed.h>
 #include <drm/drm_modeset_helper.h>
 #include <drm/drm_module.h>
 #include <drm/drm_of.h>
@@ -168,7 +169,7 @@ static void malidp_atomic_commit_se_config(struct drm_crtc *crtc,
  */
 static int malidp_set_and_wait_config_valid(struct drm_device *drm)
 {
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	struct malidp_hw_device *hwdev = malidp->dev;
 	int ret;
 
@@ -189,7 +190,7 @@ static int malidp_set_and_wait_config_valid(struct drm_device *drm)
 static void malidp_atomic_commit_hw_done(struct drm_atomic_state *state)
 {
 	struct drm_device *drm = state->dev;
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	int loop = 5;
 
 	malidp->event = malidp->crtc.state->event;
@@ -230,7 +231,7 @@ static void malidp_atomic_commit_hw_done(struct drm_atomic_state *state)
 static void malidp_atomic_commit_tail(struct drm_atomic_state *state)
 {
 	struct drm_device *drm = state->dev;
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	struct drm_crtc *crtc;
 	struct drm_crtc_state *old_crtc_state;
 	int i;
@@ -392,10 +393,12 @@ static const struct drm_mode_config_funcs malidp_mode_config_funcs = {
 static int malidp_init(struct drm_device *drm)
 {
 	int ret;
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	struct malidp_hw_device *hwdev = malidp->dev;
 
-	drm_mode_config_init(drm);
+	ret = drmm_mode_config_init(drm);
+	if (ret)
+		goto out;
 
 	drm->mode_config.min_width = hwdev->min_line_size;
 	drm->mode_config.min_height = hwdev->min_line_size;
@@ -406,29 +409,21 @@ static int malidp_init(struct drm_device *drm)
 
 	ret = malidp_crtc_init(drm);
 	if (ret)
-		goto crtc_fail;
+		goto out;
 
 	ret = malidp_mw_connector_init(drm);
 	if (ret)
-		goto crtc_fail;
-
-	return 0;
+		goto out;
 
-crtc_fail:
-	drm_mode_config_cleanup(drm);
+out:
 	return ret;
 }
 
-static void malidp_fini(struct drm_device *drm)
-{
-	drm_mode_config_cleanup(drm);
-}
-
 static int malidp_irq_init(struct platform_device *pdev)
 {
 	int irq_de, irq_se, ret = 0;
 	struct drm_device *drm = dev_get_drvdata(&pdev->dev);
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	struct malidp_hw_device *hwdev = malidp->dev;
 
 	/* fetch the interrupts from DT */
@@ -462,7 +457,7 @@ static int malidp_dumb_create(struct drm_file *file_priv,
 			      struct drm_device *drm,
 			      struct drm_mode_create_dumb *args)
 {
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	/* allocate for the worst case scenario, i.e. rotated buffers */
 	u8 alignment = malidp_hw_get_pitch_align(malidp->dev, 1);
 
@@ -508,7 +503,7 @@ static void malidp_error_stats_dump(const char *prefix,
 static int malidp_show_stats(struct seq_file *m, void *arg)
 {
 	struct drm_device *drm = m->private;
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	unsigned long irqflags;
 	struct malidp_error_stats de_errors, se_errors;
 
@@ -531,7 +526,7 @@ static ssize_t malidp_debugfs_write(struct file *file, const char __user *ubuf,
 {
 	struct seq_file *m = file->private_data;
 	struct drm_device *drm = m->private;
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	unsigned long irqflags;
 
 	spin_lock_irqsave(&malidp->errors_lock, irqflags);
@@ -552,7 +547,7 @@ static const struct file_operations malidp_debugfs_fops = {
 
 static void malidp_debugfs_init(struct drm_minor *minor)
 {
-	struct malidp_drm *malidp = minor->dev->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(minor->dev);
 
 	malidp_error_stats_init(&malidp->de_errors);
 	malidp_error_stats_init(&malidp->se_errors);
@@ -652,7 +647,7 @@ static ssize_t core_id_show(struct device *dev, struct device_attribute *attr,
 			    char *buf)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 
 	return snprintf(buf, PAGE_SIZE, "%08x\n", malidp->core_id);
 }
@@ -670,7 +665,7 @@ ATTRIBUTE_GROUPS(mali_dp);
 static int malidp_runtime_pm_suspend(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	struct malidp_hw_device *hwdev = malidp->dev;
 
 	/* we can only suspend if the hardware is in config mode */
@@ -689,7 +684,7 @@ static int malidp_runtime_pm_suspend(struct device *dev)
 static int malidp_runtime_pm_resume(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	struct malidp_hw_device *hwdev = malidp->dev;
 
 	clk_prepare_enable(hwdev->pclk);
@@ -716,11 +711,13 @@ static int malidp_bind(struct device *dev)
 	int ret = 0, i;
 	u32 version, out_depth = 0;
 
-	malidp = devm_kzalloc(dev, sizeof(*malidp), GFP_KERNEL);
-	if (!malidp)
-		return -ENOMEM;
+	malidp = devm_drm_dev_alloc(dev, &malidp_driver, typeof(*malidp), base);
+	if (IS_ERR(malidp))
+		return PTR_ERR(malidp);
+
+	drm = &malidp->base;
 
-	hwdev = devm_kzalloc(dev, sizeof(*hwdev), GFP_KERNEL);
+	hwdev = drmm_kzalloc(drm, sizeof(*hwdev), GFP_KERNEL);
 	if (!hwdev)
 		return -ENOMEM;
 
@@ -753,13 +750,6 @@ static int malidp_bind(struct device *dev)
 	if (ret && ret != -ENODEV)
 		return ret;
 
-	drm = drm_dev_alloc(&malidp_driver, dev);
-	if (IS_ERR(drm)) {
-		ret = PTR_ERR(drm);
-		goto alloc_fail;
-	}
-
-	drm->dev_private = malidp;
 	dev_set_drvdata(dev, drm);
 
 	/* Enable power management */
@@ -878,17 +868,13 @@ irq_init_fail:
 bind_fail:
 	of_node_put(malidp->crtc.port);
 	malidp->crtc.port = NULL;
-	malidp_fini(drm);
 query_hw_fail:
 	pm_runtime_put(dev);
 	if (pm_runtime_enabled(dev))
 		pm_runtime_disable(dev);
 	else
 		malidp_runtime_pm_suspend(dev);
-	drm->dev_private = NULL;
 	dev_set_drvdata(dev, NULL);
-	drm_dev_put(drm);
-alloc_fail:
 	of_reserved_mem_device_release(dev);
 
 	return ret;
@@ -897,7 +883,7 @@ alloc_fail:
 static void malidp_unbind(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	struct malidp_hw_device *hwdev = malidp->dev;
 
 	drm_dev_unregister(drm);
@@ -909,15 +895,12 @@ static void malidp_unbind(struct device *dev)
 	component_unbind_all(dev, drm);
 	of_node_put(malidp->crtc.port);
 	malidp->crtc.port = NULL;
-	malidp_fini(drm);
 	pm_runtime_put(dev);
 	if (pm_runtime_enabled(dev))
 		pm_runtime_disable(dev);
 	else
 		malidp_runtime_pm_suspend(dev);
-	drm->dev_private = NULL;
 	dev_set_drvdata(dev, NULL);
-	drm_dev_put(drm);
 	of_reserved_mem_device_release(dev);
 }
 
diff --git a/drivers/gpu/drm/arm/malidp_drv.h b/drivers/gpu/drm/arm/malidp_drv.h
index cdfddfabf2d1..bc0387876dea 100644
--- a/drivers/gpu/drm/arm/malidp_drv.h
+++ b/drivers/gpu/drm/arm/malidp_drv.h
@@ -29,6 +29,7 @@ struct malidp_error_stats {
 };
 
 struct malidp_drm {
+	struct drm_device base;
 	struct malidp_hw_device *dev;
 	struct drm_crtc crtc;
 	struct drm_writeback_connector mw_connector;
@@ -44,6 +45,7 @@ struct malidp_drm {
 #endif
 };
 
+#define drm_to_malidp(x) container_of(x, struct malidp_drm, base)
 #define crtc_to_malidp_device(x) container_of(x, struct malidp_drm, crtc)
 
 struct malidp_plane {
diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c
index e9de542f9b7c..9b845d3f34e1 100644
--- a/drivers/gpu/drm/arm/malidp_hw.c
+++ b/drivers/gpu/drm/arm/malidp_hw.c
@@ -1168,7 +1168,7 @@ static void malidp_hw_clear_irq(struct malidp_hw_device *hwdev, u8 block, u32 ir
 static irqreturn_t malidp_de_irq(int irq, void *arg)
 {
 	struct drm_device *drm = arg;
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	struct malidp_hw_device *hwdev;
 	struct malidp_hw *hw;
 	const struct malidp_irq_map *de;
@@ -1226,7 +1226,7 @@ static irqreturn_t malidp_de_irq(int irq, void *arg)
 static irqreturn_t malidp_de_irq_thread_handler(int irq, void *arg)
 {
 	struct drm_device *drm = arg;
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 
 	wake_up(&malidp->wq);
 
@@ -1252,7 +1252,7 @@ void malidp_de_irq_hw_init(struct malidp_hw_device *hwdev)
 
 int malidp_de_irq_init(struct drm_device *drm, int irq)
 {
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	struct malidp_hw_device *hwdev = malidp->dev;
 	int ret;
 
@@ -1286,7 +1286,7 @@ void malidp_de_irq_fini(struct malidp_hw_device *hwdev)
 static irqreturn_t malidp_se_irq(int irq, void *arg)
 {
 	struct drm_device *drm = arg;
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	struct malidp_hw_device *hwdev = malidp->dev;
 	struct malidp_hw *hw = hwdev->hw;
 	const struct malidp_irq_map *se = &hw->map.se_irq_map;
@@ -1363,7 +1363,7 @@ static irqreturn_t malidp_se_irq_thread_handler(int irq, void *arg)
 
 int malidp_se_irq_init(struct drm_device *drm, int irq)
 {
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	struct malidp_hw_device *hwdev = malidp->dev;
 	int ret;
 
diff --git a/drivers/gpu/drm/arm/malidp_mw.c b/drivers/gpu/drm/arm/malidp_mw.c
index ef76d0e6ee2f..626709bec6f5 100644
--- a/drivers/gpu/drm/arm/malidp_mw.c
+++ b/drivers/gpu/drm/arm/malidp_mw.c
@@ -129,7 +129,7 @@ malidp_mw_encoder_atomic_check(struct drm_encoder *encoder,
 			       struct drm_connector_state *conn_state)
 {
 	struct malidp_mw_connector_state *mw_state = to_mw_state(conn_state);
-	struct malidp_drm *malidp = encoder->dev->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(encoder->dev);
 	struct drm_framebuffer *fb;
 	int i, n_planes;
 
@@ -207,7 +207,7 @@ static u32 *get_writeback_formats(struct malidp_drm *malidp, int *n_formats)
 
 int malidp_mw_connector_init(struct drm_device *drm)
 {
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	u32 *formats;
 	int ret, n_formats;
 
@@ -236,7 +236,7 @@ int malidp_mw_connector_init(struct drm_device *drm)
 void malidp_mw_atomic_commit(struct drm_device *drm,
 			     struct drm_atomic_state *old_state)
 {
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	struct drm_writeback_connector *mw_conn = &malidp->mw_connector;
 	struct drm_connector_state *conn_state = mw_conn->base.state;
 	struct malidp_hw_device *hwdev = malidp->dev;
diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c
index 45f5e35e7f24..34547edf1ee3 100644
--- a/drivers/gpu/drm/arm/malidp_planes.c
+++ b/drivers/gpu/drm/arm/malidp_planes.c
@@ -68,14 +68,6 @@
 /* readahead for partial-frame prefetch */
 #define MALIDP_MMU_PREFETCH_READAHEAD		8
 
-static void malidp_de_plane_destroy(struct drm_plane *plane)
-{
-	struct malidp_plane *mp = to_malidp_plane(plane);
-
-	drm_plane_cleanup(plane);
-	kfree(mp);
-}
-
 /*
  * Replicate what the default ->reset hook does: free the state pointer and
  * allocate a new empty object. We just need enough space to store
@@ -151,7 +143,7 @@ bool malidp_format_mod_supported(struct drm_device *drm,
 {
 	const struct drm_format_info *info;
 	const u64 *modifiers;
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	const struct malidp_hw_regmap *map = &malidp->dev->hw->map;
 
 	if (WARN_ON(modifier == DRM_FORMAT_MOD_INVALID))
@@ -260,7 +252,6 @@ static bool malidp_format_mod_supported_per_plane(struct drm_plane *plane,
 static const struct drm_plane_funcs malidp_de_plane_funcs = {
 	.update_plane = drm_atomic_helper_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
-	.destroy = malidp_de_plane_destroy,
 	.reset = malidp_plane_reset,
 	.atomic_duplicate_state = malidp_duplicate_plane_state,
 	.atomic_destroy_state = malidp_destroy_plane_state,
@@ -931,7 +922,7 @@ static const uint64_t linear_only_modifiers[] = {
 
 int malidp_de_planes_init(struct drm_device *drm)
 {
-	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_drm *malidp = drm_to_malidp(drm);
 	const struct malidp_hw_regmap *map = &malidp->dev->hw->map;
 	struct malidp_plane *plane = NULL;
 	enum drm_plane_type plane_type;
@@ -972,12 +963,6 @@ int malidp_de_planes_init(struct drm_device *drm)
 	for (i = 0; i < map->n_layers; i++) {
 		u8 id = map->layers[i].id;
 
-		plane = kzalloc(sizeof(*plane), GFP_KERNEL);
-		if (!plane) {
-			ret = -ENOMEM;
-			goto cleanup;
-		}
-
 		/* build the list of DRM supported formats based on the map */
 		for (n = 0, j = 0;  j < map->n_pixel_formats; j++) {
 			if ((map->pixel_formats[j].layer & id) == id)
@@ -990,13 +975,14 @@ int malidp_de_planes_init(struct drm_device *drm)
 		/*
 		 * All the layers except smart layer supports AFBC modifiers.
 		 */
-		ret = drm_universal_plane_init(drm, &plane->base, crtcs,
-				&malidp_de_plane_funcs, formats, n,
-				(id == DE_SMART) ? linear_only_modifiers : modifiers,
-				plane_type, NULL);
-
-		if (ret < 0)
+		plane = drmm_universal_plane_alloc(drm, struct malidp_plane, base,
+						   crtcs, &malidp_de_plane_funcs, formats, n,
+						   (id == DE_SMART) ? linear_only_modifiers :
+						   modifiers, plane_type, NULL);
+		if (IS_ERR(plane)) {
+			ret = PTR_ERR(plane);
 			goto cleanup;
+		}
 
 		drm_plane_helper_add(&plane->base,
 				     &malidp_de_plane_helper_funcs);
diff --git a/drivers/gpu/drm/armada/armada_fbdev.c b/drivers/gpu/drm/armada/armada_fbdev.c
index 38f5170c0fea..584cee123bd8 100644
--- a/drivers/gpu/drm/armada/armada_fbdev.c
+++ b/drivers/gpu/drm/armada/armada_fbdev.c
@@ -19,6 +19,8 @@
 static const struct fb_ops armada_fb_ops = {
 	.owner		= THIS_MODULE,
 	DRM_FB_HELPER_DEFAULT_OPS,
+	.fb_read	= drm_fb_helper_cfb_read,
+	.fb_write	= drm_fb_helper_cfb_write,
 	.fb_fillrect	= drm_fb_helper_cfb_fillrect,
 	.fb_copyarea	= drm_fb_helper_cfb_copyarea,
 	.fb_imageblit	= drm_fb_helper_cfb_imageblit,
@@ -72,7 +74,7 @@ static int armada_fbdev_create(struct drm_fb_helper *fbh,
 	if (IS_ERR(dfb))
 		return PTR_ERR(dfb);
 
-	info = drm_fb_helper_alloc_fbi(fbh);
+	info = drm_fb_helper_alloc_info(fbh);
 	if (IS_ERR(info)) {
 		ret = PTR_ERR(info);
 		goto err_fballoc;
@@ -155,7 +157,7 @@ void armada_fbdev_fini(struct drm_device *dev)
 	struct drm_fb_helper *fbh = priv->fbdev;
 
 	if (fbh) {
-		drm_fb_helper_unregister_fbi(fbh);
+		drm_fb_helper_unregister_info(fbh);
 
 		drm_fb_helper_fini(fbh);
 
diff --git a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
index a94f1a9e8f40..718119e168a6 100644
--- a/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
+++ b/drivers/gpu/drm/aspeed/aspeed_gfx_drv.c
@@ -16,7 +16,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_device.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_module.h>
diff --git a/drivers/gpu/drm/ast/ast_drv.c b/drivers/gpu/drm/ast/ast_drv.c
index bbeb5defc8f5..420fc75c240e 100644
--- a/drivers/gpu/drm/ast/ast_drv.c
+++ b/drivers/gpu/drm/ast/ast_drv.c
@@ -33,6 +33,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_drv.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_shmem_helper.h>
 #include <drm/drm_module.h>
 #include <drm/drm_probe_helper.h>
diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index 74f41282444f..d51b81fea9c8 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h
@@ -38,7 +38,6 @@
 #include <drm/drm_encoder.h>
 #include <drm/drm_mode.h>
 #include <drm/drm_framebuffer.h>
-#include <drm/drm_fb_helper.h>
 
 #define DRIVER_AUTHOR		"Dave Airlie"
 
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
index f7e7f4e919c7..a2bb5b916235 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
@@ -19,7 +19,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_module.h>
diff --git a/drivers/gpu/drm/bridge/parade-ps8640.c b/drivers/gpu/drm/bridge/parade-ps8640.c
index 5be6562c2a19..6a614e54b383 100644
--- a/drivers/gpu/drm/bridge/parade-ps8640.c
+++ b/drivers/gpu/drm/bridge/parade-ps8640.c
@@ -105,6 +105,7 @@ struct ps8640 {
 	struct gpio_desc *gpio_powerdown;
 	struct device_link *link;
 	bool pre_enabled;
+	bool need_post_hpd_delay;
 };
 
 static const struct regmap_config ps8640_regmap_config[] = {
@@ -173,14 +174,31 @@ static int _ps8640_wait_hpd_asserted(struct ps8640 *ps_bridge, unsigned long wai
 {
 	struct regmap *map = ps_bridge->regmap[PAGE2_TOP_CNTL];
 	int status;
+	int ret;
 
 	/*
 	 * Apparently something about the firmware in the chip signals that
 	 * HPD goes high by reporting GPIO9 as high (even though HPD isn't
 	 * actually connected to GPIO9).
 	 */
-	return regmap_read_poll_timeout(map, PAGE2_GPIO_H, status,
-					status & PS_GPIO9, wait_us / 10, wait_us);
+	ret = regmap_read_poll_timeout(map, PAGE2_GPIO_H, status,
+				       status & PS_GPIO9, wait_us / 10, wait_us);
+
+	/*
+	 * The first time we see HPD go high after a reset we delay an extra
+	 * 50 ms. The best guess is that the MCU is doing "stuff" during this
+	 * time (maybe talking to the panel) and we don't want to interrupt it.
+	 *
+	 * No locking is done around "need_post_hpd_delay". If we're here we
+	 * know we're holding a PM Runtime reference and the only other place
+	 * that touches this is PM Runtime resume.
+	 */
+	if (!ret && ps_bridge->need_post_hpd_delay) {
+		ps_bridge->need_post_hpd_delay = false;
+		msleep(50);
+	}
+
+	return ret;
 }
 
 static int ps8640_wait_hpd_asserted(struct drm_dp_aux *aux, unsigned long wait_us)
@@ -388,6 +406,9 @@ static int __maybe_unused ps8640_resume(struct device *dev)
 	msleep(50);
 	gpiod_set_value(ps_bridge->gpio_reset, 0);
 
+	/* We just reset things, so we need a delay after the first HPD */
+	ps_bridge->need_post_hpd_delay = true;
+
 	/*
 	 * Mystery 200 ms delay for the "MCU to be ready". It's unclear if
 	 * this is truly necessary since the MCU will already signal that
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
index 40d8ca37f5bc..aa51c61a78c7 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -2720,6 +2720,9 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge,
 	 * if supported. In any case the default RGB888 format is added
 	 */
 
+	/* Default 8bit RGB fallback */
+	output_fmts[i++] = MEDIA_BUS_FMT_RGB888_1X24;
+
 	if (max_bpc >= 16 && info->bpc == 16) {
 		if (info->color_formats & DRM_COLOR_FORMAT_YCBCR444)
 			output_fmts[i++] = MEDIA_BUS_FMT_YUV16_1X48;
@@ -2753,9 +2756,6 @@ static u32 *dw_hdmi_bridge_atomic_get_output_bus_fmts(struct drm_bridge *bridge,
 	if (info->color_formats & DRM_COLOR_FORMAT_YCBCR444)
 		output_fmts[i++] = MEDIA_BUS_FMT_YUV8_1X24;
 
-	/* Default 8bit RGB fallback */
-	output_fmts[i++] = MEDIA_BUS_FMT_RGB888_1X24;
-
 	*num_output_fmts = i;
 
 	return output_fmts;
diff --git a/drivers/gpu/drm/bridge/tc358762.c b/drivers/gpu/drm/bridge/tc358762.c
index 7f4fce1aa998..0b6a28436885 100644
--- a/drivers/gpu/drm/bridge/tc358762.c
+++ b/drivers/gpu/drm/bridge/tc358762.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/delay.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/of_graph.h>
 #include <linux/regulator/consumer.h>
@@ -19,7 +20,6 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_of.h>
 #include <drm/drm_panel.h>
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index 3c3561942eb6..05f8756d1aaf 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -931,9 +931,9 @@ static void ti_sn_bridge_set_video_timings(struct ti_sn65dsi86 *pdata)
 		&pdata->bridge.encoder->crtc->state->adjusted_mode;
 	u8 hsync_polarity = 0, vsync_polarity = 0;
 
-	if (mode->flags & DRM_MODE_FLAG_PHSYNC)
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
 		hsync_polarity = CHA_HSYNC_POLARITY;
-	if (mode->flags & DRM_MODE_FLAG_PVSYNC)
+	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
 		vsync_polarity = CHA_VSYNC_POLARITY;
 
 	ti_sn65dsi86_write_u16(pdata, SN_CHA_ACTIVE_LINE_LENGTH_LOW_REG,
@@ -1500,8 +1500,8 @@ out:
 	return ret;
 }
 
-static void ti_sn_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
-				struct pwm_state *state)
+static int ti_sn_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+			       struct pwm_state *state)
 {
 	struct ti_sn65dsi86 *pdata = pwm_chip_to_ti_sn_bridge(chip);
 	unsigned int pwm_en_inv;
@@ -1512,19 +1512,19 @@ static void ti_sn_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 
 	ret = regmap_read(pdata->regmap, SN_PWM_EN_INV_REG, &pwm_en_inv);
 	if (ret)
-		return;
+		return ret;
 
 	ret = ti_sn65dsi86_read_u16(pdata, SN_BACKLIGHT_SCALE_REG, &scale);
 	if (ret)
-		return;
+		return ret;
 
 	ret = ti_sn65dsi86_read_u16(pdata, SN_BACKLIGHT_REG, &backlight);
 	if (ret)
-		return;
+		return ret;
 
 	ret = regmap_read(pdata->regmap, SN_PWM_PRE_DIV_REG, &pre_div);
 	if (ret)
-		return;
+		return ret;
 
 	state->enabled = FIELD_GET(SN_PWM_EN_MASK, pwm_en_inv);
 	if (FIELD_GET(SN_PWM_INV_MASK, pwm_en_inv))
@@ -1539,6 +1539,8 @@ static void ti_sn_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
 
 	if (state->duty_cycle > state->period)
 		state->duty_cycle = state->period;
+
+	return 0;
 }
 
 static const struct pwm_ops ti_sn_pwm_ops = {
diff --git a/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c b/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c
index 3ea53bb67d3b..bd61e20770a5 100644
--- a/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c
+++ b/drivers/gpu/drm/display/drm_dp_dual_mode_helper.c
@@ -63,23 +63,45 @@
 ssize_t drm_dp_dual_mode_read(struct i2c_adapter *adapter,
 			      u8 offset, void *buffer, size_t size)
 {
+	u8 zero = 0;
+	char *tmpbuf = NULL;
+	/*
+	 * As sub-addressing is not supported by all adaptors,
+	 * always explicitly read from the start and discard
+	 * any bytes that come before the requested offset.
+	 * This way, no matter whether the adaptor supports it
+	 * or not, we'll end up reading the proper data.
+	 */
 	struct i2c_msg msgs[] = {
 		{
 			.addr = DP_DUAL_MODE_SLAVE_ADDRESS,
 			.flags = 0,
 			.len = 1,
-			.buf = &offset,
+			.buf = &zero,
 		},
 		{
 			.addr = DP_DUAL_MODE_SLAVE_ADDRESS,
 			.flags = I2C_M_RD,
-			.len = size,
+			.len = size + offset,
 			.buf = buffer,
 		},
 	};
 	int ret;
 
+	if (offset) {
+		tmpbuf = kmalloc(size + offset, GFP_KERNEL);
+		if (!tmpbuf)
+			return -ENOMEM;
+
+		msgs[1].buf = tmpbuf;
+	}
+
 	ret = i2c_transfer(adapter, msgs, ARRAY_SIZE(msgs));
+	if (tmpbuf)
+		memcpy(buffer, tmpbuf + offset, size);
+
+	kfree(tmpbuf);
+
 	if (ret < 0)
 		return ret;
 	if (ret != ARRAY_SIZE(msgs))
@@ -208,18 +230,6 @@ enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(const struct drm_device *dev,
 	if (ret)
 		return DRM_DP_DUAL_MODE_UNKNOWN;
 
-	/*
-	 * Sigh. Some (maybe all?) type 1 adaptors are broken and ack
-	 * the offset but ignore it, and instead they just always return
-	 * data from the start of the HDMI ID buffer. So for a broken
-	 * type 1 HDMI adaptor a single byte read will always give us
-	 * 0x44, and for a type 1 DVI adaptor it should give 0x00
-	 * (assuming it implements any registers). Fortunately neither
-	 * of those values will match the type 2 signature of the
-	 * DP_DUAL_MODE_ADAPTOR_ID register so we can proceed with
-	 * the type 2 adaptor detection safely even in the presence
-	 * of broken type 1 adaptors.
-	 */
 	ret = drm_dp_dual_mode_read(adapter, DP_DUAL_MODE_ADAPTOR_ID,
 				    &adaptor_id, sizeof(adaptor_id));
 	drm_dbg_kms(dev, "DP dual mode adaptor ID: %02x (err %zd)\n", adaptor_id, ret);
@@ -233,11 +243,10 @@ enum drm_dp_dual_mode_type drm_dp_dual_mode_detect(const struct drm_device *dev,
 				return DRM_DP_DUAL_MODE_TYPE2_DVI;
 		}
 		/*
-		 * If neither a proper type 1 ID nor a broken type 1 adaptor
-		 * as described above, assume type 1, but let the user know
-		 * that we may have misdetected the type.
+		 * If not a proper type 1 ID, still assume type 1, but let
+		 * the user know that we may have misdetected the type.
 		 */
-		if (!is_type1_adaptor(adaptor_id) && adaptor_id != hdmi_id[0])
+		if (!is_type1_adaptor(adaptor_id))
 			drm_err(dev, "Unexpected DP dual mode adaptor ID %02x\n", adaptor_id);
 
 	}
@@ -343,10 +352,8 @@ EXPORT_SYMBOL(drm_dp_dual_mode_get_tmds_output);
  * @enable: enable (as opposed to disable) the TMDS output buffers
  *
  * Set the state of the TMDS output buffers in the adaptor. For
- * type2 this is set via the DP_DUAL_MODE_TMDS_OEN register. As
- * some type 1 adaptors have problems with registers (see comments
- * in drm_dp_dual_mode_detect()) we avoid touching the register,
- * making this function a no-op on type 1 adaptors.
+ * type2 this is set via the DP_DUAL_MODE_TMDS_OEN register.
+ * Type1 adaptors do not support any register writes.
  *
  * Returns:
  * 0 on success, negative error code on failure
diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
index ecd22c038c8c..51a46689cda7 100644
--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
@@ -5186,7 +5186,7 @@ int drm_dp_mst_add_affected_dsc_crtcs(struct drm_atomic_state *state, struct drm
 	mst_state = drm_atomic_get_mst_topology_state(state, mgr);
 
 	if (IS_ERR(mst_state))
-		return -EINVAL;
+		return PTR_ERR(mst_state);
 
 	list_for_each_entry(pos, &mst_state->payloads, next) {
 
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 1a586b3c454b..d579fd8f7cb8 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -2536,7 +2536,7 @@ int drm_atomic_helper_prepare_planes(struct drm_device *dev,
 		if (funcs->prepare_fb) {
 			ret = funcs->prepare_fb(plane, new_plane_state);
 			if (ret)
-				goto fail;
+				goto fail_prepare_fb;
 		} else {
 			WARN_ON_ONCE(funcs->cleanup_fb);
 
@@ -2545,13 +2545,34 @@ int drm_atomic_helper_prepare_planes(struct drm_device *dev,
 
 			ret = drm_gem_plane_helper_prepare_fb(plane, new_plane_state);
 			if (ret)
-				goto fail;
+				goto fail_prepare_fb;
+		}
+	}
+
+	for_each_new_plane_in_state(state, plane, new_plane_state, i) {
+		const struct drm_plane_helper_funcs *funcs = plane->helper_private;
+
+		if (funcs->begin_fb_access) {
+			ret = funcs->begin_fb_access(plane, new_plane_state);
+			if (ret)
+				goto fail_begin_fb_access;
 		}
 	}
 
 	return 0;
 
-fail:
+fail_begin_fb_access:
+	for_each_new_plane_in_state(state, plane, new_plane_state, j) {
+		const struct drm_plane_helper_funcs *funcs = plane->helper_private;
+
+		if (j >= i)
+			continue;
+
+		if (funcs->end_fb_access)
+			funcs->end_fb_access(plane, new_plane_state);
+	}
+	i = j; /* set i to upper limit to cleanup all planes */
+fail_prepare_fb:
 	for_each_new_plane_in_state(state, plane, new_plane_state, j) {
 		const struct drm_plane_helper_funcs *funcs;
 
@@ -2828,6 +2849,13 @@ void drm_atomic_helper_cleanup_planes(struct drm_device *dev,
 	int i;
 
 	for_each_oldnew_plane_in_state(old_state, plane, old_plane_state, new_plane_state, i) {
+		const struct drm_plane_helper_funcs *funcs = plane->helper_private;
+
+		if (funcs->end_fb_access)
+			funcs->end_fb_access(plane, new_plane_state);
+	}
+
+	for_each_oldnew_plane_in_state(old_state, plane, old_plane_state, new_plane_state, i) {
 		const struct drm_plane_helper_funcs *funcs;
 		struct drm_plane_state *plane_state;
 
diff --git a/drivers/gpu/drm/drm_client_modeset.c b/drivers/gpu/drm/drm_client_modeset.c
index bbc535cc50dd..d553e793e673 100644
--- a/drivers/gpu/drm/drm_client_modeset.c
+++ b/drivers/gpu/drm/drm_client_modeset.c
@@ -1237,3 +1237,7 @@ int drm_client_modeset_dpms(struct drm_client_dev *client, int mode)
 	return ret;
 }
 EXPORT_SYMBOL(drm_client_modeset_dpms);
+
+#ifdef CONFIG_DRM_KUNIT_TEST
+#include "tests/drm_client_modeset_test.c"
+#endif
diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
index f5fb22e0d033..a209659a996c 100644
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@@ -43,7 +43,6 @@
 #include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_encoder.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_print.h>
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 8214a0b1ab7f..73b845a75d52 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -35,6 +35,7 @@
 #include <linux/slab.h>
 #include <linux/srcu.h>
 
+#include <drm/drm_accel.h>
 #include <drm/drm_cache.h>
 #include <drm/drm_client.h>
 #include <drm/drm_color_mgmt.h>
@@ -90,6 +91,8 @@ static struct drm_minor **drm_minor_get_slot(struct drm_device *dev,
 		return &dev->primary;
 	case DRM_MINOR_RENDER:
 		return &dev->render;
+	case DRM_MINOR_ACCEL:
+		return &dev->accel;
 	default:
 		BUG();
 	}
@@ -104,9 +107,13 @@ static void drm_minor_alloc_release(struct drm_device *dev, void *data)
 
 	put_device(minor->kdev);
 
-	spin_lock_irqsave(&drm_minor_lock, flags);
-	idr_remove(&drm_minors_idr, minor->index);
-	spin_unlock_irqrestore(&drm_minor_lock, flags);
+	if (minor->type == DRM_MINOR_ACCEL) {
+		accel_minor_remove(minor->index);
+	} else {
+		spin_lock_irqsave(&drm_minor_lock, flags);
+		idr_remove(&drm_minors_idr, minor->index);
+		spin_unlock_irqrestore(&drm_minor_lock, flags);
+	}
 }
 
 static int drm_minor_alloc(struct drm_device *dev, unsigned int type)
@@ -123,13 +130,17 @@ static int drm_minor_alloc(struct drm_device *dev, unsigned int type)
 	minor->dev = dev;
 
 	idr_preload(GFP_KERNEL);
-	spin_lock_irqsave(&drm_minor_lock, flags);
-	r = idr_alloc(&drm_minors_idr,
-		      NULL,
-		      64 * type,
-		      64 * (type + 1),
-		      GFP_NOWAIT);
-	spin_unlock_irqrestore(&drm_minor_lock, flags);
+	if (type == DRM_MINOR_ACCEL) {
+		r = accel_minor_alloc();
+	} else {
+		spin_lock_irqsave(&drm_minor_lock, flags);
+		r = idr_alloc(&drm_minors_idr,
+			NULL,
+			64 * type,
+			64 * (type + 1),
+			GFP_NOWAIT);
+		spin_unlock_irqrestore(&drm_minor_lock, flags);
+	}
 	idr_preload_end();
 
 	if (r < 0)
@@ -161,10 +172,14 @@ static int drm_minor_register(struct drm_device *dev, unsigned int type)
 	if (!minor)
 		return 0;
 
-	ret = drm_debugfs_init(minor, minor->index, drm_debugfs_root);
-	if (ret) {
-		DRM_ERROR("DRM: Failed to initialize /sys/kernel/debug/dri.\n");
-		goto err_debugfs;
+	if (minor->type == DRM_MINOR_ACCEL) {
+		accel_debugfs_init(minor, minor->index);
+	} else {
+		ret = drm_debugfs_init(minor, minor->index, drm_debugfs_root);
+		if (ret) {
+			DRM_ERROR("DRM: Failed to initialize /sys/kernel/debug/dri.\n");
+			goto err_debugfs;
+		}
 	}
 
 	ret = device_add(minor->kdev);
@@ -172,9 +187,13 @@ static int drm_minor_register(struct drm_device *dev, unsigned int type)
 		goto err_debugfs;
 
 	/* replace NULL with @minor so lookups will succeed from now on */
-	spin_lock_irqsave(&drm_minor_lock, flags);
-	idr_replace(&drm_minors_idr, minor, minor->index);
-	spin_unlock_irqrestore(&drm_minor_lock, flags);
+	if (minor->type == DRM_MINOR_ACCEL) {
+		accel_minor_replace(minor, minor->index);
+	} else {
+		spin_lock_irqsave(&drm_minor_lock, flags);
+		idr_replace(&drm_minors_idr, minor, minor->index);
+		spin_unlock_irqrestore(&drm_minor_lock, flags);
+	}
 
 	DRM_DEBUG("new minor registered %d\n", minor->index);
 	return 0;
@@ -194,9 +213,13 @@ static void drm_minor_unregister(struct drm_device *dev, unsigned int type)
 		return;
 
 	/* replace @minor with NULL so lookups will fail from now on */
-	spin_lock_irqsave(&drm_minor_lock, flags);
-	idr_replace(&drm_minors_idr, NULL, minor->index);
-	spin_unlock_irqrestore(&drm_minor_lock, flags);
+	if (minor->type == DRM_MINOR_ACCEL) {
+		accel_minor_replace(NULL, minor->index);
+	} else {
+		spin_lock_irqsave(&drm_minor_lock, flags);
+		idr_replace(&drm_minors_idr, NULL, minor->index);
+		spin_unlock_irqrestore(&drm_minor_lock, flags);
+	}
 
 	device_del(minor->kdev);
 	dev_set_drvdata(minor->kdev, NULL); /* safety belt */
@@ -603,6 +626,13 @@ static int drm_dev_init(struct drm_device *dev,
 	/* no per-device feature limits by default */
 	dev->driver_features = ~0u;
 
+	if (drm_core_check_feature(dev, DRIVER_COMPUTE_ACCEL) &&
+				(drm_core_check_feature(dev, DRIVER_RENDER) ||
+				drm_core_check_feature(dev, DRIVER_MODESET))) {
+		DRM_ERROR("DRM driver can't be both a compute acceleration and graphics driver\n");
+		return -EINVAL;
+	}
+
 	drm_legacy_init_members(dev);
 	INIT_LIST_HEAD(&dev->filelist);
 	INIT_LIST_HEAD(&dev->filelist_internal);
@@ -615,7 +645,7 @@ static int drm_dev_init(struct drm_device *dev,
 	mutex_init(&dev->clientlist_mutex);
 	mutex_init(&dev->master_mutex);
 
-	ret = drmm_add_action(dev, drm_dev_init_release, NULL);
+	ret = drmm_add_action_or_reset(dev, drm_dev_init_release, NULL);
 	if (ret)
 		return ret;
 
@@ -628,15 +658,21 @@ static int drm_dev_init(struct drm_device *dev,
 
 	dev->anon_inode = inode;
 
-	if (drm_core_check_feature(dev, DRIVER_RENDER)) {
-		ret = drm_minor_alloc(dev, DRM_MINOR_RENDER);
+	if (drm_core_check_feature(dev, DRIVER_COMPUTE_ACCEL)) {
+		ret = drm_minor_alloc(dev, DRM_MINOR_ACCEL);
 		if (ret)
 			goto err;
-	}
+	} else {
+		if (drm_core_check_feature(dev, DRIVER_RENDER)) {
+			ret = drm_minor_alloc(dev, DRM_MINOR_RENDER);
+			if (ret)
+				goto err;
+		}
 
-	ret = drm_minor_alloc(dev, DRM_MINOR_PRIMARY);
-	if (ret)
-		goto err;
+		ret = drm_minor_alloc(dev, DRM_MINOR_PRIMARY);
+		if (ret)
+			goto err;
+	}
 
 	ret = drm_legacy_create_map_hash(dev);
 	if (ret)
@@ -883,6 +919,10 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags)
 	if (ret)
 		goto err_minors;
 
+	ret = drm_minor_register(dev, DRM_MINOR_ACCEL);
+	if (ret)
+		goto err_minors;
+
 	ret = create_compat_control_link(dev);
 	if (ret)
 		goto err_minors;
@@ -902,12 +942,13 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags)
 		 driver->name, driver->major, driver->minor,
 		 driver->patchlevel, driver->date,
 		 dev->dev ? dev_name(dev->dev) : "virtual device",
-		 dev->primary->index);
+		 dev->primary ? dev->primary->index : dev->accel->index);
 
 	goto out_unlock;
 
 err_minors:
 	remove_compat_control_link(dev);
+	drm_minor_unregister(dev, DRM_MINOR_ACCEL);
 	drm_minor_unregister(dev, DRM_MINOR_PRIMARY);
 	drm_minor_unregister(dev, DRM_MINOR_RENDER);
 out_unlock:
@@ -950,6 +991,7 @@ void drm_dev_unregister(struct drm_device *dev)
 	drm_legacy_rmmaps(dev);
 
 	remove_compat_control_link(dev);
+	drm_minor_unregister(dev, DRM_MINOR_ACCEL);
 	drm_minor_unregister(dev, DRM_MINOR_PRIMARY);
 	drm_minor_unregister(dev, DRM_MINOR_RENDER);
 }
@@ -1034,6 +1076,7 @@ static const struct file_operations drm_stub_fops = {
 static void drm_core_exit(void)
 {
 	drm_privacy_screen_lookup_exit();
+	accel_core_exit();
 	unregister_chrdev(DRM_MAJOR, "drm");
 	debugfs_remove(drm_debugfs_root);
 	drm_sysfs_destroy();
@@ -1061,6 +1104,10 @@ static int __init drm_core_init(void)
 	if (ret < 0)
 		goto error;
 
+	ret = accel_core_init();
+	if (ret < 0)
+		goto error;
+
 	drm_privacy_screen_lookup_init();
 
 	drm_core_init_complete = true;
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index b2d61c05f559..3841aba17abd 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -87,6 +87,8 @@ static int oui(u8 first, u8 second, u8 third)
 #define EDID_QUIRK_FORCE_10BPC			(1 << 11)
 /* Non desktop display (i.e. HMD) */
 #define EDID_QUIRK_NON_DESKTOP			(1 << 12)
+/* Cap the DSC target bitrate to 15bpp */
+#define EDID_QUIRK_CAP_DSC_15BPP		(1 << 13)
 
 #define MICROSOFT_IEEE_OUI	0xca125c
 
@@ -147,6 +149,12 @@ static const struct edid_quirk {
 	EDID_QUIRK('F', 'C', 'M', 13600, EDID_QUIRK_PREFER_LARGE_75 |
 				       EDID_QUIRK_DETAILED_IN_CM),
 
+	/* LG 27GP950 */
+	EDID_QUIRK('G', 'S', 'M', 0x5bbf, EDID_QUIRK_CAP_DSC_15BPP),
+
+	/* LG 27GN950 */
+	EDID_QUIRK('G', 'S', 'M', 0x5b9a, EDID_QUIRK_CAP_DSC_15BPP),
+
 	/* LGD panel of HP zBook 17 G2, eDP 10 bpc, but reports unknown bpc */
 	EDID_QUIRK('L', 'G', 'D', 764, EDID_QUIRK_FORCE_10BPC),
 
@@ -2799,6 +2807,8 @@ u32 drm_edid_get_panel_id(struct i2c_adapter *adapter)
 
 	if (edid_block_status_valid(status, edid_block_tag(base_block)))
 		panel_id = edid_extract_panel_id(base_block);
+	else
+		edid_block_dump(KERN_NOTICE, base_block, 0);
 
 	kfree(base_block);
 
@@ -6363,6 +6373,7 @@ static void drm_reset_display_info(struct drm_connector *connector)
 
 	info->mso_stream_count = 0;
 	info->mso_pixel_overlap = 0;
+	info->max_dsc_bpp = 0;
 }
 
 static u32 update_display_info(struct drm_connector *connector,
@@ -6452,6 +6463,9 @@ out:
 		info->non_desktop = true;
 	}
 
+	if (quirks & EDID_QUIRK_CAP_DSC_15BPP)
+		info->max_dsc_bpp = 15;
+
 	return quirks;
 }
 
diff --git a/drivers/gpu/drm/drm_edid_load.c b/drivers/gpu/drm/drm_edid_load.c
index ef4ab59d6935..5d9ef267ebb3 100644
--- a/drivers/gpu/drm/drm_edid_load.c
+++ b/drivers/gpu/drm/drm_edid_load.c
@@ -172,20 +172,9 @@ static const struct drm_edid *edid_load(struct drm_connector *connector, const c
 		fwdata = generic_edid[builtin];
 		fwsize = sizeof(generic_edid[builtin]);
 	} else {
-		struct platform_device *pdev;
 		int err;
 
-		pdev = platform_device_register_simple(connector->name, -1, NULL, 0);
-		if (IS_ERR(pdev)) {
-			drm_err(connector->dev,
-				"[CONNECTOR:%d:%s] Failed to register EDID firmware platform device for connector \"%s\"\n",
-				connector->base.id, connector->name,
-				connector->name);
-			return ERR_CAST(pdev);
-		}
-
-		err = request_firmware(&fw, name, &pdev->dev);
-		platform_device_unregister(pdev);
+		err = request_firmware(&fw, name, connector->dev->dev);
 		if (err) {
 			drm_err(connector->dev,
 				"[CONNECTOR:%d:%s] Requesting EDID firmware \"%s\" failed (err=%d)\n",
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 71edb80fe0fb..b3a731b9170a 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -30,24 +30,17 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/console.h>
-#include <linux/dma-buf.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/slab.h>
 #include <linux/sysrq.h>
-#include <linux/vmalloc.h>
 
 #include <drm/drm_atomic.h>
-#include <drm/drm_crtc.h>
-#include <drm/drm_crtc_helper.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_framebuffer.h>
+#include <drm/drm_modeset_helper_vtables.h>
 #include <drm/drm_print.h>
 #include <drm/drm_vblank.h>
 
-#include "drm_crtc_helper_internal.h"
 #include "drm_internal.h"
 
 static bool drm_fbdev_emulation = true;
@@ -74,7 +67,7 @@ MODULE_PARM_DESC(drm_fbdev_overalloc,
  * considered as a broken and legacy behaviour from a modern fbdev device.
  */
 #if IS_ENABLED(CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM)
-static bool drm_leak_fbdev_smem = false;
+static bool drm_leak_fbdev_smem;
 module_param_unsafe(drm_leak_fbdev_smem, bool, 0600);
 MODULE_PARM_DESC(drm_leak_fbdev_smem,
 		 "Allow unsafe leaking fbdev physical smem address [default=false]");
@@ -96,11 +89,13 @@ static DEFINE_MUTEX(kernel_fb_helper_lock);
  * It will automatically set up deferred I/O if the driver requires a shadow
  * buffer.
  *
- * At runtime drivers should restore the fbdev console by using
+ * Existing fbdev implementations should restore the fbdev console by using
  * drm_fb_helper_lastclose() as their &drm_driver.lastclose callback.
  * They should also notify the fb helper code from updates to the output
  * configuration by using drm_fb_helper_output_poll_changed() as their
- * &drm_mode_config_funcs.output_poll_changed callback.
+ * &drm_mode_config_funcs.output_poll_changed callback. New implementations
+ * of fbdev should be build on top of struct &drm_client_funcs, which handles
+ * this automatically. Setting the old callbacks should be avoided.
  *
  * For suspend/resume consider using drm_mode_config_helper_suspend() and
  * drm_mode_config_helper_resume() which takes care of fbdev as well.
@@ -368,115 +363,30 @@ static void drm_fb_helper_resume_worker(struct work_struct *work)
 						    resume_work);
 
 	console_lock();
-	fb_set_suspend(helper->fbdev, 0);
+	fb_set_suspend(helper->info, 0);
 	console_unlock();
 }
 
-static void drm_fb_helper_damage_blit_real(struct drm_fb_helper *fb_helper,
-					   struct drm_clip_rect *clip,
-					   struct iosys_map *dst)
+static void drm_fb_helper_fb_dirty(struct drm_fb_helper *helper)
 {
-	struct drm_framebuffer *fb = fb_helper->fb;
-	size_t offset = clip->y1 * fb->pitches[0];
-	size_t len = clip->x2 - clip->x1;
-	unsigned int y;
-	void *src;
-
-	switch (drm_format_info_bpp(fb->format, 0)) {
-	case 1:
-		offset += clip->x1 / 8;
-		len = DIV_ROUND_UP(len + clip->x1 % 8, 8);
-		break;
-	case 2:
-		offset += clip->x1 / 4;
-		len = DIV_ROUND_UP(len + clip->x1 % 4, 4);
-		break;
-	case 4:
-		offset += clip->x1 / 2;
-		len = DIV_ROUND_UP(len + clip->x1 % 2, 2);
-		break;
-	default:
-		offset += clip->x1 * fb->format->cpp[0];
-		len *= fb->format->cpp[0];
-		break;
-	}
-
-	src = fb_helper->fbdev->screen_buffer + offset;
-	iosys_map_incr(dst, offset); /* go to first pixel within clip rect */
-
-	for (y = clip->y1; y < clip->y2; y++) {
-		iosys_map_memcpy_to(dst, 0, src, len);
-		iosys_map_incr(dst, fb->pitches[0]);
-		src += fb->pitches[0];
-	}
-}
-
-static int drm_fb_helper_damage_blit(struct drm_fb_helper *fb_helper,
-				     struct drm_clip_rect *clip)
-{
-	struct drm_client_buffer *buffer = fb_helper->buffer;
-	struct iosys_map map, dst;
-	int ret;
-
-	/*
-	 * We have to pin the client buffer to its current location while
-	 * flushing the shadow buffer. In the general case, concurrent
-	 * modesetting operations could try to move the buffer and would
-	 * fail. The modeset has to be serialized by acquiring the reservation
-	 * object of the underlying BO here.
-	 *
-	 * For fbdev emulation, we only have to protect against fbdev modeset
-	 * operations. Nothing else will involve the client buffer's BO. So it
-	 * is sufficient to acquire struct drm_fb_helper.lock here.
-	 */
-	mutex_lock(&fb_helper->lock);
-
-	ret = drm_client_buffer_vmap(buffer, &map);
-	if (ret)
-		goto out;
-
-	dst = map;
-	drm_fb_helper_damage_blit_real(fb_helper, clip, &dst);
-
-	drm_client_buffer_vunmap(buffer);
-
-out:
-	mutex_unlock(&fb_helper->lock);
-
-	return ret;
-}
-
-static void drm_fb_helper_damage_work(struct work_struct *work)
-{
-	struct drm_fb_helper *helper = container_of(work, struct drm_fb_helper,
-						    damage_work);
 	struct drm_device *dev = helper->dev;
 	struct drm_clip_rect *clip = &helper->damage_clip;
 	struct drm_clip_rect clip_copy;
 	unsigned long flags;
 	int ret;
 
+	if (drm_WARN_ON_ONCE(dev, !helper->funcs->fb_dirty))
+		return;
+
 	spin_lock_irqsave(&helper->damage_lock, flags);
 	clip_copy = *clip;
 	clip->x1 = clip->y1 = ~0;
 	clip->x2 = clip->y2 = 0;
 	spin_unlock_irqrestore(&helper->damage_lock, flags);
 
-	/* Call damage handlers only if necessary */
-	if (!(clip_copy.x1 < clip_copy.x2 && clip_copy.y1 < clip_copy.y2))
-		return;
-
-	if (helper->buffer) {
-		ret = drm_fb_helper_damage_blit(helper, &clip_copy);
-		if (drm_WARN_ONCE(dev, ret, "Damage blitter failed: ret=%d\n", ret))
-			goto err;
-	}
-
-	if (helper->fb->funcs->dirty) {
-		ret = helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, &clip_copy, 1);
-		if (drm_WARN_ONCE(dev, ret, "Dirty helper failed: ret=%d\n", ret))
-			goto err;
-	}
+	ret = helper->funcs->fb_dirty(helper, &clip_copy);
+	if (ret)
+		goto err;
 
 	return;
 
@@ -493,6 +403,13 @@ err:
 	spin_unlock_irqrestore(&helper->damage_lock, flags);
 }
 
+static void drm_fb_helper_damage_work(struct work_struct *work)
+{
+	struct drm_fb_helper *helper = container_of(work, struct drm_fb_helper, damage_work);
+
+	drm_fb_helper_fb_dirty(helper);
+}
+
 /**
  * drm_fb_helper_prepare - setup a drm_fb_helper structure
  * @dev: DRM device
@@ -536,11 +453,6 @@ int drm_fb_helper_init(struct drm_device *dev,
 {
 	int ret;
 
-	if (!drm_fbdev_emulation) {
-		dev->fb_helper = fb_helper;
-		return 0;
-	}
-
 	/*
 	 * If this is not the generic fbdev client, initialize a drm_client
 	 * without callbacks so we can use the modesets.
@@ -558,7 +470,7 @@ int drm_fb_helper_init(struct drm_device *dev,
 EXPORT_SYMBOL(drm_fb_helper_init);
 
 /**
- * drm_fb_helper_alloc_fbi - allocate fb_info and some of its members
+ * drm_fb_helper_alloc_info - allocate fb_info and some of its members
  * @fb_helper: driver-allocated fbdev helper
  *
  * A helper to alloc fb_info and the members cmap and apertures. Called
@@ -570,7 +482,7 @@ EXPORT_SYMBOL(drm_fb_helper_init);
  * fb_info pointer if things went okay, pointer containing error code
  * otherwise
  */
-struct fb_info *drm_fb_helper_alloc_fbi(struct drm_fb_helper *fb_helper)
+struct fb_info *drm_fb_helper_alloc_info(struct drm_fb_helper *fb_helper)
 {
 	struct device *dev = fb_helper->dev->dev;
 	struct fb_info *info;
@@ -598,7 +510,7 @@ struct fb_info *drm_fb_helper_alloc_fbi(struct drm_fb_helper *fb_helper)
 		goto err_free_cmap;
 	}
 
-	fb_helper->fbdev = info;
+	fb_helper->info = info;
 	info->skip_vt_switch = true;
 
 	return info;
@@ -609,22 +521,22 @@ err_release:
 	framebuffer_release(info);
 	return ERR_PTR(ret);
 }
-EXPORT_SYMBOL(drm_fb_helper_alloc_fbi);
+EXPORT_SYMBOL(drm_fb_helper_alloc_info);
 
 /**
- * drm_fb_helper_unregister_fbi - unregister fb_info framebuffer device
+ * drm_fb_helper_unregister_info - unregister fb_info framebuffer device
  * @fb_helper: driver-allocated fbdev helper, can be NULL
  *
  * A wrapper around unregister_framebuffer, to release the fb_info
  * framebuffer device. This must be called before releasing all resources for
  * @fb_helper by calling drm_fb_helper_fini().
  */
-void drm_fb_helper_unregister_fbi(struct drm_fb_helper *fb_helper)
+void drm_fb_helper_unregister_info(struct drm_fb_helper *fb_helper)
 {
-	if (fb_helper && fb_helper->fbdev)
-		unregister_framebuffer(fb_helper->fbdev);
+	if (fb_helper && fb_helper->info)
+		unregister_framebuffer(fb_helper->info);
 }
-EXPORT_SYMBOL(drm_fb_helper_unregister_fbi);
+EXPORT_SYMBOL(drm_fb_helper_unregister_info);
 
 /**
  * drm_fb_helper_fini - finialize a &struct drm_fb_helper
@@ -647,13 +559,13 @@ void drm_fb_helper_fini(struct drm_fb_helper *fb_helper)
 	cancel_work_sync(&fb_helper->resume_work);
 	cancel_work_sync(&fb_helper->damage_work);
 
-	info = fb_helper->fbdev;
+	info = fb_helper->info;
 	if (info) {
 		if (info->cmap.len)
 			fb_dealloc_cmap(&info->cmap);
 		framebuffer_release(info);
 	}
-	fb_helper->fbdev = NULL;
+	fb_helper->info = NULL;
 
 	mutex_lock(&kernel_fb_helper_lock);
 	if (!list_empty(&fb_helper->kernel_fb_list)) {
@@ -670,32 +582,24 @@ void drm_fb_helper_fini(struct drm_fb_helper *fb_helper)
 }
 EXPORT_SYMBOL(drm_fb_helper_fini);
 
-static bool drm_fbdev_use_shadow_fb(struct drm_fb_helper *fb_helper)
-{
-	struct drm_device *dev = fb_helper->dev;
-	struct drm_framebuffer *fb = fb_helper->fb;
-
-	return dev->mode_config.prefer_shadow_fbdev ||
-	       dev->mode_config.prefer_shadow ||
-	       fb->funcs->dirty;
-}
-
-static void drm_fb_helper_damage(struct fb_info *info, u32 x, u32 y,
-				 u32 width, u32 height)
+static void drm_fb_helper_add_damage_clip(struct drm_fb_helper *helper, u32 x, u32 y,
+					  u32 width, u32 height)
 {
-	struct drm_fb_helper *helper = info->par;
 	struct drm_clip_rect *clip = &helper->damage_clip;
 	unsigned long flags;
 
-	if (!drm_fbdev_use_shadow_fb(helper))
-		return;
-
 	spin_lock_irqsave(&helper->damage_lock, flags);
 	clip->x1 = min_t(u32, clip->x1, x);
 	clip->y1 = min_t(u32, clip->y1, y);
 	clip->x2 = max_t(u32, clip->x2, x + width);
 	clip->y2 = max_t(u32, clip->y2, y + height);
 	spin_unlock_irqrestore(&helper->damage_lock, flags);
+}
+
+static void drm_fb_helper_damage(struct drm_fb_helper *helper, u32 x, u32 y,
+				 u32 width, u32 height)
+{
+	drm_fb_helper_add_damage_clip(helper, x, y, width, height);
 
 	schedule_work(&helper->damage_work);
 }
@@ -739,6 +643,7 @@ static void drm_fb_helper_memory_range_to_clip(struct fb_info *info, off_t off,
  */
 void drm_fb_helper_deferred_io(struct fb_info *info, struct list_head *pagereflist)
 {
+	struct drm_fb_helper *helper = info->par;
 	unsigned long start, end, min_off, max_off;
 	struct fb_deferred_io_pageref *pageref;
 	struct drm_rect damage_area;
@@ -751,8 +656,6 @@ void drm_fb_helper_deferred_io(struct fb_info *info, struct list_head *pagerefli
 		min_off = min(min_off, start);
 		max_off = max(max_off, end);
 	}
-	if (min_off >= max_off)
-		return;
 
 	/*
 	 * As we can only track pages, we might reach beyond the end
@@ -761,53 +664,160 @@ void drm_fb_helper_deferred_io(struct fb_info *info, struct list_head *pagerefli
 	 */
 	max_off = min(max_off, info->screen_size);
 
-	drm_fb_helper_memory_range_to_clip(info, min_off, max_off - min_off, &damage_area);
-	drm_fb_helper_damage(info, damage_area.x1, damage_area.y1,
-			     drm_rect_width(&damage_area),
-			     drm_rect_height(&damage_area));
+	if (min_off < max_off) {
+		drm_fb_helper_memory_range_to_clip(info, min_off, max_off - min_off, &damage_area);
+		drm_fb_helper_damage(helper, damage_area.x1, damage_area.y1,
+				     drm_rect_width(&damage_area),
+				     drm_rect_height(&damage_area));
+	}
 }
 EXPORT_SYMBOL(drm_fb_helper_deferred_io);
 
+typedef ssize_t (*drm_fb_helper_read_screen)(struct fb_info *info, char __user *buf,
+					     size_t count, loff_t pos);
+
+static ssize_t __drm_fb_helper_read(struct fb_info *info, char __user *buf, size_t count,
+				    loff_t *ppos, drm_fb_helper_read_screen read_screen)
+{
+	loff_t pos = *ppos;
+	size_t total_size;
+	ssize_t ret;
+
+	if (info->screen_size)
+		total_size = info->screen_size;
+	else
+		total_size = info->fix.smem_len;
+
+	if (pos >= total_size)
+		return 0;
+	if (count >= total_size)
+		count = total_size;
+	if (total_size - count < pos)
+		count = total_size - pos;
+
+	if (info->fbops->fb_sync)
+		info->fbops->fb_sync(info);
+
+	ret = read_screen(info, buf, count, pos);
+	if (ret > 0)
+		*ppos += ret;
+
+	return ret;
+}
+
+typedef ssize_t (*drm_fb_helper_write_screen)(struct fb_info *info, const char __user *buf,
+					      size_t count, loff_t pos);
+
+static ssize_t __drm_fb_helper_write(struct fb_info *info, const char __user *buf, size_t count,
+				     loff_t *ppos, drm_fb_helper_write_screen write_screen)
+{
+	loff_t pos = *ppos;
+	size_t total_size;
+	ssize_t ret;
+	int err = 0;
+
+	if (info->screen_size)
+		total_size = info->screen_size;
+	else
+		total_size = info->fix.smem_len;
+
+	if (pos > total_size)
+		return -EFBIG;
+	if (count > total_size) {
+		err = -EFBIG;
+		count = total_size;
+	}
+	if (total_size - count < pos) {
+		if (!err)
+			err = -ENOSPC;
+		count = total_size - pos;
+	}
+
+	if (info->fbops->fb_sync)
+		info->fbops->fb_sync(info);
+
+	/*
+	 * Copy to framebuffer even if we already logged an error. Emulates
+	 * the behavior of the original fbdev implementation.
+	 */
+	ret = write_screen(info, buf, count, pos);
+	if (ret < 0)
+		return ret; /* return last error, if any */
+	else if (!ret)
+		return err; /* return previous error, if any */
+
+	*ppos += ret;
+
+	return ret;
+}
+
+static ssize_t drm_fb_helper_read_screen_buffer(struct fb_info *info, char __user *buf,
+						size_t count, loff_t pos)
+{
+	const char *src = info->screen_buffer + pos;
+
+	if (copy_to_user(buf, src, count))
+		return -EFAULT;
+
+	return count;
+}
+
 /**
- * drm_fb_helper_sys_read - wrapper around fb_sys_read
+ * drm_fb_helper_sys_read - Implements struct &fb_ops.fb_read for system memory
  * @info: fb_info struct pointer
  * @buf: userspace buffer to read from framebuffer memory
  * @count: number of bytes to read from framebuffer memory
  * @ppos: read offset within framebuffer memory
  *
- * A wrapper around fb_sys_read implemented by fbdev core
+ * Returns:
+ * The number of bytes read on success, or an error code otherwise.
  */
 ssize_t drm_fb_helper_sys_read(struct fb_info *info, char __user *buf,
 			       size_t count, loff_t *ppos)
 {
-	return fb_sys_read(info, buf, count, ppos);
+	return __drm_fb_helper_read(info, buf, count, ppos, drm_fb_helper_read_screen_buffer);
 }
 EXPORT_SYMBOL(drm_fb_helper_sys_read);
 
+static ssize_t drm_fb_helper_write_screen_buffer(struct fb_info *info, const char __user *buf,
+						 size_t count, loff_t pos)
+{
+	char *dst = info->screen_buffer + pos;
+
+	if (copy_from_user(dst, buf, count))
+		return -EFAULT;
+
+	return count;
+}
+
 /**
- * drm_fb_helper_sys_write - wrapper around fb_sys_write
+ * drm_fb_helper_sys_write - Implements struct &fb_ops.fb_write for system memory
  * @info: fb_info struct pointer
  * @buf: userspace buffer to write to framebuffer memory
  * @count: number of bytes to write to framebuffer memory
  * @ppos: write offset within framebuffer memory
  *
- * A wrapper around fb_sys_write implemented by fbdev core
+ * Returns:
+ * The number of bytes written on success, or an error code otherwise.
  */
 ssize_t drm_fb_helper_sys_write(struct fb_info *info, const char __user *buf,
 				size_t count, loff_t *ppos)
 {
+	struct drm_fb_helper *helper = info->par;
 	loff_t pos = *ppos;
 	ssize_t ret;
 	struct drm_rect damage_area;
 
-	ret = fb_sys_write(info, buf, count, ppos);
+	ret = __drm_fb_helper_write(info, buf, count, ppos, drm_fb_helper_write_screen_buffer);
 	if (ret <= 0)
 		return ret;
 
-	drm_fb_helper_memory_range_to_clip(info, pos, ret, &damage_area);
-	drm_fb_helper_damage(info, damage_area.x1, damage_area.y1,
-			     drm_rect_width(&damage_area),
-			     drm_rect_height(&damage_area));
+	if (helper->funcs->fb_dirty) {
+		drm_fb_helper_memory_range_to_clip(info, pos, ret, &damage_area);
+		drm_fb_helper_damage(helper, damage_area.x1, damage_area.y1,
+				     drm_rect_width(&damage_area),
+				     drm_rect_height(&damage_area));
+	}
 
 	return ret;
 }
@@ -823,8 +833,12 @@ EXPORT_SYMBOL(drm_fb_helper_sys_write);
 void drm_fb_helper_sys_fillrect(struct fb_info *info,
 				const struct fb_fillrect *rect)
 {
+	struct drm_fb_helper *helper = info->par;
+
 	sys_fillrect(info, rect);
-	drm_fb_helper_damage(info, rect->dx, rect->dy, rect->width, rect->height);
+
+	if (helper->funcs->fb_dirty)
+		drm_fb_helper_damage(helper, rect->dx, rect->dy, rect->width, rect->height);
 }
 EXPORT_SYMBOL(drm_fb_helper_sys_fillrect);
 
@@ -838,8 +852,12 @@ EXPORT_SYMBOL(drm_fb_helper_sys_fillrect);
 void drm_fb_helper_sys_copyarea(struct fb_info *info,
 				const struct fb_copyarea *area)
 {
+	struct drm_fb_helper *helper = info->par;
+
 	sys_copyarea(info, area);
-	drm_fb_helper_damage(info, area->dx, area->dy, area->width, area->height);
+
+	if (helper->funcs->fb_dirty)
+		drm_fb_helper_damage(helper, area->dx, area->dy, area->width, area->height);
 }
 EXPORT_SYMBOL(drm_fb_helper_sys_copyarea);
 
@@ -853,11 +871,131 @@ EXPORT_SYMBOL(drm_fb_helper_sys_copyarea);
 void drm_fb_helper_sys_imageblit(struct fb_info *info,
 				 const struct fb_image *image)
 {
+	struct drm_fb_helper *helper = info->par;
+
 	sys_imageblit(info, image);
-	drm_fb_helper_damage(info, image->dx, image->dy, image->width, image->height);
+
+	if (helper->funcs->fb_dirty)
+		drm_fb_helper_damage(helper, image->dx, image->dy, image->width, image->height);
 }
 EXPORT_SYMBOL(drm_fb_helper_sys_imageblit);
 
+static ssize_t fb_read_screen_base(struct fb_info *info, char __user *buf, size_t count,
+				   loff_t pos)
+{
+	const char __iomem *src = info->screen_base + pos;
+	size_t alloc_size = min_t(size_t, count, PAGE_SIZE);
+	ssize_t ret = 0;
+	int err = 0;
+	char *tmp;
+
+	tmp = kmalloc(alloc_size, GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	while (count) {
+		size_t c = min_t(size_t, count, alloc_size);
+
+		memcpy_fromio(tmp, src, c);
+		if (copy_to_user(buf, tmp, c)) {
+			err = -EFAULT;
+			break;
+		}
+
+		src += c;
+		buf += c;
+		ret += c;
+		count -= c;
+	}
+
+	kfree(tmp);
+
+	return ret ? ret : err;
+}
+
+/**
+ * drm_fb_helper_cfb_read - Implements struct &fb_ops.fb_read for I/O memory
+ * @info: fb_info struct pointer
+ * @buf: userspace buffer to read from framebuffer memory
+ * @count: number of bytes to read from framebuffer memory
+ * @ppos: read offset within framebuffer memory
+ *
+ * Returns:
+ * The number of bytes read on success, or an error code otherwise.
+ */
+ssize_t drm_fb_helper_cfb_read(struct fb_info *info, char __user *buf,
+			       size_t count, loff_t *ppos)
+{
+	return __drm_fb_helper_read(info, buf, count, ppos, fb_read_screen_base);
+}
+EXPORT_SYMBOL(drm_fb_helper_cfb_read);
+
+static ssize_t fb_write_screen_base(struct fb_info *info, const char __user *buf, size_t count,
+				    loff_t pos)
+{
+	char __iomem *dst = info->screen_base + pos;
+	size_t alloc_size = min_t(size_t, count, PAGE_SIZE);
+	ssize_t ret = 0;
+	int err = 0;
+	u8 *tmp;
+
+	tmp = kmalloc(alloc_size, GFP_KERNEL);
+	if (!tmp)
+		return -ENOMEM;
+
+	while (count) {
+		size_t c = min_t(size_t, count, alloc_size);
+
+		if (copy_from_user(tmp, buf, c)) {
+			err = -EFAULT;
+			break;
+		}
+		memcpy_toio(dst, tmp, c);
+
+		dst += c;
+		buf += c;
+		ret += c;
+		count -= c;
+	}
+
+	kfree(tmp);
+
+	return ret ? ret : err;
+}
+
+/**
+ * drm_fb_helper_cfb_write - Implements struct &fb_ops.fb_write for I/O memory
+ * @info: fb_info struct pointer
+ * @buf: userspace buffer to write to framebuffer memory
+ * @count: number of bytes to write to framebuffer memory
+ * @ppos: write offset within framebuffer memory
+ *
+ * Returns:
+ * The number of bytes written on success, or an error code otherwise.
+ */
+ssize_t drm_fb_helper_cfb_write(struct fb_info *info, const char __user *buf,
+				size_t count, loff_t *ppos)
+{
+	struct drm_fb_helper *helper = info->par;
+	loff_t pos = *ppos;
+	ssize_t ret;
+	struct drm_rect damage_area;
+
+	ret = __drm_fb_helper_write(info, buf, count, ppos, fb_write_screen_base);
+	if (ret <= 0)
+		return ret;
+
+	if (helper->funcs->fb_dirty) {
+		drm_fb_helper_memory_range_to_clip(info, pos, ret, &damage_area);
+		drm_fb_helper_damage(helper, damage_area.x1, damage_area.y1,
+				     drm_rect_width(&damage_area),
+				     drm_rect_height(&damage_area));
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(drm_fb_helper_cfb_write);
+
 /**
  * drm_fb_helper_cfb_fillrect - wrapper around cfb_fillrect
  * @info: fbdev registered by the helper
@@ -868,8 +1006,12 @@ EXPORT_SYMBOL(drm_fb_helper_sys_imageblit);
 void drm_fb_helper_cfb_fillrect(struct fb_info *info,
 				const struct fb_fillrect *rect)
 {
+	struct drm_fb_helper *helper = info->par;
+
 	cfb_fillrect(info, rect);
-	drm_fb_helper_damage(info, rect->dx, rect->dy, rect->width, rect->height);
+
+	if (helper->funcs->fb_dirty)
+		drm_fb_helper_damage(helper, rect->dx, rect->dy, rect->width, rect->height);
 }
 EXPORT_SYMBOL(drm_fb_helper_cfb_fillrect);
 
@@ -883,8 +1025,12 @@ EXPORT_SYMBOL(drm_fb_helper_cfb_fillrect);
 void drm_fb_helper_cfb_copyarea(struct fb_info *info,
 				const struct fb_copyarea *area)
 {
+	struct drm_fb_helper *helper = info->par;
+
 	cfb_copyarea(info, area);
-	drm_fb_helper_damage(info, area->dx, area->dy, area->width, area->height);
+
+	if (helper->funcs->fb_dirty)
+		drm_fb_helper_damage(helper, area->dx, area->dy, area->width, area->height);
 }
 EXPORT_SYMBOL(drm_fb_helper_cfb_copyarea);
 
@@ -898,8 +1044,12 @@ EXPORT_SYMBOL(drm_fb_helper_cfb_copyarea);
 void drm_fb_helper_cfb_imageblit(struct fb_info *info,
 				 const struct fb_image *image)
 {
+	struct drm_fb_helper *helper = info->par;
+
 	cfb_imageblit(info, image);
-	drm_fb_helper_damage(info, image->dx, image->dy, image->width, image->height);
+
+	if (helper->funcs->fb_dirty)
+		drm_fb_helper_damage(helper, image->dx, image->dy, image->width, image->height);
 }
 EXPORT_SYMBOL(drm_fb_helper_cfb_imageblit);
 
@@ -914,8 +1064,8 @@ EXPORT_SYMBOL(drm_fb_helper_cfb_imageblit);
  */
 void drm_fb_helper_set_suspend(struct drm_fb_helper *fb_helper, bool suspend)
 {
-	if (fb_helper && fb_helper->fbdev)
-		fb_set_suspend(fb_helper->fbdev, suspend);
+	if (fb_helper && fb_helper->info)
+		fb_set_suspend(fb_helper->info, suspend);
 }
 EXPORT_SYMBOL(drm_fb_helper_set_suspend);
 
@@ -938,20 +1088,20 @@ EXPORT_SYMBOL(drm_fb_helper_set_suspend);
 void drm_fb_helper_set_suspend_unlocked(struct drm_fb_helper *fb_helper,
 					bool suspend)
 {
-	if (!fb_helper || !fb_helper->fbdev)
+	if (!fb_helper || !fb_helper->info)
 		return;
 
 	/* make sure there's no pending/ongoing resume */
 	flush_work(&fb_helper->resume_work);
 
 	if (suspend) {
-		if (fb_helper->fbdev->state != FBINFO_STATE_RUNNING)
+		if (fb_helper->info->state != FBINFO_STATE_RUNNING)
 			return;
 
 		console_lock();
 
 	} else {
-		if (fb_helper->fbdev->state == FBINFO_STATE_RUNNING)
+		if (fb_helper->info->state == FBINFO_STATE_RUNNING)
 			return;
 
 		if (!console_trylock()) {
@@ -960,7 +1110,7 @@ void drm_fb_helper_set_suspend_unlocked(struct drm_fb_helper *fb_helper,
 		}
 	}
 
-	fb_set_suspend(fb_helper->fbdev, suspend);
+	fb_set_suspend(fb_helper->info, suspend);
 	console_unlock();
 }
 EXPORT_SYMBOL(drm_fb_helper_set_suspend_unlocked);
@@ -1749,6 +1899,10 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 		sizes.surface_height = config->max_height;
 	}
 
+#if IS_ENABLED(CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM)
+	fb_helper->hint_leak_smem_start = drm_leak_fbdev_smem;
+#endif
+
 	/* push down into drivers */
 	ret = (*fb_helper->funcs->fb_probe)(fb_helper, &sizes);
 	if (ret < 0)
@@ -1850,7 +2004,7 @@ EXPORT_SYMBOL(drm_fb_helper_fill_info);
 /*
  * This is a continuation of drm_setup_crtcs() that sets up anything related
  * to the framebuffer. During initialization, drm_setup_crtcs() is called before
- * the framebuffer has been allocated (fb_helper->fb and fb_helper->fbdev).
+ * the framebuffer has been allocated (fb_helper->fb and fb_helper->info).
  * So, any setup that touches those fields needs to be done here instead of in
  * drm_setup_crtcs().
  */
@@ -1858,7 +2012,7 @@ static void drm_setup_crtcs_fb(struct drm_fb_helper *fb_helper)
 {
 	struct drm_client_dev *client = &fb_helper->client;
 	struct drm_connector_list_iter conn_iter;
-	struct fb_info *info = fb_helper->fbdev;
+	struct fb_info *info = fb_helper->info;
 	unsigned int rotation, sw_rotations = 0;
 	struct drm_connector *connector;
 	struct drm_mode_set *modeset;
@@ -1942,11 +2096,11 @@ __drm_fb_helper_initial_config_and_unlock(struct drm_fb_helper *fb_helper,
 
 	fb_helper->deferred_setup = false;
 
-	info = fb_helper->fbdev;
+	info = fb_helper->info;
 	info->var.pixclock = 0;
 	/* Shamelessly allow physical address leaking to userspace */
 #if IS_ENABLED(CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM)
-	if (!drm_leak_fbdev_smem)
+	if (!fb_helper->hint_leak_smem_start)
 #endif
 		/* don't leak any physical addresses to userspace */
 		info->flags |= FBINFO_HIDE_SMEM_START;
@@ -2077,7 +2231,7 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper)
 	drm_setup_crtcs_fb(fb_helper);
 	mutex_unlock(&fb_helper->lock);
 
-	drm_fb_helper_set_par(fb_helper->fbdev);
+	drm_fb_helper_set_par(fb_helper->info);
 
 	return 0;
 }
@@ -2103,530 +2257,10 @@ EXPORT_SYMBOL(drm_fb_helper_lastclose);
  *
  * This function can be used as the
  * &drm_mode_config_funcs.output_poll_changed callback for drivers that only
- * need to call drm_fb_helper_hotplug_event().
+ * need to call drm_fbdev.hotplug_event().
  */
 void drm_fb_helper_output_poll_changed(struct drm_device *dev)
 {
 	drm_fb_helper_hotplug_event(dev->fb_helper);
 }
 EXPORT_SYMBOL(drm_fb_helper_output_poll_changed);
-
-/* @user: 1=userspace, 0=fbcon */
-static int drm_fbdev_fb_open(struct fb_info *info, int user)
-{
-	struct drm_fb_helper *fb_helper = info->par;
-
-	/* No need to take a ref for fbcon because it unbinds on unregister */
-	if (user && !try_module_get(fb_helper->dev->driver->fops->owner))
-		return -ENODEV;
-
-	return 0;
-}
-
-static int drm_fbdev_fb_release(struct fb_info *info, int user)
-{
-	struct drm_fb_helper *fb_helper = info->par;
-
-	if (user)
-		module_put(fb_helper->dev->driver->fops->owner);
-
-	return 0;
-}
-
-static void drm_fbdev_cleanup(struct drm_fb_helper *fb_helper)
-{
-	struct fb_info *fbi = fb_helper->fbdev;
-	void *shadow = NULL;
-
-	if (!fb_helper->dev)
-		return;
-
-	if (fbi) {
-		if (fbi->fbdefio)
-			fb_deferred_io_cleanup(fbi);
-		if (drm_fbdev_use_shadow_fb(fb_helper))
-			shadow = fbi->screen_buffer;
-	}
-
-	drm_fb_helper_fini(fb_helper);
-
-	if (shadow)
-		vfree(shadow);
-	else if (fb_helper->buffer)
-		drm_client_buffer_vunmap(fb_helper->buffer);
-
-	drm_client_framebuffer_delete(fb_helper->buffer);
-}
-
-static void drm_fbdev_release(struct drm_fb_helper *fb_helper)
-{
-	drm_fbdev_cleanup(fb_helper);
-	drm_client_release(&fb_helper->client);
-	kfree(fb_helper);
-}
-
-/*
- * fb_ops.fb_destroy is called by the last put_fb_info() call at the end of
- * unregister_framebuffer() or fb_release().
- */
-static void drm_fbdev_fb_destroy(struct fb_info *info)
-{
-	drm_fbdev_release(info->par);
-}
-
-static int drm_fbdev_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
-{
-	struct drm_fb_helper *fb_helper = info->par;
-
-	if (drm_fbdev_use_shadow_fb(fb_helper))
-		return fb_deferred_io_mmap(info, vma);
-	else if (fb_helper->dev->driver->gem_prime_mmap)
-		return fb_helper->dev->driver->gem_prime_mmap(fb_helper->buffer->gem, vma);
-	else
-		return -ENODEV;
-}
-
-static bool drm_fbdev_use_iomem(struct fb_info *info)
-{
-	struct drm_fb_helper *fb_helper = info->par;
-	struct drm_client_buffer *buffer = fb_helper->buffer;
-
-	return !drm_fbdev_use_shadow_fb(fb_helper) && buffer->map.is_iomem;
-}
-
-static ssize_t fb_read_screen_base(struct fb_info *info, char __user *buf, size_t count,
-				   loff_t pos)
-{
-	const char __iomem *src = info->screen_base + pos;
-	size_t alloc_size = min_t(size_t, count, PAGE_SIZE);
-	ssize_t ret = 0;
-	int err = 0;
-	char *tmp;
-
-	tmp = kmalloc(alloc_size, GFP_KERNEL);
-	if (!tmp)
-		return -ENOMEM;
-
-	while (count) {
-		size_t c = min_t(size_t, count, alloc_size);
-
-		memcpy_fromio(tmp, src, c);
-		if (copy_to_user(buf, tmp, c)) {
-			err = -EFAULT;
-			break;
-		}
-
-		src += c;
-		buf += c;
-		ret += c;
-		count -= c;
-	}
-
-	kfree(tmp);
-
-	return ret ? ret : err;
-}
-
-static ssize_t fb_read_screen_buffer(struct fb_info *info, char __user *buf, size_t count,
-				     loff_t pos)
-{
-	const char *src = info->screen_buffer + pos;
-
-	if (copy_to_user(buf, src, count))
-		return -EFAULT;
-
-	return count;
-}
-
-static ssize_t drm_fbdev_fb_read(struct fb_info *info, char __user *buf,
-				 size_t count, loff_t *ppos)
-{
-	loff_t pos = *ppos;
-	size_t total_size;
-	ssize_t ret;
-
-	if (info->screen_size)
-		total_size = info->screen_size;
-	else
-		total_size = info->fix.smem_len;
-
-	if (pos >= total_size)
-		return 0;
-	if (count >= total_size)
-		count = total_size;
-	if (total_size - count < pos)
-		count = total_size - pos;
-
-	if (drm_fbdev_use_iomem(info))
-		ret = fb_read_screen_base(info, buf, count, pos);
-	else
-		ret = fb_read_screen_buffer(info, buf, count, pos);
-
-	if (ret > 0)
-		*ppos += ret;
-
-	return ret;
-}
-
-static ssize_t fb_write_screen_base(struct fb_info *info, const char __user *buf, size_t count,
-				    loff_t pos)
-{
-	char __iomem *dst = info->screen_base + pos;
-	size_t alloc_size = min_t(size_t, count, PAGE_SIZE);
-	ssize_t ret = 0;
-	int err = 0;
-	u8 *tmp;
-
-	tmp = kmalloc(alloc_size, GFP_KERNEL);
-	if (!tmp)
-		return -ENOMEM;
-
-	while (count) {
-		size_t c = min_t(size_t, count, alloc_size);
-
-		if (copy_from_user(tmp, buf, c)) {
-			err = -EFAULT;
-			break;
-		}
-		memcpy_toio(dst, tmp, c);
-
-		dst += c;
-		buf += c;
-		ret += c;
-		count -= c;
-	}
-
-	kfree(tmp);
-
-	return ret ? ret : err;
-}
-
-static ssize_t fb_write_screen_buffer(struct fb_info *info, const char __user *buf, size_t count,
-				      loff_t pos)
-{
-	char *dst = info->screen_buffer + pos;
-
-	if (copy_from_user(dst, buf, count))
-		return -EFAULT;
-
-	return count;
-}
-
-static ssize_t drm_fbdev_fb_write(struct fb_info *info, const char __user *buf,
-				  size_t count, loff_t *ppos)
-{
-	loff_t pos = *ppos;
-	size_t total_size;
-	ssize_t ret;
-	struct drm_rect damage_area;
-	int err = 0;
-
-	if (info->screen_size)
-		total_size = info->screen_size;
-	else
-		total_size = info->fix.smem_len;
-
-	if (pos > total_size)
-		return -EFBIG;
-	if (count > total_size) {
-		err = -EFBIG;
-		count = total_size;
-	}
-	if (total_size - count < pos) {
-		if (!err)
-			err = -ENOSPC;
-		count = total_size - pos;
-	}
-
-	/*
-	 * Copy to framebuffer even if we already logged an error. Emulates
-	 * the behavior of the original fbdev implementation.
-	 */
-	if (drm_fbdev_use_iomem(info))
-		ret = fb_write_screen_base(info, buf, count, pos);
-	else
-		ret = fb_write_screen_buffer(info, buf, count, pos);
-
-	if (ret < 0)
-		return ret; /* return last error, if any */
-	else if (!ret)
-		return err; /* return previous error, if any */
-
-	*ppos += ret;
-
-	drm_fb_helper_memory_range_to_clip(info, pos, ret, &damage_area);
-	drm_fb_helper_damage(info, damage_area.x1, damage_area.y1,
-			     drm_rect_width(&damage_area),
-			     drm_rect_height(&damage_area));
-
-	return ret;
-}
-
-static void drm_fbdev_fb_fillrect(struct fb_info *info,
-				  const struct fb_fillrect *rect)
-{
-	if (drm_fbdev_use_iomem(info))
-		drm_fb_helper_cfb_fillrect(info, rect);
-	else
-		drm_fb_helper_sys_fillrect(info, rect);
-}
-
-static void drm_fbdev_fb_copyarea(struct fb_info *info,
-				  const struct fb_copyarea *area)
-{
-	if (drm_fbdev_use_iomem(info))
-		drm_fb_helper_cfb_copyarea(info, area);
-	else
-		drm_fb_helper_sys_copyarea(info, area);
-}
-
-static void drm_fbdev_fb_imageblit(struct fb_info *info,
-				   const struct fb_image *image)
-{
-	if (drm_fbdev_use_iomem(info))
-		drm_fb_helper_cfb_imageblit(info, image);
-	else
-		drm_fb_helper_sys_imageblit(info, image);
-}
-
-static const struct fb_ops drm_fbdev_fb_ops = {
-	.owner		= THIS_MODULE,
-	DRM_FB_HELPER_DEFAULT_OPS,
-	.fb_open	= drm_fbdev_fb_open,
-	.fb_release	= drm_fbdev_fb_release,
-	.fb_destroy	= drm_fbdev_fb_destroy,
-	.fb_mmap	= drm_fbdev_fb_mmap,
-	.fb_read	= drm_fbdev_fb_read,
-	.fb_write	= drm_fbdev_fb_write,
-	.fb_fillrect	= drm_fbdev_fb_fillrect,
-	.fb_copyarea	= drm_fbdev_fb_copyarea,
-	.fb_imageblit	= drm_fbdev_fb_imageblit,
-};
-
-static struct fb_deferred_io drm_fbdev_defio = {
-	.delay		= HZ / 20,
-	.deferred_io	= drm_fb_helper_deferred_io,
-};
-
-/*
- * This function uses the client API to create a framebuffer backed by a dumb buffer.
- *
- * The _sys_ versions are used for &fb_ops.fb_read, fb_write, fb_fillrect,
- * fb_copyarea, fb_imageblit.
- */
-static int drm_fb_helper_generic_probe(struct drm_fb_helper *fb_helper,
-				       struct drm_fb_helper_surface_size *sizes)
-{
-	struct drm_client_dev *client = &fb_helper->client;
-	struct drm_device *dev = fb_helper->dev;
-	struct drm_client_buffer *buffer;
-	struct drm_framebuffer *fb;
-	struct fb_info *fbi;
-	u32 format;
-	struct iosys_map map;
-	int ret;
-
-	drm_dbg_kms(dev, "surface width(%d), height(%d) and bpp(%d)\n",
-		    sizes->surface_width, sizes->surface_height,
-		    sizes->surface_bpp);
-
-	format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth);
-	buffer = drm_client_framebuffer_create(client, sizes->surface_width,
-					       sizes->surface_height, format);
-	if (IS_ERR(buffer))
-		return PTR_ERR(buffer);
-
-	fb_helper->buffer = buffer;
-	fb_helper->fb = buffer->fb;
-	fb = buffer->fb;
-
-	fbi = drm_fb_helper_alloc_fbi(fb_helper);
-	if (IS_ERR(fbi))
-		return PTR_ERR(fbi);
-
-	fbi->fbops = &drm_fbdev_fb_ops;
-	fbi->screen_size = sizes->surface_height * fb->pitches[0];
-	fbi->fix.smem_len = fbi->screen_size;
-	fbi->flags = FBINFO_DEFAULT;
-
-	drm_fb_helper_fill_info(fbi, fb_helper, sizes);
-
-	if (drm_fbdev_use_shadow_fb(fb_helper)) {
-		fbi->screen_buffer = vzalloc(fbi->screen_size);
-		if (!fbi->screen_buffer)
-			return -ENOMEM;
-		fbi->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST;
-
-		fbi->fbdefio = &drm_fbdev_defio;
-		fb_deferred_io_init(fbi);
-	} else {
-		/* buffer is mapped for HW framebuffer */
-		ret = drm_client_buffer_vmap(fb_helper->buffer, &map);
-		if (ret)
-			return ret;
-		if (map.is_iomem) {
-			fbi->screen_base = map.vaddr_iomem;
-		} else {
-			fbi->screen_buffer = map.vaddr;
-			fbi->flags |= FBINFO_VIRTFB;
-		}
-
-		/*
-		 * Shamelessly leak the physical address to user-space. As
-		 * page_to_phys() is undefined for I/O memory, warn in this
-		 * case.
-		 */
-#if IS_ENABLED(CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM)
-		if (drm_leak_fbdev_smem && fbi->fix.smem_start == 0 &&
-		    !drm_WARN_ON_ONCE(dev, map.is_iomem))
-			fbi->fix.smem_start =
-				page_to_phys(virt_to_page(fbi->screen_buffer));
-#endif
-	}
-
-	return 0;
-}
-
-static const struct drm_fb_helper_funcs drm_fb_helper_generic_funcs = {
-	.fb_probe = drm_fb_helper_generic_probe,
-};
-
-static void drm_fbdev_client_unregister(struct drm_client_dev *client)
-{
-	struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client);
-
-	if (fb_helper->fbdev)
-		/* drm_fbdev_fb_destroy() takes care of cleanup */
-		drm_fb_helper_unregister_fbi(fb_helper);
-	else
-		drm_fbdev_release(fb_helper);
-}
-
-static int drm_fbdev_client_restore(struct drm_client_dev *client)
-{
-	drm_fb_helper_lastclose(client->dev);
-
-	return 0;
-}
-
-static int drm_fbdev_client_hotplug(struct drm_client_dev *client)
-{
-	struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client);
-	struct drm_device *dev = client->dev;
-	int ret;
-
-	/* Setup is not retried if it has failed */
-	if (!fb_helper->dev && fb_helper->funcs)
-		return 0;
-
-	if (dev->fb_helper)
-		return drm_fb_helper_hotplug_event(dev->fb_helper);
-
-	if (!dev->mode_config.num_connector) {
-		drm_dbg_kms(dev, "No connectors found, will not create framebuffer!\n");
-		return 0;
-	}
-
-	drm_fb_helper_prepare(dev, fb_helper, &drm_fb_helper_generic_funcs);
-
-	ret = drm_fb_helper_init(dev, fb_helper);
-	if (ret)
-		goto err;
-
-	if (!drm_drv_uses_atomic_modeset(dev))
-		drm_helper_disable_unused_functions(dev);
-
-	ret = drm_fb_helper_initial_config(fb_helper, fb_helper->preferred_bpp);
-	if (ret)
-		goto err_cleanup;
-
-	return 0;
-
-err_cleanup:
-	drm_fbdev_cleanup(fb_helper);
-err:
-	fb_helper->dev = NULL;
-	fb_helper->fbdev = NULL;
-
-	drm_err(dev, "fbdev: Failed to setup generic emulation (ret=%d)\n", ret);
-
-	return ret;
-}
-
-static const struct drm_client_funcs drm_fbdev_client_funcs = {
-	.owner		= THIS_MODULE,
-	.unregister	= drm_fbdev_client_unregister,
-	.restore	= drm_fbdev_client_restore,
-	.hotplug	= drm_fbdev_client_hotplug,
-};
-
-/**
- * drm_fbdev_generic_setup() - Setup generic fbdev emulation
- * @dev: DRM device
- * @preferred_bpp: Preferred bits per pixel for the device.
- *                 @dev->mode_config.preferred_depth is used if this is zero.
- *
- * This function sets up generic fbdev emulation for drivers that supports
- * dumb buffers with a virtual address and that can be mmap'ed.
- * drm_fbdev_generic_setup() shall be called after the DRM driver registered
- * the new DRM device with drm_dev_register().
- *
- * Restore, hotplug events and teardown are all taken care of. Drivers that do
- * suspend/resume need to call drm_fb_helper_set_suspend_unlocked() themselves.
- * Simple drivers might use drm_mode_config_helper_suspend().
- *
- * Drivers that set the dirty callback on their framebuffer will get a shadow
- * fbdev buffer that is blitted onto the real buffer. This is done in order to
- * make deferred I/O work with all kinds of buffers. A shadow buffer can be
- * requested explicitly by setting struct drm_mode_config.prefer_shadow or
- * struct drm_mode_config.prefer_shadow_fbdev to true beforehand. This is
- * required to use generic fbdev emulation with SHMEM helpers.
- *
- * This function is safe to call even when there are no connectors present.
- * Setup will be retried on the next hotplug event.
- *
- * The fbdev is destroyed by drm_dev_unregister().
- */
-void drm_fbdev_generic_setup(struct drm_device *dev,
-			     unsigned int preferred_bpp)
-{
-	struct drm_fb_helper *fb_helper;
-	int ret;
-
-	drm_WARN(dev, !dev->registered, "Device has not been registered.\n");
-	drm_WARN(dev, dev->fb_helper, "fb_helper is already set!\n");
-
-	if (!drm_fbdev_emulation)
-		return;
-
-	fb_helper = kzalloc(sizeof(*fb_helper), GFP_KERNEL);
-	if (!fb_helper) {
-		drm_err(dev, "Failed to allocate fb_helper\n");
-		return;
-	}
-
-	ret = drm_client_init(dev, &fb_helper->client, "fbdev", &drm_fbdev_client_funcs);
-	if (ret) {
-		kfree(fb_helper);
-		drm_err(dev, "Failed to register client: %d\n", ret);
-		return;
-	}
-
-	/*
-	 * FIXME: This mixes up depth with bpp, which results in a glorious
-	 * mess, resulting in some drivers picking wrong fbdev defaults and
-	 * others wrong preferred_depth defaults.
-	 */
-	if (!preferred_bpp)
-		preferred_bpp = dev->mode_config.preferred_depth;
-	if (!preferred_bpp)
-		preferred_bpp = 32;
-	fb_helper->preferred_bpp = preferred_bpp;
-
-	ret = drm_fbdev_client_hotplug(&fb_helper->client);
-	if (ret)
-		drm_dbg_kms(dev, "client hotplug ret=%d\n", ret);
-
-	drm_client_register(&fb_helper->client);
-}
-EXPORT_SYMBOL(drm_fbdev_generic_setup);
diff --git a/drivers/gpu/drm/drm_fbdev_generic.c b/drivers/gpu/drm/drm_fbdev_generic.c
new file mode 100644
index 000000000000..ab8695669279
--- /dev/null
+++ b/drivers/gpu/drm/drm_fbdev_generic.c
@@ -0,0 +1,494 @@
+// SPDX-License-Identifier: MIT
+
+#include <linux/moduleparam.h>
+#include <linux/vmalloc.h>
+
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_framebuffer.h>
+#include <drm/drm_print.h>
+
+#include <drm/drm_fbdev_generic.h>
+
+static bool drm_fbdev_use_shadow_fb(struct drm_fb_helper *fb_helper)
+{
+	struct drm_device *dev = fb_helper->dev;
+	struct drm_framebuffer *fb = fb_helper->fb;
+
+	return dev->mode_config.prefer_shadow_fbdev ||
+	       dev->mode_config.prefer_shadow ||
+	       fb->funcs->dirty;
+}
+
+/* @user: 1=userspace, 0=fbcon */
+static int drm_fbdev_fb_open(struct fb_info *info, int user)
+{
+	struct drm_fb_helper *fb_helper = info->par;
+
+	/* No need to take a ref for fbcon because it unbinds on unregister */
+	if (user && !try_module_get(fb_helper->dev->driver->fops->owner))
+		return -ENODEV;
+
+	return 0;
+}
+
+static int drm_fbdev_fb_release(struct fb_info *info, int user)
+{
+	struct drm_fb_helper *fb_helper = info->par;
+
+	if (user)
+		module_put(fb_helper->dev->driver->fops->owner);
+
+	return 0;
+}
+
+static void drm_fbdev_cleanup(struct drm_fb_helper *fb_helper)
+{
+	struct fb_info *fbi = fb_helper->info;
+	void *shadow = NULL;
+
+	if (!fb_helper->dev)
+		return;
+
+	if (fbi) {
+		if (fbi->fbdefio)
+			fb_deferred_io_cleanup(fbi);
+		if (drm_fbdev_use_shadow_fb(fb_helper))
+			shadow = fbi->screen_buffer;
+	}
+
+	drm_fb_helper_fini(fb_helper);
+
+	if (shadow)
+		vfree(shadow);
+	else if (fb_helper->buffer)
+		drm_client_buffer_vunmap(fb_helper->buffer);
+
+	drm_client_framebuffer_delete(fb_helper->buffer);
+}
+
+static void drm_fbdev_release(struct drm_fb_helper *fb_helper)
+{
+	drm_fbdev_cleanup(fb_helper);
+	drm_client_release(&fb_helper->client);
+	kfree(fb_helper);
+}
+
+/*
+ * fb_ops.fb_destroy is called by the last put_fb_info() call at the end of
+ * unregister_framebuffer() or fb_release().
+ */
+static void drm_fbdev_fb_destroy(struct fb_info *info)
+{
+	drm_fbdev_release(info->par);
+}
+
+static int drm_fbdev_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
+{
+	struct drm_fb_helper *fb_helper = info->par;
+
+	if (drm_fbdev_use_shadow_fb(fb_helper))
+		return fb_deferred_io_mmap(info, vma);
+	else if (fb_helper->dev->driver->gem_prime_mmap)
+		return fb_helper->dev->driver->gem_prime_mmap(fb_helper->buffer->gem, vma);
+	else
+		return -ENODEV;
+}
+
+static bool drm_fbdev_use_iomem(struct fb_info *info)
+{
+	struct drm_fb_helper *fb_helper = info->par;
+	struct drm_client_buffer *buffer = fb_helper->buffer;
+
+	return !drm_fbdev_use_shadow_fb(fb_helper) && buffer->map.is_iomem;
+}
+
+static ssize_t drm_fbdev_fb_read(struct fb_info *info, char __user *buf,
+				 size_t count, loff_t *ppos)
+{
+	ssize_t ret;
+
+	if (drm_fbdev_use_iomem(info))
+		ret = drm_fb_helper_cfb_read(info, buf, count, ppos);
+	else
+		ret = drm_fb_helper_sys_read(info, buf, count, ppos);
+
+	return ret;
+}
+
+static ssize_t drm_fbdev_fb_write(struct fb_info *info, const char __user *buf,
+				  size_t count, loff_t *ppos)
+{
+	ssize_t ret;
+
+	if (drm_fbdev_use_iomem(info))
+		ret = drm_fb_helper_cfb_write(info, buf, count, ppos);
+	else
+		ret = drm_fb_helper_sys_write(info, buf, count, ppos);
+
+	return ret;
+}
+
+static void drm_fbdev_fb_fillrect(struct fb_info *info,
+				  const struct fb_fillrect *rect)
+{
+	if (drm_fbdev_use_iomem(info))
+		drm_fb_helper_cfb_fillrect(info, rect);
+	else
+		drm_fb_helper_sys_fillrect(info, rect);
+}
+
+static void drm_fbdev_fb_copyarea(struct fb_info *info,
+				  const struct fb_copyarea *area)
+{
+	if (drm_fbdev_use_iomem(info))
+		drm_fb_helper_cfb_copyarea(info, area);
+	else
+		drm_fb_helper_sys_copyarea(info, area);
+}
+
+static void drm_fbdev_fb_imageblit(struct fb_info *info,
+				   const struct fb_image *image)
+{
+	if (drm_fbdev_use_iomem(info))
+		drm_fb_helper_cfb_imageblit(info, image);
+	else
+		drm_fb_helper_sys_imageblit(info, image);
+}
+
+static const struct fb_ops drm_fbdev_fb_ops = {
+	.owner		= THIS_MODULE,
+	DRM_FB_HELPER_DEFAULT_OPS,
+	.fb_open	= drm_fbdev_fb_open,
+	.fb_release	= drm_fbdev_fb_release,
+	.fb_destroy	= drm_fbdev_fb_destroy,
+	.fb_mmap	= drm_fbdev_fb_mmap,
+	.fb_read	= drm_fbdev_fb_read,
+	.fb_write	= drm_fbdev_fb_write,
+	.fb_fillrect	= drm_fbdev_fb_fillrect,
+	.fb_copyarea	= drm_fbdev_fb_copyarea,
+	.fb_imageblit	= drm_fbdev_fb_imageblit,
+};
+
+static struct fb_deferred_io drm_fbdev_defio = {
+	.delay		= HZ / 20,
+	.deferred_io	= drm_fb_helper_deferred_io,
+};
+
+/*
+ * This function uses the client API to create a framebuffer backed by a dumb buffer.
+ */
+static int drm_fbdev_fb_probe(struct drm_fb_helper *fb_helper,
+			      struct drm_fb_helper_surface_size *sizes)
+{
+	struct drm_client_dev *client = &fb_helper->client;
+	struct drm_device *dev = fb_helper->dev;
+	struct drm_client_buffer *buffer;
+	struct drm_framebuffer *fb;
+	struct fb_info *fbi;
+	u32 format;
+	struct iosys_map map;
+	int ret;
+
+	drm_dbg_kms(dev, "surface width(%d), height(%d) and bpp(%d)\n",
+		    sizes->surface_width, sizes->surface_height,
+		    sizes->surface_bpp);
+
+	format = drm_mode_legacy_fb_format(sizes->surface_bpp, sizes->surface_depth);
+	buffer = drm_client_framebuffer_create(client, sizes->surface_width,
+					       sizes->surface_height, format);
+	if (IS_ERR(buffer))
+		return PTR_ERR(buffer);
+
+	fb_helper->buffer = buffer;
+	fb_helper->fb = buffer->fb;
+	fb = buffer->fb;
+
+	fbi = drm_fb_helper_alloc_info(fb_helper);
+	if (IS_ERR(fbi))
+		return PTR_ERR(fbi);
+
+	fbi->fbops = &drm_fbdev_fb_ops;
+	fbi->screen_size = sizes->surface_height * fb->pitches[0];
+	fbi->fix.smem_len = fbi->screen_size;
+	fbi->flags = FBINFO_DEFAULT;
+
+	drm_fb_helper_fill_info(fbi, fb_helper, sizes);
+
+	if (drm_fbdev_use_shadow_fb(fb_helper)) {
+		fbi->screen_buffer = vzalloc(fbi->screen_size);
+		if (!fbi->screen_buffer)
+			return -ENOMEM;
+		fbi->flags |= FBINFO_VIRTFB | FBINFO_READS_FAST;
+
+		fbi->fbdefio = &drm_fbdev_defio;
+		fb_deferred_io_init(fbi);
+	} else {
+		/* buffer is mapped for HW framebuffer */
+		ret = drm_client_buffer_vmap(fb_helper->buffer, &map);
+		if (ret)
+			return ret;
+		if (map.is_iomem) {
+			fbi->screen_base = map.vaddr_iomem;
+		} else {
+			fbi->screen_buffer = map.vaddr;
+			fbi->flags |= FBINFO_VIRTFB;
+		}
+
+		/*
+		 * Shamelessly leak the physical address to user-space. As
+		 * page_to_phys() is undefined for I/O memory, warn in this
+		 * case.
+		 */
+#if IS_ENABLED(CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM)
+		if (fb_helper->hint_leak_smem_start && fbi->fix.smem_start == 0 &&
+		    !drm_WARN_ON_ONCE(dev, map.is_iomem))
+			fbi->fix.smem_start =
+				page_to_phys(virt_to_page(fbi->screen_buffer));
+#endif
+	}
+
+	return 0;
+}
+
+static void drm_fbdev_damage_blit_real(struct drm_fb_helper *fb_helper,
+				       struct drm_clip_rect *clip,
+				       struct iosys_map *dst)
+{
+	struct drm_framebuffer *fb = fb_helper->fb;
+	size_t offset = clip->y1 * fb->pitches[0];
+	size_t len = clip->x2 - clip->x1;
+	unsigned int y;
+	void *src;
+
+	switch (drm_format_info_bpp(fb->format, 0)) {
+	case 1:
+		offset += clip->x1 / 8;
+		len = DIV_ROUND_UP(len + clip->x1 % 8, 8);
+		break;
+	case 2:
+		offset += clip->x1 / 4;
+		len = DIV_ROUND_UP(len + clip->x1 % 4, 4);
+		break;
+	case 4:
+		offset += clip->x1 / 2;
+		len = DIV_ROUND_UP(len + clip->x1 % 2, 2);
+		break;
+	default:
+		offset += clip->x1 * fb->format->cpp[0];
+		len *= fb->format->cpp[0];
+		break;
+	}
+
+	src = fb_helper->info->screen_buffer + offset;
+	iosys_map_incr(dst, offset); /* go to first pixel within clip rect */
+
+	for (y = clip->y1; y < clip->y2; y++) {
+		iosys_map_memcpy_to(dst, 0, src, len);
+		iosys_map_incr(dst, fb->pitches[0]);
+		src += fb->pitches[0];
+	}
+}
+
+static int drm_fbdev_damage_blit(struct drm_fb_helper *fb_helper,
+				 struct drm_clip_rect *clip)
+{
+	struct drm_client_buffer *buffer = fb_helper->buffer;
+	struct iosys_map map, dst;
+	int ret;
+
+	/*
+	 * We have to pin the client buffer to its current location while
+	 * flushing the shadow buffer. In the general case, concurrent
+	 * modesetting operations could try to move the buffer and would
+	 * fail. The modeset has to be serialized by acquiring the reservation
+	 * object of the underlying BO here.
+	 *
+	 * For fbdev emulation, we only have to protect against fbdev modeset
+	 * operations. Nothing else will involve the client buffer's BO. So it
+	 * is sufficient to acquire struct drm_fb_helper.lock here.
+	 */
+	mutex_lock(&fb_helper->lock);
+
+	ret = drm_client_buffer_vmap(buffer, &map);
+	if (ret)
+		goto out;
+
+	dst = map;
+	drm_fbdev_damage_blit_real(fb_helper, clip, &dst);
+
+	drm_client_buffer_vunmap(buffer);
+
+out:
+	mutex_unlock(&fb_helper->lock);
+
+	return ret;
+}
+
+static int drm_fbdev_fb_dirty(struct drm_fb_helper *helper, struct drm_clip_rect *clip)
+{
+	struct drm_device *dev = helper->dev;
+	int ret;
+
+	if (!drm_fbdev_use_shadow_fb(helper))
+		return 0;
+
+	/* Call damage handlers only if necessary */
+	if (!(clip->x1 < clip->x2 && clip->y1 < clip->y2))
+		return 0;
+
+	if (helper->buffer) {
+		ret = drm_fbdev_damage_blit(helper, clip);
+		if (drm_WARN_ONCE(dev, ret, "Damage blitter failed: ret=%d\n", ret))
+			return ret;
+	}
+
+	if (helper->fb->funcs->dirty) {
+		ret = helper->fb->funcs->dirty(helper->fb, NULL, 0, 0, clip, 1);
+		if (drm_WARN_ONCE(dev, ret, "Dirty helper failed: ret=%d\n", ret))
+			return ret;
+	}
+
+	return 0;
+}
+
+static const struct drm_fb_helper_funcs drm_fb_helper_generic_funcs = {
+	.fb_probe = drm_fbdev_fb_probe,
+	.fb_dirty = drm_fbdev_fb_dirty,
+};
+
+static void drm_fbdev_client_unregister(struct drm_client_dev *client)
+{
+	struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client);
+
+	if (fb_helper->info)
+		/* drm_fbdev_fb_destroy() takes care of cleanup */
+		drm_fb_helper_unregister_info(fb_helper);
+	else
+		drm_fbdev_release(fb_helper);
+}
+
+static int drm_fbdev_client_restore(struct drm_client_dev *client)
+{
+	drm_fb_helper_lastclose(client->dev);
+
+	return 0;
+}
+
+static int drm_fbdev_client_hotplug(struct drm_client_dev *client)
+{
+	struct drm_fb_helper *fb_helper = drm_fb_helper_from_client(client);
+	struct drm_device *dev = client->dev;
+	int ret;
+
+	/* Setup is not retried if it has failed */
+	if (!fb_helper->dev && fb_helper->funcs)
+		return 0;
+
+	if (dev->fb_helper)
+		return drm_fb_helper_hotplug_event(dev->fb_helper);
+
+	if (!dev->mode_config.num_connector) {
+		drm_dbg_kms(dev, "No connectors found, will not create framebuffer!\n");
+		return 0;
+	}
+
+	drm_fb_helper_prepare(dev, fb_helper, &drm_fb_helper_generic_funcs);
+
+	ret = drm_fb_helper_init(dev, fb_helper);
+	if (ret)
+		goto err;
+
+	if (!drm_drv_uses_atomic_modeset(dev))
+		drm_helper_disable_unused_functions(dev);
+
+	ret = drm_fb_helper_initial_config(fb_helper, fb_helper->preferred_bpp);
+	if (ret)
+		goto err_cleanup;
+
+	return 0;
+
+err_cleanup:
+	drm_fbdev_cleanup(fb_helper);
+err:
+	fb_helper->dev = NULL;
+	fb_helper->info = NULL;
+
+	drm_err(dev, "fbdev: Failed to setup generic emulation (ret=%d)\n", ret);
+
+	return ret;
+}
+
+static const struct drm_client_funcs drm_fbdev_client_funcs = {
+	.owner		= THIS_MODULE,
+	.unregister	= drm_fbdev_client_unregister,
+	.restore	= drm_fbdev_client_restore,
+	.hotplug	= drm_fbdev_client_hotplug,
+};
+
+/**
+ * drm_fbdev_generic_setup() - Setup generic fbdev emulation
+ * @dev: DRM device
+ * @preferred_bpp: Preferred bits per pixel for the device.
+ *                 @dev->mode_config.preferred_depth is used if this is zero.
+ *
+ * This function sets up generic fbdev emulation for drivers that supports
+ * dumb buffers with a virtual address and that can be mmap'ed.
+ * drm_fbdev_generic_setup() shall be called after the DRM driver registered
+ * the new DRM device with drm_dev_register().
+ *
+ * Restore, hotplug events and teardown are all taken care of. Drivers that do
+ * suspend/resume need to call drm_fb_helper_set_suspend_unlocked() themselves.
+ * Simple drivers might use drm_mode_config_helper_suspend().
+ *
+ * Drivers that set the dirty callback on their framebuffer will get a shadow
+ * fbdev buffer that is blitted onto the real buffer. This is done in order to
+ * make deferred I/O work with all kinds of buffers. A shadow buffer can be
+ * requested explicitly by setting struct drm_mode_config.prefer_shadow or
+ * struct drm_mode_config.prefer_shadow_fbdev to true beforehand. This is
+ * required to use generic fbdev emulation with SHMEM helpers.
+ *
+ * This function is safe to call even when there are no connectors present.
+ * Setup will be retried on the next hotplug event.
+ *
+ * The fbdev is destroyed by drm_dev_unregister().
+ */
+void drm_fbdev_generic_setup(struct drm_device *dev,
+			     unsigned int preferred_bpp)
+{
+	struct drm_fb_helper *fb_helper;
+	int ret;
+
+	drm_WARN(dev, !dev->registered, "Device has not been registered.\n");
+	drm_WARN(dev, dev->fb_helper, "fb_helper is already set!\n");
+
+	fb_helper = kzalloc(sizeof(*fb_helper), GFP_KERNEL);
+	if (!fb_helper)
+		return;
+
+	ret = drm_client_init(dev, &fb_helper->client, "fbdev", &drm_fbdev_client_funcs);
+	if (ret) {
+		kfree(fb_helper);
+		drm_err(dev, "Failed to register client: %d\n", ret);
+		return;
+	}
+
+	/*
+	 * FIXME: This mixes up depth with bpp, which results in a glorious
+	 * mess, resulting in some drivers picking wrong fbdev defaults and
+	 * others wrong preferred_depth defaults.
+	 */
+	if (!preferred_bpp)
+		preferred_bpp = dev->mode_config.preferred_depth;
+	if (!preferred_bpp)
+		preferred_bpp = 32;
+	fb_helper->preferred_bpp = preferred_bpp;
+
+	ret = drm_fbdev_client_hotplug(&fb_helper->client);
+	if (ret)
+		drm_dbg_kms(dev, "client hotplug ret=%d\n", ret);
+
+	drm_client_register(&fb_helper->client);
+}
+EXPORT_SYMBOL(drm_fbdev_generic_setup);
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index a8b4d918e9a3..64b4a3a87fbb 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -326,7 +326,7 @@ static int drm_cpu_valid(void)
  * Creates and initializes a drm_file structure for the file private data in \p
  * filp and add it into the double linked list in \p dev.
  */
-static int drm_open_helper(struct file *filp, struct drm_minor *minor)
+int drm_open_helper(struct file *filp, struct drm_minor *minor)
 {
 	struct drm_device *dev = minor->dev;
 	struct drm_file *priv;
diff --git a/drivers/gpu/drm/drm_format_helper.c b/drivers/gpu/drm/drm_format_helper.c
index 653a5821dd53..74ff33c2ddaa 100644
--- a/drivers/gpu/drm/drm_format_helper.c
+++ b/drivers/gpu/drm/drm_format_helper.c
@@ -817,6 +817,38 @@ static bool is_listed_fourcc(const uint32_t *fourccs, size_t nfourccs, uint32_t
 	return false;
 }
 
+static const uint32_t conv_from_xrgb8888[] = {
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_XRGB2101010,
+	DRM_FORMAT_ARGB2101010,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_RGB888,
+};
+
+static const uint32_t conv_from_rgb565_888[] = {
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_ARGB8888,
+};
+
+static bool is_conversion_supported(uint32_t from, uint32_t to)
+{
+	switch (from) {
+	case DRM_FORMAT_XRGB8888:
+	case DRM_FORMAT_ARGB8888:
+		return is_listed_fourcc(conv_from_xrgb8888, ARRAY_SIZE(conv_from_xrgb8888), to);
+	case DRM_FORMAT_RGB565:
+	case DRM_FORMAT_RGB888:
+		return is_listed_fourcc(conv_from_rgb565_888, ARRAY_SIZE(conv_from_rgb565_888), to);
+	case DRM_FORMAT_XRGB2101010:
+		return to == DRM_FORMAT_ARGB2101010;
+	case DRM_FORMAT_ARGB2101010:
+		return to == DRM_FORMAT_XRGB2101010;
+	default:
+		return false;
+	}
+}
+
 /**
  * drm_fb_build_fourcc_list - Filters a list of supported color formats against
  *                            the device's native formats
@@ -837,7 +869,9 @@ static bool is_listed_fourcc(const uint32_t *fourccs, size_t nfourccs, uint32_t
  * be handed over to drm_universal_plane_init() et al. Native formats
  * will go before emulated formats. Other heuristics might be applied
  * to optimize the order. Formats near the beginning of the list are
- * usually preferred over formats near the end of the list.
+ * usually preferred over formats near the end of the list. Formats
+ * without conversion helpers will be skipped. New drivers should only
+ * pass in XRGB8888 and avoid exposing additional emulated formats.
  *
  * Returns:
  * The number of color-formats 4CC codes returned in @fourccs_out.
@@ -849,7 +883,7 @@ size_t drm_fb_build_fourcc_list(struct drm_device *dev,
 {
 	u32 *fourccs = fourccs_out;
 	const u32 *fourccs_end = fourccs_out + nfourccs_out;
-	bool found_native = false;
+	uint32_t native_format = 0;
 	size_t i;
 
 	/*
@@ -868,27 +902,19 @@ size_t drm_fb_build_fourcc_list(struct drm_device *dev,
 
 		drm_dbg_kms(dev, "adding native format %p4cc\n", &fourcc);
 
-		if (!found_native)
-			found_native = is_listed_fourcc(driver_fourccs, driver_nfourccs, fourcc);
+		/*
+		 * There should only be one native format with the current API.
+		 * This API needs to be refactored to correctly support arbitrary
+		 * sets of native formats, since it needs to report which native
+		 * format to use for each emulated format.
+		 */
+		if (!native_format)
+			native_format = fourcc;
 		*fourccs = fourcc;
 		++fourccs;
 	}
 
 	/*
-	 * The plane's atomic_update helper converts the framebuffer's color format
-	 * to a native format when copying to device memory.
-	 *
-	 * If there is not a single format supported by both, device and
-	 * driver, the native formats are likely not supported by the conversion
-	 * helpers. Therefore *only* support the native formats and add a
-	 * conversion helper ASAP.
-	 */
-	if (!found_native) {
-		drm_warn(dev, "Format conversion helpers required to add extra formats.\n");
-		goto out;
-	}
-
-	/*
 	 * The extra formats, emulated by the driver, go second.
 	 */
 
@@ -900,6 +926,9 @@ size_t drm_fb_build_fourcc_list(struct drm_device *dev,
 		} else if (fourccs == fourccs_end) {
 			drm_warn(dev, "Ignoring emulated format %p4cc\n", &fourcc);
 			continue; /* end of available output buffer */
+		} else if (!is_conversion_supported(fourcc, native_format)) {
+			drm_dbg_kms(dev, "Unsupported emulated format %p4cc\n", &fourcc);
+			continue; /* format is not supported for conversion */
 		}
 
 		drm_dbg_kms(dev, "adding emulated format %p4cc\n", &fourcc);
@@ -908,7 +937,6 @@ size_t drm_fb_build_fourcc_list(struct drm_device *dev,
 		++fourccs;
 	}
 
-out:
 	return fourccs - fourccs_out;
 }
 EXPORT_SYMBOL(drm_fb_build_fourcc_list);
diff --git a/drivers/gpu/drm/drm_fourcc.c b/drivers/gpu/drm/drm_fourcc.c
index e09331bb3bc7..6242dfbe9240 100644
--- a/drivers/gpu/drm/drm_fourcc.c
+++ b/drivers/gpu/drm/drm_fourcc.c
@@ -297,12 +297,12 @@ const struct drm_format_info *__drm_format_info(u32 format)
 		  .vsub = 2, .is_yuv = true },
 		{ .format = DRM_FORMAT_Q410,		.depth = 0,
 		  .num_planes = 3, .char_per_block = { 2, 2, 2 },
-		  .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 0,
-		  .vsub = 0, .is_yuv = true },
+		  .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 1,
+		  .vsub = 1, .is_yuv = true },
 		{ .format = DRM_FORMAT_Q401,		.depth = 0,
 		  .num_planes = 3, .char_per_block = { 2, 2, 2 },
-		  .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 0,
-		  .vsub = 0, .is_yuv = true },
+		  .block_w = { 1, 1, 1 }, .block_h = { 1, 1, 1 }, .hsub = 1,
+		  .vsub = 1, .is_yuv = true },
 		{ .format = DRM_FORMAT_P030,            .depth = 0,  .num_planes = 2,
 		  .char_per_block = { 4, 8, 0 }, .block_w = { 3, 3, 0 }, .block_h = { 1, 1, 0 },
 		  .hsub = 2, .vsub = 2, .is_yuv = true},
diff --git a/drivers/gpu/drm/drm_gem_atomic_helper.c b/drivers/gpu/drm/drm_gem_atomic_helper.c
index b6a0110eb64a..e42800718f51 100644
--- a/drivers/gpu/drm/drm_gem_atomic_helper.c
+++ b/drivers/gpu/drm/drm_gem_atomic_helper.c
@@ -360,48 +360,43 @@ void drm_gem_reset_shadow_plane(struct drm_plane *plane)
 EXPORT_SYMBOL(drm_gem_reset_shadow_plane);
 
 /**
- * drm_gem_prepare_shadow_fb - prepares shadow framebuffers
+ * drm_gem_begin_shadow_fb_access - prepares shadow framebuffers for CPU access
  * @plane: the plane
  * @plane_state: the plane state of type struct drm_shadow_plane_state
  *
- * This function implements struct &drm_plane_helper_funcs.prepare_fb. It
+ * This function implements struct &drm_plane_helper_funcs.begin_fb_access. It
  * maps all buffer objects of the plane's framebuffer into kernel address
- * space and stores them in &struct drm_shadow_plane_state.map. The
- * framebuffer will be synchronized as part of the atomic commit.
+ * space and stores them in struct &drm_shadow_plane_state.map. The first data
+ * bytes are available in struct &drm_shadow_plane_state.data.
  *
- * See drm_gem_cleanup_shadow_fb() for cleanup.
+ * See drm_gem_end_shadow_fb_access() for cleanup.
  *
  * Returns:
  * 0 on success, or a negative errno code otherwise.
  */
-int drm_gem_prepare_shadow_fb(struct drm_plane *plane, struct drm_plane_state *plane_state)
+int drm_gem_begin_shadow_fb_access(struct drm_plane *plane, struct drm_plane_state *plane_state)
 {
 	struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state);
 	struct drm_framebuffer *fb = plane_state->fb;
-	int ret;
 
 	if (!fb)
 		return 0;
 
-	ret = drm_gem_plane_helper_prepare_fb(plane, plane_state);
-	if (ret)
-		return ret;
-
 	return drm_gem_fb_vmap(fb, shadow_plane_state->map, shadow_plane_state->data);
 }
-EXPORT_SYMBOL(drm_gem_prepare_shadow_fb);
+EXPORT_SYMBOL(drm_gem_begin_shadow_fb_access);
 
 /**
- * drm_gem_cleanup_shadow_fb - releases shadow framebuffers
+ * drm_gem_end_shadow_fb_access - releases shadow framebuffers from CPU access
  * @plane: the plane
  * @plane_state: the plane state of type struct drm_shadow_plane_state
  *
- * This function implements struct &drm_plane_helper_funcs.cleanup_fb.
- * This function unmaps all buffer objects of the plane's framebuffer.
+ * This function implements struct &drm_plane_helper_funcs.end_fb_access. It
+ * undoes all effects of drm_gem_begin_shadow_fb_access() in reverse order.
  *
- * See drm_gem_prepare_shadow_fb() for more information.
+ * See drm_gem_begin_shadow_fb_access() for more information.
  */
-void drm_gem_cleanup_shadow_fb(struct drm_plane *plane, struct drm_plane_state *plane_state)
+void drm_gem_end_shadow_fb_access(struct drm_plane *plane, struct drm_plane_state *plane_state)
 {
 	struct drm_shadow_plane_state *shadow_plane_state = to_drm_shadow_plane_state(plane_state);
 	struct drm_framebuffer *fb = plane_state->fb;
@@ -411,46 +406,45 @@ void drm_gem_cleanup_shadow_fb(struct drm_plane *plane, struct drm_plane_state *
 
 	drm_gem_fb_vunmap(fb, shadow_plane_state->map);
 }
-EXPORT_SYMBOL(drm_gem_cleanup_shadow_fb);
+EXPORT_SYMBOL(drm_gem_end_shadow_fb_access);
 
 /**
- * drm_gem_simple_kms_prepare_shadow_fb - prepares shadow framebuffers
+ * drm_gem_simple_kms_begin_shadow_fb_access - prepares shadow framebuffers for CPU access
  * @pipe: the simple display pipe
  * @plane_state: the plane state of type struct drm_shadow_plane_state
  *
- * This function implements struct drm_simple_display_funcs.prepare_fb. It
- * maps all buffer objects of the plane's framebuffer into kernel address
- * space and stores them in struct drm_shadow_plane_state.map. The
- * framebuffer will be synchronized as part of the atomic commit.
+ * This function implements struct drm_simple_display_funcs.begin_fb_access.
  *
- * See drm_gem_simple_kms_cleanup_shadow_fb() for cleanup.
+ * See drm_gem_begin_shadow_fb_access() for details and
+ * drm_gem_simple_kms_cleanup_shadow_fb() for cleanup.
  *
  * Returns:
  * 0 on success, or a negative errno code otherwise.
  */
-int drm_gem_simple_kms_prepare_shadow_fb(struct drm_simple_display_pipe *pipe,
-					 struct drm_plane_state *plane_state)
+int drm_gem_simple_kms_begin_shadow_fb_access(struct drm_simple_display_pipe *pipe,
+					      struct drm_plane_state *plane_state)
 {
-	return drm_gem_prepare_shadow_fb(&pipe->plane, plane_state);
+	return drm_gem_begin_shadow_fb_access(&pipe->plane, plane_state);
 }
-EXPORT_SYMBOL(drm_gem_simple_kms_prepare_shadow_fb);
+EXPORT_SYMBOL(drm_gem_simple_kms_begin_shadow_fb_access);
 
 /**
- * drm_gem_simple_kms_cleanup_shadow_fb - releases shadow framebuffers
+ * drm_gem_simple_kms_end_shadow_fb_access - releases shadow framebuffers from CPU access
  * @pipe: the simple display pipe
  * @plane_state: the plane state of type struct drm_shadow_plane_state
  *
- * This function implements struct drm_simple_display_funcs.cleanup_fb.
- * This function unmaps all buffer objects of the plane's framebuffer.
+ * This function implements struct drm_simple_display_funcs.end_fb_access.
+ * It undoes all effects of drm_gem_simple_kms_begin_shadow_fb_access() in
+ * reverse order.
  *
- * See drm_gem_simple_kms_prepare_shadow_fb().
+ * See drm_gem_simple_kms_begin_shadow_fb_access().
  */
-void drm_gem_simple_kms_cleanup_shadow_fb(struct drm_simple_display_pipe *pipe,
-					  struct drm_plane_state *plane_state)
+void drm_gem_simple_kms_end_shadow_fb_access(struct drm_simple_display_pipe *pipe,
+					     struct drm_plane_state *plane_state)
 {
-	drm_gem_cleanup_shadow_fb(&pipe->plane, plane_state);
+	drm_gem_end_shadow_fb_access(&pipe->plane, plane_state);
 }
-EXPORT_SYMBOL(drm_gem_simple_kms_cleanup_shadow_fb);
+EXPORT_SYMBOL(drm_gem_simple_kms_end_shadow_fb_access);
 
 /**
  * drm_gem_simple_kms_reset_shadow_plane - resets a shadow-buffered plane
diff --git a/drivers/gpu/drm/drm_gem_framebuffer_helper.c b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
index e35e224e6303..e93533b86037 100644
--- a/drivers/gpu/drm/drm_gem_framebuffer_helper.c
+++ b/drivers/gpu/drm/drm_gem_framebuffer_helper.c
@@ -9,7 +9,6 @@
 #include <linux/module.h>
 
 #include <drm/drm_damage_helper.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem.h>
diff --git a/drivers/gpu/drm/drm_gem_shmem_helper.c b/drivers/gpu/drm/drm_gem_shmem_helper.c
index 35138f8a375c..b602cd72a120 100644
--- a/drivers/gpu/drm/drm_gem_shmem_helper.c
+++ b/drivers/gpu/drm/drm_gem_shmem_helper.c
@@ -571,12 +571,20 @@ static void drm_gem_shmem_vm_open(struct vm_area_struct *vma)
 {
 	struct drm_gem_object *obj = vma->vm_private_data;
 	struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
-	int ret;
 
 	WARN_ON(shmem->base.import_attach);
 
-	ret = drm_gem_shmem_get_pages(shmem);
-	WARN_ON_ONCE(ret != 0);
+	mutex_lock(&shmem->pages_lock);
+
+	/*
+	 * We should have already pinned the pages when the buffer was first
+	 * mmap'd, vm_open() just grabs an additional reference for the new
+	 * mm the vma is getting copied into (ie. on fork()).
+	 */
+	if (!WARN_ON_ONCE(!shmem->pages_use_count))
+		shmem->pages_use_count++;
+
+	mutex_unlock(&shmem->pages_lock);
 
 	drm_gem_vm_open(vma);
 }
@@ -622,10 +630,8 @@ int drm_gem_shmem_mmap(struct drm_gem_shmem_object *shmem, struct vm_area_struct
 	}
 
 	ret = drm_gem_shmem_get_pages(shmem);
-	if (ret) {
-		drm_gem_vm_close(vma);
+	if (ret)
 		return ret;
-	}
 
 	vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
diff --git a/drivers/gpu/drm/drm_gem_vram_helper.c b/drivers/gpu/drm/drm_gem_vram_helper.c
index 125160b534be..b6c7e3803bb3 100644
--- a/drivers/gpu/drm/drm_gem_vram_helper.c
+++ b/drivers/gpu/drm/drm_gem_vram_helper.c
@@ -433,25 +433,19 @@ int drm_gem_vram_vmap(struct drm_gem_vram_object *gbo, struct iosys_map *map)
 {
 	int ret;
 
-	ret = ttm_bo_reserve(&gbo->bo, true, false, NULL);
-	if (ret)
-		return ret;
+	dma_resv_assert_held(gbo->bo.base.resv);
 
 	ret = drm_gem_vram_pin_locked(gbo, 0);
 	if (ret)
-		goto err_ttm_bo_unreserve;
+		return ret;
 	ret = drm_gem_vram_kmap_locked(gbo, map);
 	if (ret)
 		goto err_drm_gem_vram_unpin_locked;
 
-	ttm_bo_unreserve(&gbo->bo);
-
 	return 0;
 
 err_drm_gem_vram_unpin_locked:
 	drm_gem_vram_unpin_locked(gbo);
-err_ttm_bo_unreserve:
-	ttm_bo_unreserve(&gbo->bo);
 	return ret;
 }
 EXPORT_SYMBOL(drm_gem_vram_vmap);
@@ -467,16 +461,10 @@ EXPORT_SYMBOL(drm_gem_vram_vmap);
 void drm_gem_vram_vunmap(struct drm_gem_vram_object *gbo,
 			 struct iosys_map *map)
 {
-	int ret;
-
-	ret = ttm_bo_reserve(&gbo->bo, false, false, NULL);
-	if (WARN_ONCE(ret, "ttm_bo_reserve_failed(): ret=%d\n", ret))
-		return;
+	dma_resv_assert_held(gbo->bo.base.resv);
 
 	drm_gem_vram_kunmap_locked(gbo, map);
 	drm_gem_vram_unpin_locked(gbo);
-
-	ttm_bo_unreserve(&gbo->bo);
 }
 EXPORT_SYMBOL(drm_gem_vram_vunmap);
 
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index 7bb98e6a446d..5ea5e260118c 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -104,7 +104,8 @@ static inline void drm_vblank_flush_worker(struct drm_vblank_crtc *vblank)
 
 static inline void drm_vblank_destroy_worker(struct drm_vblank_crtc *vblank)
 {
-	kthread_destroy_worker(vblank->worker);
+	if (vblank->worker)
+		kthread_destroy_worker(vblank->worker);
 }
 
 int drm_vblank_worker_init(struct drm_vblank_crtc *vblank);
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 5d4ac79381c4..3c8034a8c27b 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -1750,11 +1750,78 @@ static int drm_mode_parse_cmdline_options(const char *str,
 	return 0;
 }
 
-static const char * const drm_named_modes_whitelist[] = {
-	"NTSC",
-	"PAL",
+struct drm_named_mode {
+	const char *name;
+	unsigned int pixel_clock_khz;
+	unsigned int xres;
+	unsigned int yres;
+	unsigned int flags;
+};
+
+#define NAMED_MODE(_name, _pclk, _x, _y, _flags)	\
+	{						\
+		.name = _name,				\
+		.pixel_clock_khz = _pclk,		\
+		.xres = _x,				\
+		.yres = _y,				\
+		.flags = _flags,			\
+	}
+
+static const struct drm_named_mode drm_named_modes[] = {
+	NAMED_MODE("NTSC", 13500, 720, 480, DRM_MODE_FLAG_INTERLACE),
+	NAMED_MODE("PAL", 13500, 720, 576, DRM_MODE_FLAG_INTERLACE),
 };
 
+static int drm_mode_parse_cmdline_named_mode(const char *name,
+					     unsigned int name_end,
+					     struct drm_cmdline_mode *cmdline_mode)
+{
+	unsigned int i;
+
+	if (!name_end)
+		return 0;
+
+	/* If the name starts with a digit, it's not a named mode */
+	if (isdigit(name[0]))
+		return 0;
+
+	/*
+	 * If there's an equal sign in the name, the command-line
+	 * contains only an option and no mode.
+	 */
+	if (strnchr(name, name_end, '='))
+		return 0;
+
+	/* The connection status extras can be set without a mode. */
+	if (name_end == 1 &&
+	    (name[0] == 'd' || name[0] == 'D' || name[0] == 'e'))
+		return 0;
+
+	/*
+	 * We're sure we're a named mode at this point, iterate over the
+	 * list of modes we're aware of.
+	 */
+	for (i = 0; i < ARRAY_SIZE(drm_named_modes); i++) {
+		const struct drm_named_mode *mode = &drm_named_modes[i];
+		int ret;
+
+		ret = str_has_prefix(name, mode->name);
+		if (ret != name_end)
+			continue;
+
+		strcpy(cmdline_mode->name, mode->name);
+		cmdline_mode->pixel_clock = mode->pixel_clock_khz;
+		cmdline_mode->xres = mode->xres;
+		cmdline_mode->yres = mode->yres;
+		cmdline_mode->interlace = !!(mode->flags & DRM_MODE_FLAG_INTERLACE);
+		cmdline_mode->specified = true;
+
+		return 1;
+	}
+
+	return -EINVAL;
+}
+
 /**
  * drm_mode_parse_command_line_for_connector - parse command line modeline for connector
  * @mode_option: optional per connector mode option
@@ -1791,7 +1858,7 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option,
 	const char *bpp_ptr = NULL, *refresh_ptr = NULL, *extra_ptr = NULL;
 	const char *options_ptr = NULL;
 	char *bpp_end_ptr = NULL, *refresh_end_ptr = NULL;
-	int i, len, ret;
+	int len, ret;
 
 	memset(mode, 0, sizeof(*mode));
 	mode->panel_orientation = DRM_MODE_PANEL_ORIENTATION_UNKNOWN;
@@ -1832,18 +1899,19 @@ bool drm_mode_parse_command_line_for_connector(const char *mode_option,
 		parse_extras = true;
 	}
 
-	/* First check for a named mode */
-	for (i = 0; i < ARRAY_SIZE(drm_named_modes_whitelist); i++) {
-		ret = str_has_prefix(name, drm_named_modes_whitelist[i]);
-		if (ret == mode_end) {
-			if (refresh_ptr)
-				return false; /* named + refresh is invalid */
+	if (!mode_end)
+		return false;
+
+	ret = drm_mode_parse_cmdline_named_mode(name, mode_end, mode);
+	if (ret < 0)
+		return false;
 
-			strcpy(mode->name, drm_named_modes_whitelist[i]);
-			mode->specified = true;
-			break;
-		}
-	}
+	/*
+	 * Having a mode that starts by a letter (and thus is named) and
+	 * an at-sign (used to specify a refresh rate) is disallowed.
+	 */
+	if (ret && refresh_ptr)
+		return false;
 
 	/* No named mode? Check for a normal mode argument, e.g. 1024x768 */
 	if (!mode->specified && isdigit(name[0])) {
diff --git a/drivers/gpu/drm/drm_nomodeset.c b/drivers/gpu/drm/drm_nomodeset.c
deleted file mode 100644
index f3978d5bd3a1..000000000000
--- a/drivers/gpu/drm/drm_nomodeset.c
+++ /dev/null
@@ -1,24 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/module.h>
-#include <linux/types.h>
-
-static bool drm_nomodeset;
-
-bool drm_firmware_drivers_only(void)
-{
-	return drm_nomodeset;
-}
-EXPORT_SYMBOL(drm_firmware_drivers_only);
-
-static int __init disable_modeset(char *str)
-{
-	drm_nomodeset = true;
-
-	pr_warn("Booted with the nomodeset parameter. Only the system framebuffer will be available\n");
-
-	return 1;
-}
-
-/* Disable kernel modesetting */
-__setup("nomodeset", disable_modeset);
diff --git a/drivers/gpu/drm/drm_panel_orientation_quirks.c b/drivers/gpu/drm/drm_panel_orientation_quirks.c
index 8a0c0e0bb5bd..52d8800a8ab8 100644
--- a/drivers/gpu/drm/drm_panel_orientation_quirks.c
+++ b/drivers/gpu/drm/drm_panel_orientation_quirks.c
@@ -134,6 +134,12 @@ static const struct dmi_system_id orientation_data[] = {
 		  DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "One S1003"),
 		},
 		.driver_data = (void *)&lcd800x1280_rightside_up,
+	}, {	/* Acer Switch V 10 (SW5-017) */
+		.matches = {
+		  DMI_EXACT_MATCH(DMI_SYS_VENDOR, "Acer"),
+		  DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "SW5-017"),
+		},
+		.driver_data = (void *)&lcd800x1280_rightside_up,
 	}, {	/* Anbernic Win600 */
 		.matches = {
 		  DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "Anbernic"),
@@ -319,6 +325,12 @@ static const struct dmi_system_id orientation_data[] = {
 		 DMI_MATCH(DMI_BIOS_VERSION, "BLADE_21"),
 		},
 		.driver_data = (void *)&lcd1200x1920_rightside_up,
+	}, {	/* Nanote UMPC-01 */
+		.matches = {
+		 DMI_MATCH(DMI_SYS_VENDOR, "RWC CO.,LTD"),
+		 DMI_MATCH(DMI_PRODUCT_NAME, "UMPC-01"),
+		},
+		.driver_data = (void *)&lcd1200x1920_rightside_up,
 	}, {	/* OneGX1 Pro */
 		.matches = {
 		  DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SYSTEM_MANUFACTURER"),
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 20e109a802ae..f924b8b4ab6b 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -781,6 +781,8 @@ int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *vma)
 	struct drm_gem_object *obj = dma_buf->priv;
 	struct drm_device *dev = obj->dev;
 
+	dma_resv_assert_held(dma_buf->resv);
+
 	if (!dev->driver->gem_prime_mmap)
 		return -ENOSYS;
 
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index 2fc21df709bc..bcd9611dabfd 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -36,7 +36,6 @@
 #include <drm/drm_client.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_modeset_helper_vtables.h>
 #include <drm/drm_print.h>
diff --git a/drivers/gpu/drm/drm_simple_kms_helper.c b/drivers/gpu/drm/drm_simple_kms_helper.c
index 31233c6ae3c4..3ef420ec4534 100644
--- a/drivers/gpu/drm/drm_simple_kms_helper.c
+++ b/drivers/gpu/drm/drm_simple_kms_helper.c
@@ -285,6 +285,30 @@ static void drm_simple_kms_plane_cleanup_fb(struct drm_plane *plane,
 	pipe->funcs->cleanup_fb(pipe, state);
 }
 
+static int drm_simple_kms_plane_begin_fb_access(struct drm_plane *plane,
+						struct drm_plane_state *new_plane_state)
+{
+	struct drm_simple_display_pipe *pipe;
+
+	pipe = container_of(plane, struct drm_simple_display_pipe, plane);
+	if (!pipe->funcs || !pipe->funcs->begin_fb_access)
+		return 0;
+
+	return pipe->funcs->begin_fb_access(pipe, new_plane_state);
+}
+
+static void drm_simple_kms_plane_end_fb_access(struct drm_plane *plane,
+					       struct drm_plane_state *new_plane_state)
+{
+	struct drm_simple_display_pipe *pipe;
+
+	pipe = container_of(plane, struct drm_simple_display_pipe, plane);
+	if (!pipe->funcs || !pipe->funcs->end_fb_access)
+		return;
+
+	pipe->funcs->end_fb_access(pipe, new_plane_state);
+}
+
 static bool drm_simple_kms_format_mod_supported(struct drm_plane *plane,
 						uint32_t format,
 						uint64_t modifier)
@@ -295,6 +319,8 @@ static bool drm_simple_kms_format_mod_supported(struct drm_plane *plane,
 static const struct drm_plane_helper_funcs drm_simple_kms_plane_helper_funcs = {
 	.prepare_fb = drm_simple_kms_plane_prepare_fb,
 	.cleanup_fb = drm_simple_kms_plane_cleanup_fb,
+	.begin_fb_access = drm_simple_kms_plane_begin_fb_access,
+	.end_fb_access = drm_simple_kms_plane_end_fb_access,
 	.atomic_check = drm_simple_kms_plane_atomic_check,
 	.atomic_update = drm_simple_kms_plane_atomic_update,
 };
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 430e00b16eec..183130355997 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -19,6 +19,7 @@
 #include <linux/kdev_t.h>
 #include <linux/slab.h>
 
+#include <drm/drm_accel.h>
 #include <drm/drm_connector.h>
 #include <drm/drm_device.h>
 #include <drm/drm_file.h>
@@ -90,7 +91,7 @@ static void drm_sysfs_acpi_register(void) { }
 static void drm_sysfs_acpi_unregister(void) { }
 #endif
 
-static char *drm_devnode(struct device *dev, umode_t *mode)
+static char *drm_devnode(const struct device *dev, umode_t *mode)
 {
 	return kasprintf(GFP_KERNEL, "dri/%s", dev_name(dev));
 }
@@ -471,19 +472,26 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
 	struct device *kdev;
 	int r;
 
-	if (minor->type == DRM_MINOR_RENDER)
-		minor_str = "renderD%d";
-	else
-		minor_str = "card%d";
-
 	kdev = kzalloc(sizeof(*kdev), GFP_KERNEL);
 	if (!kdev)
 		return ERR_PTR(-ENOMEM);
 
 	device_initialize(kdev);
-	kdev->devt = MKDEV(DRM_MAJOR, minor->index);
-	kdev->class = drm_class;
-	kdev->type = &drm_sysfs_device_minor;
+
+	if (minor->type == DRM_MINOR_ACCEL) {
+		minor_str = "accel%d";
+		accel_set_device_instance_params(kdev, minor->index);
+	} else {
+		if (minor->type == DRM_MINOR_RENDER)
+			minor_str = "renderD%d";
+		else
+			minor_str = "card%d";
+
+		kdev->devt = MKDEV(DRM_MAJOR, minor->index);
+		kdev->class = drm_class;
+		kdev->type = &drm_sysfs_device_minor;
+	}
+
 	kdev->parent = minor->dev->dev;
 	kdev->release = drm_sysfs_release;
 	dev_set_drvdata(kdev, minor);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index f32f4771dada..2bb4c25565dc 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -6,13 +6,14 @@
 #ifndef __ETNAVIV_DRV_H__
 #define __ETNAVIV_DRV_H__
 
+#include <linux/io.h>
 #include <linux/list.h>
 #include <linux/mm_types.h>
 #include <linux/sizes.h>
 #include <linux/time64.h>
 #include <linux/types.h>
 
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_drv.h>
 #include <drm/drm_gem.h>
 #include <drm/etnaviv_drm.h>
 #include <drm/gpu_scheduler.h>
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index f418e0b75772..44b5f3c35aab 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -83,10 +83,15 @@ static void etnaviv_core_dump_registers(struct core_dump_iterator *iter,
 {
 	struct etnaviv_dump_registers *reg = iter->data;
 	unsigned int i;
+	u32 read_addr;
 
 	for (i = 0; i < ARRAY_SIZE(etnaviv_dump_registers); i++, reg++) {
+		read_addr = etnaviv_dump_registers[i];
+		if (read_addr >= VIVS_PM_POWER_CONTROLS &&
+		    read_addr <= VIVS_PM_PULSE_EATER)
+			read_addr = gpu_fix_power_address(gpu, read_addr);
 		reg->reg = cpu_to_le32(etnaviv_dump_registers[i]);
-		reg->value = cpu_to_le32(gpu_read(gpu, etnaviv_dump_registers[i]));
+		reg->value = cpu_to_le32(gpu_read(gpu, read_addr));
 	}
 
 	etnaviv_core_dump_header(iter, ETDUMP_BUF_REG, reg);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index cc386f8a7116..c5ae5492e1af 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -130,7 +130,7 @@ static int etnaviv_gem_mmap_obj(struct etnaviv_gem_object *etnaviv_obj,
 {
 	pgprot_t vm_page_prot;
 
-	vma->vm_flags |= VM_IO | VM_MIXEDMAP | VM_DONTEXPAND | VM_DONTDUMP;
+	vma->vm_flags |= VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 
 	vm_page_prot = vm_get_page_prot(vma->vm_flags);
 
@@ -165,7 +165,8 @@ static vm_fault_t etnaviv_gem_fault(struct vm_fault *vmf)
 	struct vm_area_struct *vma = vmf->vma;
 	struct drm_gem_object *obj = vma->vm_private_data;
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
-	struct page **pages, *page;
+	struct page **pages;
+	unsigned long pfn;
 	pgoff_t pgoff;
 	int err;
 
@@ -189,12 +190,12 @@ static vm_fault_t etnaviv_gem_fault(struct vm_fault *vmf)
 	/* We don't use vmf->pgoff since that has the fake offset: */
 	pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
-	page = pages[pgoff];
+	pfn = page_to_pfn(pages[pgoff]);
 
 	VERB("Inserting %p pfn %lx, pa %lx", (void *)vmf->address,
-	     page_to_pfn(page), page_to_pfn(page) << PAGE_SHIFT);
+	     pfn, pfn << PAGE_SHIFT);
 
-	return vmf_insert_page(vma, vmf->address, page);
+	return vmf_insert_pfn(vma, vmf->address, pfn);
 }
 
 int etnaviv_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset)
@@ -258,7 +259,12 @@ struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
 		if (mapping->use == 0) {
 			mutex_lock(&mmu_context->lock);
 			if (mapping->context == mmu_context)
-				mapping->use += 1;
+				if (va && mapping->iova != va) {
+					etnaviv_iommu_reap_mapping(mapping);
+					mapping = NULL;
+				} else {
+					mapping->use += 1;
+				}
 			else
 				mapping = NULL;
 			mutex_unlock(&mmu_context->lock);
@@ -504,7 +510,6 @@ void etnaviv_gem_free_object(struct drm_gem_object *obj)
 		kfree(mapping);
 	}
 
-	drm_gem_free_mmap_offset(obj);
 	etnaviv_obj->ops->release(etnaviv_obj);
 	drm_gem_object_release(obj);
 
@@ -638,6 +643,7 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj)
 	struct page **pvec = NULL;
 	struct etnaviv_gem_userptr *userptr = &etnaviv_obj->userptr;
 	int ret, pinned = 0, npages = etnaviv_obj->base.size >> PAGE_SHIFT;
+	unsigned int gup_flags = FOLL_LONGTERM;
 
 	might_lock_read(&current->mm->mmap_lock);
 
@@ -648,14 +654,15 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj)
 	if (!pvec)
 		return -ENOMEM;
 
+	if (!userptr->ro)
+		gup_flags |= FOLL_WRITE;
+
 	do {
 		unsigned num_pages = npages - pinned;
 		uint64_t ptr = userptr->ptr + pinned * PAGE_SIZE;
 		struct page **pages = pvec + pinned;
 
-		ret = pin_user_pages_fast(ptr, num_pages,
-					  FOLL_WRITE | FOLL_FORCE | FOLL_LONGTERM,
-					  pages);
+		ret = pin_user_pages_fast(ptr, num_pages, gup_flags, pages);
 		if (ret < 0) {
 			unpin_user_pages(pvec, pinned);
 			kvfree(pvec);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index 63688e6e4580..baa81cbf701a 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -96,6 +96,7 @@ struct etnaviv_gem_submit {
 	int out_fence_id;
 	struct list_head node; /* GPU active submit list */
 	struct etnaviv_cmdbuf cmdbuf;
+	struct pid *pid;       /* submitting process */
 	bool runtime_resumed;
 	u32 exec_state;
 	u32 flags;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 1ac916b24891..1491159d0d20 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -399,6 +399,9 @@ static void submit_cleanup(struct kref *kref)
 		mutex_unlock(&submit->gpu->fence_lock);
 		dma_fence_put(submit->out_fence);
 	}
+
+	put_pid(submit->pid);
+
 	kfree(submit->pmrs);
 	kfree(submit);
 }
@@ -422,6 +425,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 	struct sync_file *sync_file = NULL;
 	struct ww_acquire_ctx ticket;
 	int out_fence_fd = -1;
+	struct pid *pid = get_pid(task_pid(current));
 	void *stream;
 	int ret;
 
@@ -519,6 +523,8 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		goto err_submit_ww_acquire;
 	}
 
+	submit->pid = pid;
+
 	ret = etnaviv_cmdbuf_init(priv->cmdbuf_suballoc, &submit->cmdbuf,
 				  ALIGN(args->stream_size, 8) + 8);
 	if (ret)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 37018bc55810..51320eeebfcf 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -416,6 +416,12 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
 	if (gpu->identity.model == chipModel_GC700)
 		gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
 
+	/* These models/revisions don't have the 2D pipe bit */
+	if ((gpu->identity.model == chipModel_GC500 &&
+	     gpu->identity.revision <= 2) ||
+	    gpu->identity.model == chipModel_GC300)
+		gpu->identity.features |= chipFeatures_PIPE_2D;
+
 	if ((gpu->identity.model == chipModel_GC500 &&
 	     gpu->identity.revision < 2) ||
 	    (gpu->identity.model == chipModel_GC300 &&
@@ -449,8 +455,9 @@ static void etnaviv_hw_identify(struct etnaviv_gpu *gpu)
 				gpu_read(gpu, VIVS_HI_CHIP_MINOR_FEATURE_5);
 	}
 
-	/* GC600 idle register reports zero bits where modules aren't present */
-	if (gpu->identity.model == chipModel_GC600)
+	/* GC600/300 idle register reports zero bits where modules aren't present */
+	if (gpu->identity.model == chipModel_GC600 ||
+	    gpu->identity.model == chipModel_GC300)
 		gpu->idle_mask = VIVS_HI_IDLE_STATE_TX |
 				 VIVS_HI_IDLE_STATE_RA |
 				 VIVS_HI_IDLE_STATE_SE |
@@ -583,7 +590,7 @@ static void etnaviv_gpu_enable_mlcg(struct etnaviv_gpu *gpu)
 	u32 pmc, ppc;
 
 	/* enable clock gating */
-	ppc = gpu_read(gpu, VIVS_PM_POWER_CONTROLS);
+	ppc = gpu_read_power(gpu, VIVS_PM_POWER_CONTROLS);
 	ppc |= VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING;
 
 	/* Disable stall module clock gating for 4.3.0.1 and 4.3.0.2 revs */
@@ -591,9 +598,9 @@ static void etnaviv_gpu_enable_mlcg(struct etnaviv_gpu *gpu)
 	    gpu->identity.revision == 0x4302)
 		ppc |= VIVS_PM_POWER_CONTROLS_DISABLE_STALL_MODULE_CLOCK_GATING;
 
-	gpu_write(gpu, VIVS_PM_POWER_CONTROLS, ppc);
+	gpu_write_power(gpu, VIVS_PM_POWER_CONTROLS, ppc);
 
-	pmc = gpu_read(gpu, VIVS_PM_MODULE_CONTROLS);
+	pmc = gpu_read_power(gpu, VIVS_PM_MODULE_CONTROLS);
 
 	/* Disable PA clock gating for GC400+ without bugfix except for GC420 */
 	if (gpu->identity.model >= chipModel_GC400 &&
@@ -616,19 +623,20 @@ static void etnaviv_gpu_enable_mlcg(struct etnaviv_gpu *gpu)
 
 	/* Disable TX clock gating on affected core revisions. */
 	if (etnaviv_is_model_rev(gpu, GC4000, 0x5222) ||
-	    etnaviv_is_model_rev(gpu, GC2000, 0x5108))
+	    etnaviv_is_model_rev(gpu, GC2000, 0x5108) ||
+	    etnaviv_is_model_rev(gpu, GC2000, 0x6202) ||
+	    etnaviv_is_model_rev(gpu, GC2000, 0x6203))
 		pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_TX;
 
-	/* Disable SE, RA and TX clock gating on affected core revisions. */
+	/* Disable SE and RA clock gating on affected core revisions. */
 	if (etnaviv_is_model_rev(gpu, GC7000, 0x6202))
 		pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_SE |
-		       VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA |
-		       VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_TX;
+		       VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA;
 
 	pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_HZ;
 	pmc |= VIVS_PM_MODULE_CONTROLS_DISABLE_MODULE_CLOCK_GATING_RA_EZ;
 
-	gpu_write(gpu, VIVS_PM_MODULE_CONTROLS, pmc);
+	gpu_write_power(gpu, VIVS_PM_MODULE_CONTROLS, pmc);
 }
 
 void etnaviv_gpu_start_fe(struct etnaviv_gpu *gpu, u32 address, u16 prefetch)
@@ -688,11 +696,11 @@ static void etnaviv_gpu_setup_pulse_eater(struct etnaviv_gpu *gpu)
 	    (gpu->identity.features & chipFeatures_PIPE_3D))
 	{
 		/* Performance fix: disable internal DFS */
-		pulse_eater = gpu_read(gpu, VIVS_PM_PULSE_EATER);
+		pulse_eater = gpu_read_power(gpu, VIVS_PM_PULSE_EATER);
 		pulse_eater |= BIT(18);
 	}
 
-	gpu_write(gpu, VIVS_PM_PULSE_EATER, pulse_eater);
+	gpu_write_power(gpu, VIVS_PM_PULSE_EATER, pulse_eater);
 }
 
 static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
@@ -1045,12 +1053,28 @@ pm_put:
 }
 #endif
 
-void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
+void etnaviv_gpu_recover_hang(struct etnaviv_gem_submit *submit)
 {
+	struct etnaviv_gpu *gpu = submit->gpu;
+	char *comm = NULL, *cmd = NULL;
+	struct task_struct *task;
 	unsigned int i;
 
 	dev_err(gpu->dev, "recover hung GPU!\n");
 
+	task = get_pid_task(submit->pid, PIDTYPE_PID);
+	if (task) {
+		comm = kstrdup(task->comm, GFP_KERNEL);
+		cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
+		put_task_struct(task);
+	}
+
+	if (comm && cmd)
+		dev_err(gpu->dev, "offending task: %s (%s)\n", comm, cmd);
+
+	kfree(cmd);
+	kfree(comm);
+
 	if (pm_runtime_get_sync(gpu->dev) < 0)
 		goto pm_put;
 
@@ -1294,9 +1318,9 @@ static void sync_point_perfmon_sample_pre(struct etnaviv_gpu *gpu,
 	u32 val;
 
 	/* disable clock gating */
-	val = gpu_read(gpu, VIVS_PM_POWER_CONTROLS);
+	val = gpu_read_power(gpu, VIVS_PM_POWER_CONTROLS);
 	val &= ~VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING;
-	gpu_write(gpu, VIVS_PM_POWER_CONTROLS, val);
+	gpu_write_power(gpu, VIVS_PM_POWER_CONTROLS, val);
 
 	/* enable debug register */
 	val = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL);
@@ -1327,9 +1351,9 @@ static void sync_point_perfmon_sample_post(struct etnaviv_gpu *gpu,
 	gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, val);
 
 	/* enable clock gating */
-	val = gpu_read(gpu, VIVS_PM_POWER_CONTROLS);
+	val = gpu_read_power(gpu, VIVS_PM_POWER_CONTROLS);
 	val |= VIVS_PM_POWER_CONTROLS_ENABLE_MODULE_CLOCK_GATING;
-	gpu_write(gpu, VIVS_PM_POWER_CONTROLS, val);
+	gpu_write_power(gpu, VIVS_PM_POWER_CONTROLS, val);
 }
 
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 85eddd492774..f1204b070fb8 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -10,6 +10,7 @@
 #include "etnaviv_gem.h"
 #include "etnaviv_mmu.h"
 #include "etnaviv_drv.h"
+#include "common.xml.h"
 
 struct etnaviv_gem_submit;
 struct etnaviv_vram_mapping;
@@ -159,6 +160,26 @@ static inline u32 gpu_read(struct etnaviv_gpu *gpu, u32 reg)
 	return readl(gpu->mmio + reg);
 }
 
+static inline u32 gpu_fix_power_address(struct etnaviv_gpu *gpu, u32 reg)
+{
+	/* Power registers in GC300 < 2.0 are offset by 0x100 */
+	if (gpu->identity.model == chipModel_GC300 &&
+	    gpu->identity.revision < 0x2000)
+		reg += 0x100;
+
+	return reg;
+}
+
+static inline void gpu_write_power(struct etnaviv_gpu *gpu, u32 reg, u32 data)
+{
+	writel(data, gpu->mmio + gpu_fix_power_address(gpu, reg));
+}
+
+static inline u32 gpu_read_power(struct etnaviv_gpu *gpu, u32 reg)
+{
+	return readl(gpu->mmio + gpu_fix_power_address(gpu, reg));
+}
+
 int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value);
 
 int etnaviv_gpu_init(struct etnaviv_gpu *gpu);
@@ -168,7 +189,7 @@ bool etnaviv_fill_identity_from_hwdb(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m);
 #endif
 
-void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu);
+void etnaviv_gpu_recover_hang(struct etnaviv_gem_submit *submit);
 void etnaviv_gpu_retire(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
 	u32 fence, struct drm_etnaviv_timespec *timeout);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
index f2fc645c7956..57f334e24189 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_hwdb.c
@@ -70,6 +70,37 @@ static const struct etnaviv_chip_identity etnaviv_chip_identities[] = {
 	},
 	{
 		.model = 0x7000,
+		.revision = 0x6203,
+		.product_id = 0x70003,
+		.customer_id = 0x4,
+		.eco_id = 0,
+		.stream_count = 16,
+		.register_max = 64,
+		.thread_count = 512,
+		.shader_core_count = 2,
+		.vertex_cache_size = 16,
+		.vertex_output_buffer_size = 1024,
+		.pixel_pipes = 1,
+		.instruction_count = 512,
+		.num_constants = 320,
+		.buffer_size = 0,
+		.varyings_count = 16,
+		.features = 0xe0287c8d,
+		.minor_features0 = 0xc1589eff,
+		.minor_features1 = 0xfefbfad9,
+		.minor_features2 = 0xeb9d4fbf,
+		.minor_features3 = 0xedfffced,
+		.minor_features4 = 0xdb0dafc7,
+		.minor_features5 = 0x3b5ac333,
+		.minor_features6 = 0xfcce6000,
+		.minor_features7 = 0xfffbfa6f,
+		.minor_features8 = 0x00e10ef3,
+		.minor_features9 = 0x00c8003c,
+		.minor_features10 = 0x00004040,
+		.minor_features11 = 0x00000024,
+	},
+	{
+		.model = 0x7000,
 		.revision = 0x6204,
 		.product_id = ~0U,
 		.customer_id = ~0U,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
index dc1aa738c4f1..67bdce5326c6 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
@@ -80,10 +80,10 @@ static int etnaviv_iommu_map(struct etnaviv_iommu_context *context, u32 iova,
 		return -EINVAL;
 
 	for_each_sgtable_dma_sg(sgt, sg, i) {
-		u32 pa = sg_dma_address(sg) - sg->offset;
+		phys_addr_t pa = sg_dma_address(sg) - sg->offset;
 		size_t bytes = sg_dma_len(sg) + sg->offset;
 
-		VERB("map[%d]: %08x %08x(%zx)", i, iova, pa, bytes);
+		VERB("map[%d]: %08x %pap(%zx)", i, iova, &pa, bytes);
 
 		ret = etnaviv_context_map(context, da, pa, bytes, prot);
 		if (ret)
@@ -135,6 +135,19 @@ static void etnaviv_iommu_remove_mapping(struct etnaviv_iommu_context *context,
 	drm_mm_remove_node(&mapping->vram_node);
 }
 
+void etnaviv_iommu_reap_mapping(struct etnaviv_vram_mapping *mapping)
+{
+	struct etnaviv_iommu_context *context = mapping->context;
+
+	lockdep_assert_held(&context->lock);
+	WARN_ON(mapping->use);
+
+	etnaviv_iommu_remove_mapping(context, mapping);
+	etnaviv_iommu_context_put(mapping->context);
+	mapping->context = NULL;
+	list_del_init(&mapping->mmu_node);
+}
+
 static int etnaviv_iommu_find_iova(struct etnaviv_iommu_context *context,
 				   struct drm_mm_node *node, size_t size)
 {
@@ -202,10 +215,7 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu_context *context,
 		 * this mapping.
 		 */
 		list_for_each_entry_safe(m, n, &list, scan_node) {
-			etnaviv_iommu_remove_mapping(context, m);
-			etnaviv_iommu_context_put(m->context);
-			m->context = NULL;
-			list_del_init(&m->mmu_node);
+			etnaviv_iommu_reap_mapping(m);
 			list_del_init(&m->scan_node);
 		}
 
@@ -257,10 +267,7 @@ static int etnaviv_iommu_insert_exact(struct etnaviv_iommu_context *context,
 	}
 
 	list_for_each_entry_safe(m, n, &scan_list, scan_node) {
-		etnaviv_iommu_remove_mapping(context, m);
-		etnaviv_iommu_context_put(m->context);
-		m->context = NULL;
-		list_del_init(&m->mmu_node);
+		etnaviv_iommu_reap_mapping(m);
 		list_del_init(&m->scan_node);
 	}
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
index e4a0b7d09c2e..c01a147f0dfd 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
@@ -91,6 +91,7 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu_context *context,
 	struct etnaviv_vram_mapping *mapping, u64 va);
 void etnaviv_iommu_unmap_gem(struct etnaviv_iommu_context *context,
 	struct etnaviv_vram_mapping *mapping);
+void etnaviv_iommu_reap_mapping(struct etnaviv_vram_mapping *mapping);
 
 int etnaviv_iommu_get_suballoc_va(struct etnaviv_iommu_context *ctx,
 				  struct etnaviv_vram_mapping *mapping,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
index 72e2553fbc98..d29f467eee13 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@@ -67,7 +67,7 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job
 
 	/* get the GPU back into the init state */
 	etnaviv_core_dump(submit);
-	etnaviv_gpu_recover_hang(gpu);
+	etnaviv_gpu_recover_hang(submit);
 
 	drm_sched_resubmit_jobs(&gpu->sched);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
index 767afd2bfa82..55c92372fca0 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -49,6 +49,8 @@ static const struct fb_ops exynos_drm_fb_ops = {
 	.owner		= THIS_MODULE,
 	DRM_FB_HELPER_DEFAULT_OPS,
 	.fb_mmap        = exynos_drm_fb_mmap,
+	.fb_read	= drm_fb_helper_cfb_read,
+	.fb_write	= drm_fb_helper_cfb_write,
 	.fb_fillrect	= drm_fb_helper_cfb_fillrect,
 	.fb_copyarea	= drm_fb_helper_cfb_copyarea,
 	.fb_imageblit	= drm_fb_helper_cfb_imageblit,
@@ -63,7 +65,7 @@ static int exynos_drm_fbdev_update(struct drm_fb_helper *helper,
 	unsigned int size = fb->width * fb->height * fb->format->cpp[0];
 	unsigned long offset;
 
-	fbi = drm_fb_helper_alloc_fbi(helper);
+	fbi = drm_fb_helper_alloc_info(helper);
 	if (IS_ERR(fbi)) {
 		DRM_DEV_ERROR(to_dma_dev(helper->dev),
 			      "failed to allocate fb info.\n");
@@ -201,7 +203,7 @@ static void exynos_drm_fbdev_destroy(struct drm_device *dev,
 			drm_framebuffer_remove(fb);
 	}
 
-	drm_fb_helper_unregister_fbi(fb_helper);
+	drm_fb_helper_unregister_info(fb_helper);
 
 	drm_fb_helper_fini(fb_helper);
 }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 471fd6c8135f..e19c2ceb3759 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -477,7 +477,7 @@ static dma_addr_t *g2d_userptr_get_dma_addr(struct g2d_data *g2d,
 	}
 
 	ret = pin_user_pages_fast(start, npages,
-				  FOLL_FORCE | FOLL_WRITE | FOLL_LONGTERM,
+				  FOLL_WRITE | FOLL_LONGTERM,
 				  g2d_userptr->pages);
 	if (ret != npages) {
 		DRM_DEV_ERROR(g2d->dev,
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
index b4acc3422ba4..8579c7629f5e 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
@@ -20,7 +20,7 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_modeset_helper.h>
 #include <drm/drm_module.h>
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
index 4d4a715b429d..2c2b92324a2e 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
@@ -60,8 +60,9 @@ static int fsl_dcu_drm_connector_get_modes(struct drm_connector *connector)
 	return drm_panel_get_modes(fsl_connector->panel, connector);
 }
 
-static int fsl_dcu_drm_connector_mode_valid(struct drm_connector *connector,
-					    struct drm_display_mode *mode)
+static enum drm_mode_status
+fsl_dcu_drm_connector_mode_valid(struct drm_connector *connector,
+				 struct drm_display_mode *mode)
 {
 	if (mode->hdisplay & 0xf)
 		return MODE_ERROR;
diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
index 5f502a0048ab..8d5a37b8f110 100644
--- a/drivers/gpu/drm/gma500/framebuffer.c
+++ b/drivers/gpu/drm/gma500/framebuffer.c
@@ -147,6 +147,8 @@ static const struct fb_ops psbfb_unaccel_ops = {
 	.owner = THIS_MODULE,
 	DRM_FB_HELPER_DEFAULT_OPS,
 	.fb_setcolreg = psbfb_setcolreg,
+	.fb_read = drm_fb_helper_cfb_read,
+	.fb_write = drm_fb_helper_cfb_write,
 	.fb_fillrect = drm_fb_helper_cfb_fillrect,
 	.fb_copyarea = drm_fb_helper_cfb_copyarea,
 	.fb_imageblit = drm_fb_helper_cfb_imageblit,
@@ -268,7 +270,7 @@ static int psbfb_create(struct drm_fb_helper *fb_helper,
 
 	memset(dev_priv->vram_addr + backing->offset, 0, size);
 
-	info = drm_fb_helper_alloc_fbi(fb_helper);
+	info = drm_fb_helper_alloc_info(fb_helper);
 	if (IS_ERR(info)) {
 		ret = PTR_ERR(info);
 		goto err_drm_gem_object_put;
@@ -383,7 +385,7 @@ static int psb_fbdev_destroy(struct drm_device *dev,
 {
 	struct drm_framebuffer *fb = fb_helper->fb;
 
-	drm_fb_helper_unregister_fbi(fb_helper);
+	drm_fb_helper_unregister_info(fb_helper);
 
 	drm_fb_helper_fini(fb_helper);
 	drm_framebuffer_unregister_private(fb);
diff --git a/drivers/gpu/drm/gud/gud_drv.c b/drivers/gpu/drm/gud/gud_drv.c
index 8d1630b8edac..d57dab104358 100644
--- a/drivers/gpu/drm/gud/gud_drv.c
+++ b/drivers/gpu/drm/gud/gud_drv.c
@@ -18,7 +18,7 @@
 #include <drm/drm_damage_helper.h>
 #include <drm/drm_debugfs.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_gem_atomic_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
index 5a2e1cac06b2..22053c613644 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.c
@@ -17,6 +17,7 @@
 #include <drm/drm_aperture.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_gem_vram_helper.h>
 #include <drm/drm_managed.h>
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h
index 4a0cd22c10e2..f957552c6c50 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_drv.h
@@ -19,7 +19,6 @@
 #include <linux/i2c.h>
 
 #include <drm/drm_edid.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_framebuffer.h>
 
 struct hibmc_connector {
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c
index c228091fb0e6..8c6d2ea2a472 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_vdac.c
@@ -11,6 +11,8 @@
  *	Jianhua Li <lijianhua@huawei.com>
  */
 
+#include <linux/io.h>
+
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_print.h>
diff --git a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
index a0d5aa727d58..d9978b79828c 100644
--- a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
+++ b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
@@ -658,7 +658,7 @@ static enum drm_mode_status dsi_encoder_mode_valid(struct drm_encoder *encoder,
 		 * reset adj_mode to the mode value each time,
 		 * so we don't adjust the mode twice
 		 */
-		drm_mode_copy(&adj_mode, mode);
+		drm_mode_init(&adj_mode, mode);
 
 		crtc_funcs = crtc->helper_private;
 		if (crtc_funcs && crtc_funcs->mode_fixup)
diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c
index 73ee7f25f734..9c5d49bf40c9 100644
--- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c
+++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c
@@ -19,7 +19,7 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_module.h>
diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
index ca127ff797f7..427c20ba3404 100644
--- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
+++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c
@@ -11,7 +11,7 @@
 #include <drm/drm_aperture.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_shmem_helper.h>
 #include <drm/drm_simple_kms_helper.h>
 
diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c b/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c
index 28e732f94bf2..6c6b57298797 100644
--- a/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c
+++ b/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c
@@ -8,7 +8,6 @@
 #include <drm/drm_damage_helper.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_format_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_framebuffer.h>
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index d444e7fffb54..a14d2896aebb 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@@ -1174,6 +1174,8 @@ static int tda998x_audio_codec_init(struct tda998x_priv *priv,
 	struct hdmi_codec_pdata codec_data = {
 		.ops = &audio_codec_ops,
 		.max_i2s_channels = 2,
+		.no_i2s_capture = 1,
+		.no_spdif_capture = 1,
 	};
 
 	if (priv->audio_port_enable[AUDIO_ROUTE_I2S])
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index f9b6844a4854..dfa211451a1d 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -251,6 +251,7 @@ i915-y += \
 	display/intel_global_state.o \
 	display/intel_hdcp.o \
 	display/intel_hotplug.o \
+	display/intel_hti.o \
 	display/intel_lpe_audio.o \
 	display/intel_modeset_verify.o \
 	display/intel_modeset_setup.o \
diff --git a/drivers/gpu/drm/i915/display/g4x_dp.c b/drivers/gpu/drm/i915/display/g4x_dp.c
index e3e3d27ffb53..24ef36ec2d3d 100644
--- a/drivers/gpu/drm/i915/display/g4x_dp.c
+++ b/drivers/gpu/drm/i915/display/g4x_dp.c
@@ -8,6 +8,7 @@
 #include <linux/string_helpers.h>
 
 #include "g4x_dp.h"
+#include "i915_reg.h"
 #include "intel_audio.h"
 #include "intel_backlight.h"
 #include "intel_connector.h"
@@ -672,8 +673,6 @@ static void intel_enable_dp(struct intel_atomic_state *state,
 	intel_dp_pcon_dsc_configure(intel_dp, pipe_config);
 	intel_dp_start_link_train(intel_dp, pipe_config);
 	intel_dp_stop_link_train(intel_dp, pipe_config);
-
-	intel_audio_codec_enable(encoder, pipe_config, conn_state);
 }
 
 static void g4x_enable_dp(struct intel_atomic_state *state,
@@ -682,6 +681,7 @@ static void g4x_enable_dp(struct intel_atomic_state *state,
 			  const struct drm_connector_state *conn_state)
 {
 	intel_enable_dp(state, encoder, pipe_config, conn_state);
+	intel_audio_codec_enable(encoder, pipe_config, conn_state);
 	intel_edp_backlight_on(pipe_config, conn_state);
 }
 
@@ -690,6 +690,7 @@ static void vlv_enable_dp(struct intel_atomic_state *state,
 			  const struct intel_crtc_state *pipe_config,
 			  const struct drm_connector_state *conn_state)
 {
+	intel_audio_codec_enable(encoder, pipe_config, conn_state);
 	intel_edp_backlight_on(pipe_config, conn_state);
 }
 
diff --git a/drivers/gpu/drm/i915/display/g4x_dp.h b/drivers/gpu/drm/i915/display/g4x_dp.h
index e1f50263a725..a38b3e1e01d3 100644
--- a/drivers/gpu/drm/i915/display/g4x_dp.h
+++ b/drivers/gpu/drm/i915/display/g4x_dp.h
@@ -8,7 +8,7 @@
 
 #include <linux/types.h>
 
-#include "i915_reg.h"
+#include "i915_reg_defs.h"
 
 enum pipe;
 enum port;
diff --git a/drivers/gpu/drm/i915/display/g4x_hdmi.c b/drivers/gpu/drm/i915/display/g4x_hdmi.c
index 8aadf96fa5e9..c3580d96765c 100644
--- a/drivers/gpu/drm/i915/display/g4x_hdmi.c
+++ b/drivers/gpu/drm/i915/display/g4x_hdmi.c
@@ -6,6 +6,7 @@
  */
 
 #include "g4x_hdmi.h"
+#include "i915_reg.h"
 #include "intel_audio.h"
 #include "intel_connector.h"
 #include "intel_crtc.h"
@@ -78,6 +79,18 @@ static bool intel_hdmi_get_hw_state(struct intel_encoder *encoder,
 	return ret;
 }
 
+static int g4x_hdmi_compute_config(struct intel_encoder *encoder,
+				   struct intel_crtc_state *crtc_state,
+				   struct drm_connector_state *conn_state)
+{
+	struct drm_i915_private *i915 = to_i915(encoder->base.dev);
+
+	if (HAS_PCH_SPLIT(i915))
+		crtc_state->has_pch_encoder = true;
+
+	return intel_hdmi_compute_config(encoder, crtc_state, conn_state);
+}
+
 static void intel_hdmi_get_config(struct intel_encoder *encoder,
 				  struct intel_crtc_state *pipe_config)
 {
@@ -144,10 +157,8 @@ static void intel_hdmi_get_config(struct intel_encoder *encoder,
 			     &pipe_config->infoframes.hdmi);
 }
 
-static void g4x_enable_hdmi(struct intel_atomic_state *state,
-			    struct intel_encoder *encoder,
-			    const struct intel_crtc_state *pipe_config,
-			    const struct drm_connector_state *conn_state)
+static void g4x_hdmi_enable_port(struct intel_encoder *encoder,
+				 const struct intel_crtc_state *pipe_config)
 {
 	struct drm_device *dev = encoder->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -162,6 +173,16 @@ static void g4x_enable_hdmi(struct intel_atomic_state *state,
 
 	intel_de_write(dev_priv, intel_hdmi->hdmi_reg, temp);
 	intel_de_posting_read(dev_priv, intel_hdmi->hdmi_reg);
+}
+
+static void g4x_enable_hdmi(struct intel_atomic_state *state,
+			    struct intel_encoder *encoder,
+			    const struct intel_crtc_state *pipe_config,
+			    const struct drm_connector_state *conn_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+
+	g4x_hdmi_enable_port(encoder, pipe_config);
 
 	drm_WARN_ON(&dev_priv->drm, pipe_config->has_audio &&
 		    !pipe_config->has_hdmi_sink);
@@ -281,6 +302,11 @@ static void vlv_enable_hdmi(struct intel_atomic_state *state,
 			    const struct intel_crtc_state *pipe_config,
 			    const struct drm_connector_state *conn_state)
 {
+	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
+
+	drm_WARN_ON(&dev_priv->drm, pipe_config->has_audio &&
+		    !pipe_config->has_hdmi_sink);
+	intel_audio_codec_enable(encoder, pipe_config, conn_state);
 }
 
 static void intel_disable_hdmi(struct intel_atomic_state *state,
@@ -402,7 +428,7 @@ static void vlv_hdmi_pre_enable(struct intel_atomic_state *state,
 			      pipe_config->has_infoframe,
 			      pipe_config, conn_state);
 
-	g4x_enable_hdmi(state, encoder, pipe_config, conn_state);
+	g4x_hdmi_enable_port(encoder, pipe_config);
 
 	vlv_wait_port_ready(dev_priv, dig_port, 0x0);
 }
@@ -479,7 +505,7 @@ static void chv_hdmi_pre_enable(struct intel_atomic_state *state,
 			      pipe_config->has_infoframe,
 			      pipe_config, conn_state);
 
-	g4x_enable_hdmi(state, encoder, pipe_config, conn_state);
+	g4x_hdmi_enable_port(encoder, pipe_config);
 
 	vlv_wait_port_ready(dev_priv, dig_port, 0x0);
 
@@ -543,7 +569,7 @@ void g4x_hdmi_init(struct drm_i915_private *dev_priv,
 			 "HDMI %c", port_name(port));
 
 	intel_encoder->hotplug = intel_hdmi_hotplug;
-	intel_encoder->compute_config = intel_hdmi_compute_config;
+	intel_encoder->compute_config = g4x_hdmi_compute_config;
 	if (HAS_PCH_SPLIT(dev_priv)) {
 		intel_encoder->disable = pch_disable_hdmi;
 		intel_encoder->post_disable = pch_post_disable_hdmi;
diff --git a/drivers/gpu/drm/i915/display/i9xx_plane.c b/drivers/gpu/drm/i915/display/i9xx_plane.c
index 5afbe3e98ee8..ecaeb7dc196b 100644
--- a/drivers/gpu/drm/i915/display/i9xx_plane.c
+++ b/drivers/gpu/drm/i915/display/i9xx_plane.c
@@ -8,6 +8,9 @@
 #include <drm/drm_blend.h>
 #include <drm/drm_fourcc.h>
 
+#include "i915_irq.h"
+#include "i915_reg.h"
+#include "i9xx_plane.h"
 #include "intel_atomic.h"
 #include "intel_atomic_plane.h"
 #include "intel_de.h"
@@ -15,7 +18,6 @@
 #include "intel_fb.h"
 #include "intel_fbc.h"
 #include "intel_sprite.h"
-#include "i9xx_plane.h"
 
 /* Primary plane formats for gen <= 3 */
 static const u32 i8xx_primary_formats[] = {
diff --git a/drivers/gpu/drm/i915/display/icl_dsi.c b/drivers/gpu/drm/i915/display/icl_dsi.c
index e05e7cd6c412..d16b30a2dded 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi.c
+++ b/drivers/gpu/drm/i915/display/icl_dsi.c
@@ -29,6 +29,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_mipi_dsi.h>
 
+#include "i915_reg.h"
 #include "icl_dsi.h"
 #include "icl_dsi_regs.h"
 #include "intel_atomic.h"
diff --git a/drivers/gpu/drm/i915/display/icl_dsi_regs.h b/drivers/gpu/drm/i915/display/icl_dsi_regs.h
index f78f28b8dd94..d4845ac65acc 100644
--- a/drivers/gpu/drm/i915/display/icl_dsi_regs.h
+++ b/drivers/gpu/drm/i915/display/icl_dsi_regs.h
@@ -6,7 +6,7 @@
 #ifndef __ICL_DSI_REGS_H__
 #define __ICL_DSI_REGS_H__
 
-#include "i915_reg_defs.h"
+#include "intel_display_reg_defs.h"
 
 /* Gen11 DSI */
 #define _MMIO_DSI(tc, dsi0, dsi1)	_MMIO_TRANS((tc) - TRANSCODER_DSI_0, \
diff --git a/drivers/gpu/drm/i915/display/intel_atomic_plane.c b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
index bcf0239b9533..10e1fc9d0698 100644
--- a/drivers/gpu/drm/i915/display/intel_atomic_plane.c
+++ b/drivers/gpu/drm/i915/display/intel_atomic_plane.c
@@ -757,7 +757,7 @@ void intel_plane_update_noarm(struct intel_plane *plane,
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 
-	trace_intel_plane_update_noarm(&plane->base, crtc);
+	trace_intel_plane_update_noarm(plane, crtc);
 
 	if (plane->update_noarm)
 		plane->update_noarm(plane, crtc_state, plane_state);
@@ -769,7 +769,7 @@ void intel_plane_update_arm(struct intel_plane *plane,
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 
-	trace_intel_plane_update_arm(&plane->base, crtc);
+	trace_intel_plane_update_arm(plane, crtc);
 
 	if (crtc_state->do_async_flip && plane->async_flip)
 		plane->async_flip(plane, crtc_state, plane_state, true);
@@ -782,7 +782,7 @@ void intel_plane_disable_arm(struct intel_plane *plane,
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 
-	trace_intel_plane_disable_arm(&plane->base, crtc);
+	trace_intel_plane_disable_arm(plane, crtc);
 	plane->disable_arm(plane, crtc_state);
 }
 
diff --git a/drivers/gpu/drm/i915/display/intel_audio.c b/drivers/gpu/drm/i915/display/intel_audio.c
index c3176c9c89a6..98c3322b4549 100644
--- a/drivers/gpu/drm/i915/display/intel_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_audio.c
@@ -838,8 +838,8 @@ void intel_audio_codec_enable(struct intel_encoder *encoder,
 
 	if (i915->display.funcs.audio)
 		i915->display.funcs.audio->audio_codec_enable(encoder,
-								  crtc_state,
-								  conn_state);
+							      crtc_state,
+							      conn_state);
 
 	mutex_lock(&i915->display.audio.mutex);
 	encoder->audio_connector = connector;
@@ -854,7 +854,7 @@ void intel_audio_codec_enable(struct intel_encoder *encoder,
 		if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DP_MST))
 			pipe = -1;
 		acomp->base.audio_ops->pin_eld_notify(acomp->base.audio_ops->audio_ptr,
-						 (int) port, (int) pipe);
+						      (int)port, (int)pipe);
 	}
 
 	intel_lpe_audio_notify(i915, pipe, port, connector->eld,
@@ -891,8 +891,8 @@ void intel_audio_codec_disable(struct intel_encoder *encoder,
 
 	if (i915->display.funcs.audio)
 		i915->display.funcs.audio->audio_codec_disable(encoder,
-								   old_crtc_state,
-								   old_conn_state);
+							       old_crtc_state,
+							       old_conn_state);
 
 	mutex_lock(&i915->display.audio.mutex);
 	encoder->audio_connector = NULL;
@@ -905,7 +905,7 @@ void intel_audio_codec_disable(struct intel_encoder *encoder,
 		if (!intel_crtc_has_type(old_crtc_state, INTEL_OUTPUT_DP_MST))
 			pipe = -1;
 		acomp->base.audio_ops->pin_eld_notify(acomp->base.audio_ops->audio_ptr,
-						 (int) port, (int) pipe);
+						      (int)port, (int)pipe);
 	}
 
 	intel_lpe_audio_notify(i915, pipe, port, NULL, 0, false);
@@ -1129,10 +1129,10 @@ static int i915_audio_component_get_cdclk_freq(struct device *kdev)
 static struct intel_encoder *get_saved_enc(struct drm_i915_private *i915,
 					   int port, int pipe)
 {
-	struct intel_encoder *encoder;
-
 	/* MST */
 	if (pipe >= 0) {
+		struct intel_encoder *encoder;
+
 		if (drm_WARN_ON(&i915->drm,
 				pipe >= ARRAY_SIZE(i915->display.audio.encoder_map)))
 			return NULL;
@@ -1143,7 +1143,7 @@ static struct intel_encoder *get_saved_enc(struct drm_i915_private *i915,
 		 * MST or not. So it will poll all the port & pipe
 		 * combinations
 		 */
-		if (encoder != NULL && encoder->port == port &&
+		if (encoder && encoder->port == port &&
 		    encoder->type == INTEL_OUTPUT_DP_MST)
 			return encoder;
 	}
@@ -1153,14 +1153,12 @@ static struct intel_encoder *get_saved_enc(struct drm_i915_private *i915,
 		return NULL;
 
 	for_each_pipe(i915, pipe) {
-		encoder = i915->display.audio.encoder_map[pipe];
-		if (encoder == NULL)
-			continue;
+		struct intel_encoder *encoder;
 
-		if (encoder->type == INTEL_OUTPUT_DP_MST)
-			continue;
+		encoder = i915->display.audio.encoder_map[pipe];
 
-		if (port == encoder->port)
+		if (encoder && encoder->port == port &&
+		    encoder->type != INTEL_OUTPUT_DP_MST)
 			return encoder;
 	}
 
diff --git a/drivers/gpu/drm/i915/display/intel_audio_regs.h b/drivers/gpu/drm/i915/display/intel_audio_regs.h
index 4f432c2eb543..616e7b1275c4 100644
--- a/drivers/gpu/drm/i915/display/intel_audio_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_audio_regs.h
@@ -6,7 +6,7 @@
 #ifndef __INTEL_AUDIO_REGS_H__
 #define __INTEL_AUDIO_REGS_H__
 
-#include "i915_reg_defs.h"
+#include "intel_display_reg_defs.h"
 
 #define G4X_AUD_CNTL_ST			_MMIO(0x620B4)
 #define   G4X_ELD_VALID			REG_BIT(14)
diff --git a/drivers/gpu/drm/i915/display/intel_backlight.c b/drivers/gpu/drm/i915/display/intel_backlight.c
index beba39a38c87..71af88a70461 100644
--- a/drivers/gpu/drm/i915/display/intel_backlight.c
+++ b/drivers/gpu/drm/i915/display/intel_backlight.c
@@ -10,6 +10,7 @@
 
 #include <acpi/video.h>
 
+#include "i915_reg.h"
 #include "intel_backlight.h"
 #include "intel_backlight_regs.h"
 #include "intel_connector.h"
diff --git a/drivers/gpu/drm/i915/display/intel_backlight_regs.h b/drivers/gpu/drm/i915/display/intel_backlight_regs.h
index 50c1210f6d5d..344eb8096bd2 100644
--- a/drivers/gpu/drm/i915/display/intel_backlight_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_backlight_regs.h
@@ -6,7 +6,7 @@
 #ifndef __INTEL_BACKLIGHT_REGS_H__
 #define __INTEL_BACKLIGHT_REGS_H__
 
-#include "i915_reg_defs.h"
+#include "intel_display_reg_defs.h"
 
 #define _VLV_BLC_PWM_CTL2_A (DISPLAY_MMIO_BASE(dev_priv) + 0x61250)
 #define _VLV_BLC_PWM_CTL2_B (DISPLAY_MMIO_BASE(dev_priv) + 0x61350)
diff --git a/drivers/gpu/drm/i915/display/intel_bios.c b/drivers/gpu/drm/i915/display/intel_bios.c
index c2987f2c2b2e..572a4e3769f3 100644
--- a/drivers/gpu/drm/i915/display/intel_bios.c
+++ b/drivers/gpu/drm/i915/display/intel_bios.c
@@ -414,7 +414,7 @@ static void *generate_lfp_data_ptrs(struct drm_i915_private *i915,
 		ptrs->lvds_entries++;
 
 	if (size != 0 || ptrs->lvds_entries != 3) {
-		kfree(ptrs);
+		kfree(ptrs_block);
 		return NULL;
 	}
 
diff --git a/drivers/gpu/drm/i915/display/intel_bw.c b/drivers/gpu/drm/i915/display/intel_bw.c
index 4ace026b29bd..1c236f02b380 100644
--- a/drivers/gpu/drm/i915/display/intel_bw.c
+++ b/drivers/gpu/drm/i915/display/intel_bw.c
@@ -439,7 +439,8 @@ static int tgl_get_bw_info(struct drm_i915_private *dev_priv, const struct intel
 		return ret;
 	}
 
-	if (dram_info->type == INTEL_DRAM_LPDDR4 || dram_info->type == INTEL_DRAM_LPDDR5)
+	if (DISPLAY_VER(dev_priv) < 14 &&
+	    (dram_info->type == INTEL_DRAM_LPDDR4 || dram_info->type == INTEL_DRAM_LPDDR5))
 		num_channels *= 2;
 
 	qi.deinterleave = qi.deinterleave ? : DIV_ROUND_UP(num_channels, is_y_tile ? 4 : 2);
diff --git a/drivers/gpu/drm/i915/display/intel_cdclk.c b/drivers/gpu/drm/i915/display/intel_cdclk.c
index eada931cb1c8..b74e36d76013 100644
--- a/drivers/gpu/drm/i915/display/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/display/intel_cdclk.c
@@ -24,6 +24,7 @@
 #include <linux/time.h>
 
 #include "hsw_ips.h"
+#include "i915_reg.h"
 #include "intel_atomic.h"
 #include "intel_atomic_plane.h"
 #include "intel_audio.h"
@@ -2755,7 +2756,7 @@ int intel_modeset_calc_cdclk(struct intel_atomic_state *state)
 		if (IS_ERR(crtc_state))
 			return PTR_ERR(crtc_state);
 
-		if (drm_atomic_crtc_needs_modeset(&crtc_state->uapi))
+		if (intel_crtc_needs_modeset(crtc_state))
 			pipe = INVALID_PIPE;
 	}
 
diff --git a/drivers/gpu/drm/i915/display/intel_color.c b/drivers/gpu/drm/i915/display/intel_color.c
index 4bb113c39f4b..250e83f1f5ac 100644
--- a/drivers/gpu/drm/i915/display/intel_color.c
+++ b/drivers/gpu/drm/i915/display/intel_color.c
@@ -22,6 +22,7 @@
  *
  */
 
+#include "i915_reg.h"
 #include "intel_color.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
@@ -184,31 +185,31 @@ static void ilk_update_pipe_csc(struct intel_crtc *crtc,
 				const u16 coeff[9],
 				const u16 postoff[3])
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 
-	intel_de_write_fw(dev_priv, PIPE_CSC_PREOFF_HI(pipe), preoff[0]);
-	intel_de_write_fw(dev_priv, PIPE_CSC_PREOFF_ME(pipe), preoff[1]);
-	intel_de_write_fw(dev_priv, PIPE_CSC_PREOFF_LO(pipe), preoff[2]);
+	intel_de_write_fw(i915, PIPE_CSC_PREOFF_HI(pipe), preoff[0]);
+	intel_de_write_fw(i915, PIPE_CSC_PREOFF_ME(pipe), preoff[1]);
+	intel_de_write_fw(i915, PIPE_CSC_PREOFF_LO(pipe), preoff[2]);
 
-	intel_de_write_fw(dev_priv, PIPE_CSC_COEFF_RY_GY(pipe),
+	intel_de_write_fw(i915, PIPE_CSC_COEFF_RY_GY(pipe),
 			  coeff[0] << 16 | coeff[1]);
-	intel_de_write_fw(dev_priv, PIPE_CSC_COEFF_BY(pipe), coeff[2] << 16);
+	intel_de_write_fw(i915, PIPE_CSC_COEFF_BY(pipe), coeff[2] << 16);
 
-	intel_de_write_fw(dev_priv, PIPE_CSC_COEFF_RU_GU(pipe),
+	intel_de_write_fw(i915, PIPE_CSC_COEFF_RU_GU(pipe),
 			  coeff[3] << 16 | coeff[4]);
-	intel_de_write_fw(dev_priv, PIPE_CSC_COEFF_BU(pipe), coeff[5] << 16);
+	intel_de_write_fw(i915, PIPE_CSC_COEFF_BU(pipe), coeff[5] << 16);
 
-	intel_de_write_fw(dev_priv, PIPE_CSC_COEFF_RV_GV(pipe),
+	intel_de_write_fw(i915, PIPE_CSC_COEFF_RV_GV(pipe),
 			  coeff[6] << 16 | coeff[7]);
-	intel_de_write_fw(dev_priv, PIPE_CSC_COEFF_BV(pipe), coeff[8] << 16);
+	intel_de_write_fw(i915, PIPE_CSC_COEFF_BV(pipe), coeff[8] << 16);
 
-	if (DISPLAY_VER(dev_priv) >= 7) {
-		intel_de_write_fw(dev_priv, PIPE_CSC_POSTOFF_HI(pipe),
+	if (DISPLAY_VER(i915) >= 7) {
+		intel_de_write_fw(i915, PIPE_CSC_POSTOFF_HI(pipe),
 				  postoff[0]);
-		intel_de_write_fw(dev_priv, PIPE_CSC_POSTOFF_ME(pipe),
+		intel_de_write_fw(i915, PIPE_CSC_POSTOFF_ME(pipe),
 				  postoff[1]);
-		intel_de_write_fw(dev_priv, PIPE_CSC_POSTOFF_LO(pipe),
+		intel_de_write_fw(i915, PIPE_CSC_POSTOFF_LO(pipe),
 				  postoff[2]);
 	}
 }
@@ -218,55 +219,55 @@ static void icl_update_output_csc(struct intel_crtc *crtc,
 				  const u16 coeff[9],
 				  const u16 postoff[3])
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 
-	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_PREOFF_HI(pipe), preoff[0]);
-	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_PREOFF_ME(pipe), preoff[1]);
-	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_PREOFF_LO(pipe), preoff[2]);
+	intel_de_write_fw(i915, PIPE_CSC_OUTPUT_PREOFF_HI(pipe), preoff[0]);
+	intel_de_write_fw(i915, PIPE_CSC_OUTPUT_PREOFF_ME(pipe), preoff[1]);
+	intel_de_write_fw(i915, PIPE_CSC_OUTPUT_PREOFF_LO(pipe), preoff[2]);
 
-	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_COEFF_RY_GY(pipe),
+	intel_de_write_fw(i915, PIPE_CSC_OUTPUT_COEFF_RY_GY(pipe),
 			  coeff[0] << 16 | coeff[1]);
-	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_COEFF_BY(pipe),
+	intel_de_write_fw(i915, PIPE_CSC_OUTPUT_COEFF_BY(pipe),
 			  coeff[2] << 16);
 
-	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_COEFF_RU_GU(pipe),
+	intel_de_write_fw(i915, PIPE_CSC_OUTPUT_COEFF_RU_GU(pipe),
 			  coeff[3] << 16 | coeff[4]);
-	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_COEFF_BU(pipe),
+	intel_de_write_fw(i915, PIPE_CSC_OUTPUT_COEFF_BU(pipe),
 			  coeff[5] << 16);
 
-	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_COEFF_RV_GV(pipe),
+	intel_de_write_fw(i915, PIPE_CSC_OUTPUT_COEFF_RV_GV(pipe),
 			  coeff[6] << 16 | coeff[7]);
-	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_COEFF_BV(pipe),
+	intel_de_write_fw(i915, PIPE_CSC_OUTPUT_COEFF_BV(pipe),
 			  coeff[8] << 16);
 
-	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_POSTOFF_HI(pipe), postoff[0]);
-	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_POSTOFF_ME(pipe), postoff[1]);
-	intel_de_write_fw(dev_priv, PIPE_CSC_OUTPUT_POSTOFF_LO(pipe), postoff[2]);
+	intel_de_write_fw(i915, PIPE_CSC_OUTPUT_POSTOFF_HI(pipe), postoff[0]);
+	intel_de_write_fw(i915, PIPE_CSC_OUTPUT_POSTOFF_ME(pipe), postoff[1]);
+	intel_de_write_fw(i915, PIPE_CSC_OUTPUT_POSTOFF_LO(pipe), postoff[2]);
 }
 
 static bool ilk_csc_limited_range(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+	struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
 
 	/*
 	 * FIXME if there's a gamma LUT after the CSC, we should
 	 * do the range compression using the gamma LUT instead.
 	 */
 	return crtc_state->limited_color_range &&
-		(IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) ||
-		 IS_DISPLAY_VER(dev_priv, 9, 10));
+		(IS_HASWELL(i915) || IS_BROADWELL(i915) ||
+		 IS_DISPLAY_VER(i915, 9, 10));
 }
 
 static void ilk_csc_convert_ctm(const struct intel_crtc_state *crtc_state,
-				u16 coeffs[9])
+				u16 coeffs[9], bool limited_color_range)
 {
 	const struct drm_color_ctm *ctm = crtc_state->hw.ctm->data;
 	const u64 *input;
 	u64 temp[9];
 	int i;
 
-	if (ilk_csc_limited_range(crtc_state))
+	if (limited_color_range)
 		input = ctm_mult_by_limited(temp, ctm->matrix);
 	else
 		input = ctm->matrix;
@@ -313,13 +314,13 @@ static void ilk_csc_convert_ctm(const struct intel_crtc_state *crtc_state,
 static void ilk_load_csc_matrix(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	bool limited_color_range = ilk_csc_limited_range(crtc_state);
 
 	if (crtc_state->hw.ctm) {
 		u16 coeff[9];
 
-		ilk_csc_convert_ctm(crtc_state, coeff);
+		ilk_csc_convert_ctm(crtc_state, coeff, limited_color_range);
 		ilk_update_pipe_csc(crtc, ilk_csc_off_zero, coeff,
 				    limited_color_range ?
 				    ilk_csc_postoff_limited_range :
@@ -339,7 +340,7 @@ static void ilk_load_csc_matrix(const struct intel_crtc_state *crtc_state)
 		 * LUT is needed but CSC is not we need to load an
 		 * identity matrix.
 		 */
-		drm_WARN_ON(&dev_priv->drm, !IS_GEMINILAKE(dev_priv));
+		drm_WARN_ON(&i915->drm, !IS_GEMINILAKE(i915));
 
 		ilk_update_pipe_csc(crtc, ilk_csc_off_zero,
 				    ilk_csc_coeff_identity,
@@ -354,7 +355,7 @@ static void icl_load_csc_matrix(const struct intel_crtc_state *crtc_state)
 	if (crtc_state->hw.ctm) {
 		u16 coeff[9];
 
-		ilk_csc_convert_ctm(crtc_state, coeff);
+		ilk_csc_convert_ctm(crtc_state, coeff, false);
 		ilk_update_pipe_csc(crtc, ilk_csc_off_zero,
 				    coeff, ilk_csc_off_zero);
 	}
@@ -373,7 +374,7 @@ static void icl_load_csc_matrix(const struct intel_crtc_state *crtc_state)
 static void chv_load_cgm_csc(struct intel_crtc *crtc,
 			     const struct drm_property_blob *blob)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	const struct drm_color_ctm *ctm = blob->data;
 	enum pipe pipe = crtc->pipe;
 	u16 coeffs[9];
@@ -397,15 +398,15 @@ static void chv_load_cgm_csc(struct intel_crtc *crtc,
 		coeffs[i] |= (abs_coeff >> 20) & 0xfff;
 	}
 
-	intel_de_write_fw(dev_priv, CGM_PIPE_CSC_COEFF01(pipe),
+	intel_de_write_fw(i915, CGM_PIPE_CSC_COEFF01(pipe),
 			  coeffs[1] << 16 | coeffs[0]);
-	intel_de_write_fw(dev_priv, CGM_PIPE_CSC_COEFF23(pipe),
+	intel_de_write_fw(i915, CGM_PIPE_CSC_COEFF23(pipe),
 			  coeffs[3] << 16 | coeffs[2]);
-	intel_de_write_fw(dev_priv, CGM_PIPE_CSC_COEFF45(pipe),
+	intel_de_write_fw(i915, CGM_PIPE_CSC_COEFF45(pipe),
 			  coeffs[5] << 16 | coeffs[4]);
-	intel_de_write_fw(dev_priv, CGM_PIPE_CSC_COEFF67(pipe),
+	intel_de_write_fw(i915, CGM_PIPE_CSC_COEFF67(pipe),
 			  coeffs[7] << 16 | coeffs[6]);
-	intel_de_write_fw(dev_priv, CGM_PIPE_CSC_COEFF8(pipe),
+	intel_de_write_fw(i915, CGM_PIPE_CSC_COEFF8(pipe),
 			  coeffs[8]);
 }
 
@@ -424,32 +425,32 @@ static u32 intel_color_lut_pack(u32 val, int bit_precision)
 
 static u32 i9xx_lut_8(const struct drm_color_lut *color)
 {
-	return drm_color_lut_extract(color->red, 8) << 16 |
-		drm_color_lut_extract(color->green, 8) << 8 |
-		drm_color_lut_extract(color->blue, 8);
+	return REG_FIELD_PREP(PALETTE_RED_MASK, drm_color_lut_extract(color->red, 8)) |
+		REG_FIELD_PREP(PALETTE_GREEN_MASK, drm_color_lut_extract(color->green, 8)) |
+		REG_FIELD_PREP(PALETTE_BLUE_MASK, drm_color_lut_extract(color->blue, 8));
 }
 
 static void i9xx_lut_8_pack(struct drm_color_lut *entry, u32 val)
 {
-	entry->red = intel_color_lut_pack(REG_FIELD_GET(LGC_PALETTE_RED_MASK, val), 8);
-	entry->green = intel_color_lut_pack(REG_FIELD_GET(LGC_PALETTE_GREEN_MASK, val), 8);
-	entry->blue = intel_color_lut_pack(REG_FIELD_GET(LGC_PALETTE_BLUE_MASK, val), 8);
+	entry->red = intel_color_lut_pack(REG_FIELD_GET(PALETTE_RED_MASK, val), 8);
+	entry->green = intel_color_lut_pack(REG_FIELD_GET(PALETTE_GREEN_MASK, val), 8);
+	entry->blue = intel_color_lut_pack(REG_FIELD_GET(PALETTE_BLUE_MASK, val), 8);
 }
 
 /* i965+ "10.6" bit interpolated format "even DW" (low 8 bits) */
 static u32 i965_lut_10p6_ldw(const struct drm_color_lut *color)
 {
-	return (color->red & 0xff) << 16 |
-		(color->green & 0xff) << 8 |
-		(color->blue & 0xff);
+	return REG_FIELD_PREP(PALETTE_RED_MASK, color->red & 0xff) |
+		REG_FIELD_PREP(PALETTE_GREEN_MASK, color->green & 0xff) |
+		REG_FIELD_PREP(PALETTE_BLUE_MASK, color->blue & 0xff);
 }
 
 /* i965+ "10.6" interpolated format "odd DW" (high 8 bits) */
 static u32 i965_lut_10p6_udw(const struct drm_color_lut *color)
 {
-	return (color->red >> 8) << 16 |
-		(color->green >> 8) << 8 |
-		(color->blue >> 8);
+	return REG_FIELD_PREP(PALETTE_RED_MASK, color->red >> 8) |
+		REG_FIELD_PREP(PALETTE_GREEN_MASK, color->green >> 8) |
+		REG_FIELD_PREP(PALETTE_BLUE_MASK, color->blue >> 8);
 }
 
 static void i965_lut_10p6_pack(struct drm_color_lut *entry, u32 ldw, u32 udw)
@@ -470,26 +471,42 @@ static u16 i965_lut_11p6_max_pack(u32 val)
 
 static u32 ilk_lut_10(const struct drm_color_lut *color)
 {
-	return drm_color_lut_extract(color->red, 10) << 20 |
-		drm_color_lut_extract(color->green, 10) << 10 |
-		drm_color_lut_extract(color->blue, 10);
+	return REG_FIELD_PREP(PREC_PALETTE_10_RED_MASK, drm_color_lut_extract(color->red, 10)) |
+		REG_FIELD_PREP(PREC_PALETTE_10_GREEN_MASK, drm_color_lut_extract(color->green, 10)) |
+		REG_FIELD_PREP(PREC_PALETTE_10_BLUE_MASK, drm_color_lut_extract(color->blue, 10));
 }
 
 static void ilk_lut_10_pack(struct drm_color_lut *entry, u32 val)
 {
-	entry->red = intel_color_lut_pack(REG_FIELD_GET(PREC_PALETTE_RED_MASK, val), 10);
-	entry->green = intel_color_lut_pack(REG_FIELD_GET(PREC_PALETTE_GREEN_MASK, val), 10);
-	entry->blue = intel_color_lut_pack(REG_FIELD_GET(PREC_PALETTE_BLUE_MASK, val), 10);
+	entry->red = intel_color_lut_pack(REG_FIELD_GET(PREC_PALETTE_10_RED_MASK, val), 10);
+	entry->green = intel_color_lut_pack(REG_FIELD_GET(PREC_PALETTE_10_GREEN_MASK, val), 10);
+	entry->blue = intel_color_lut_pack(REG_FIELD_GET(PREC_PALETTE_10_BLUE_MASK, val), 10);
 }
 
-static void icl_lut_multi_seg_pack(struct drm_color_lut *entry, u32 ldw, u32 udw)
+/* ilk+ "12.4" interpolated format (low 6 bits) */
+static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
 {
-	entry->red = REG_FIELD_GET(PAL_PREC_MULTI_SEG_RED_UDW_MASK, udw) << 6 |
-				   REG_FIELD_GET(PAL_PREC_MULTI_SEG_RED_LDW_MASK, ldw);
-	entry->green = REG_FIELD_GET(PAL_PREC_MULTI_SEG_GREEN_UDW_MASK, udw) << 6 |
-				     REG_FIELD_GET(PAL_PREC_MULTI_SEG_GREEN_LDW_MASK, ldw);
-	entry->blue = REG_FIELD_GET(PAL_PREC_MULTI_SEG_BLUE_UDW_MASK, udw) << 6 |
-				    REG_FIELD_GET(PAL_PREC_MULTI_SEG_BLUE_LDW_MASK, ldw);
+	return REG_FIELD_PREP(PREC_PALETTE_12P4_RED_LDW_MASK, color->red & 0x3f) |
+		REG_FIELD_PREP(PREC_PALETTE_12P4_GREEN_LDW_MASK, color->green & 0x3f) |
+		REG_FIELD_PREP(PREC_PALETTE_12P4_BLUE_LDW_MASK, color->blue & 0x3f);
+}
+
+/* ilk+ "12.4" interpolated format (high 10 bits) */
+static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
+{
+	return REG_FIELD_PREP(PREC_PALETTE_12P4_RED_UDW_MASK, color->red >> 6) |
+		REG_FIELD_PREP(PREC_PALETTE_12P4_GREEN_UDW_MASK, color->green >> 6) |
+		REG_FIELD_PREP(PREC_PALETTE_12P4_BLUE_UDW_MASK, color->blue >> 6);
+}
+
+static void ilk_lut_12p4_pack(struct drm_color_lut *entry, u32 ldw, u32 udw)
+{
+	entry->red = REG_FIELD_GET(PREC_PALETTE_12P4_RED_UDW_MASK, udw) << 6 |
+		REG_FIELD_GET(PREC_PALETTE_12P4_RED_LDW_MASK, ldw);
+	entry->green = REG_FIELD_GET(PREC_PALETTE_12P4_GREEN_UDW_MASK, udw) << 6 |
+		REG_FIELD_GET(PREC_PALETTE_12P4_GREEN_LDW_MASK, ldw);
+	entry->blue = REG_FIELD_GET(PREC_PALETTE_12P4_BLUE_UDW_MASK, udw) << 6 |
+		REG_FIELD_GET(PREC_PALETTE_12P4_BLUE_LDW_MASK, ldw);
 }
 
 static void icl_color_commit_noarm(const struct intel_crtc_state *crtc_state)
@@ -511,31 +528,31 @@ static void i9xx_color_commit_arm(const struct intel_crtc_state *crtc_state)
 static void ilk_color_commit_arm(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 
 	/* update PIPECONF GAMMA_MODE */
 	ilk_set_pipeconf(crtc_state);
 
-	intel_de_write_fw(dev_priv, PIPE_CSC_MODE(crtc->pipe),
+	intel_de_write_fw(i915, PIPE_CSC_MODE(crtc->pipe),
 			  crtc_state->csc_mode);
 }
 
 static void hsw_color_commit_arm(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 
-	intel_de_write(dev_priv, GAMMA_MODE(crtc->pipe),
+	intel_de_write(i915, GAMMA_MODE(crtc->pipe),
 		       crtc_state->gamma_mode);
 
-	intel_de_write_fw(dev_priv, PIPE_CSC_MODE(crtc->pipe),
+	intel_de_write_fw(i915, PIPE_CSC_MODE(crtc->pipe),
 			  crtc_state->csc_mode);
 }
 
 static void skl_color_commit_arm(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 	u32 val = 0;
 
@@ -548,12 +565,12 @@ static void skl_color_commit_arm(const struct intel_crtc_state *crtc_state)
 		val |= SKL_BOTTOM_COLOR_GAMMA_ENABLE;
 	if (crtc_state->csc_enable)
 		val |= SKL_BOTTOM_COLOR_CSC_ENABLE;
-	intel_de_write(dev_priv, SKL_BOTTOM_COLOR(pipe), val);
+	intel_de_write(i915, SKL_BOTTOM_COLOR(pipe), val);
 
-	intel_de_write(dev_priv, GAMMA_MODE(crtc->pipe),
+	intel_de_write(i915, GAMMA_MODE(crtc->pipe),
 		       crtc_state->gamma_mode);
 
-	intel_de_write_fw(dev_priv, PIPE_CSC_MODE(crtc->pipe),
+	intel_de_write_fw(i915, PIPE_CSC_MODE(crtc->pipe),
 			  crtc_state->csc_mode);
 }
 
@@ -565,7 +582,7 @@ create_linear_lut(struct drm_i915_private *i915, int lut_size)
 	int i;
 
 	blob = drm_property_create_blob(&i915->drm,
-					sizeof(struct drm_color_lut) * lut_size,
+					sizeof(lut[0]) * lut_size,
 					NULL);
 	if (IS_ERR(blob))
 		return blob;
@@ -583,6 +600,30 @@ create_linear_lut(struct drm_i915_private *i915, int lut_size)
 	return blob;
 }
 
+static struct drm_property_blob *
+create_resized_lut(struct drm_i915_private *i915,
+		   const struct drm_property_blob *blob_in, int lut_out_size)
+{
+	int i, lut_in_size = drm_color_lut_size(blob_in);
+	struct drm_property_blob *blob_out;
+	const struct drm_color_lut *lut_in;
+	struct drm_color_lut *lut_out;
+
+	blob_out = drm_property_create_blob(&i915->drm,
+					    sizeof(lut_out[0]) * lut_out_size,
+					    NULL);
+	if (IS_ERR(blob_out))
+		return blob_out;
+
+	lut_in = blob_in->data;
+	lut_out = blob_out->data;
+
+	for (i = 0; i < lut_out_size; i++)
+		lut_out[i] = lut_in[i * (lut_in_size - 1) / (lut_out_size - 1)];
+
+	return blob_out;
+}
+
 static void i9xx_load_lut_8(struct intel_crtc *crtc,
 			    const struct drm_property_blob *blob)
 {
@@ -643,7 +684,7 @@ static void i965_load_luts(const struct intel_crtc_state *crtc_state)
 static void ilk_load_lut_8(struct intel_crtc *crtc,
 			   const struct drm_property_blob *blob)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	const struct drm_color_lut *lut;
 	enum pipe pipe = crtc->pipe;
 	int i;
@@ -654,20 +695,20 @@ static void ilk_load_lut_8(struct intel_crtc *crtc,
 	lut = blob->data;
 
 	for (i = 0; i < 256; i++)
-		intel_de_write_fw(dev_priv, LGC_PALETTE(pipe, i),
+		intel_de_write_fw(i915, LGC_PALETTE(pipe, i),
 				  i9xx_lut_8(&lut[i]));
 }
 
 static void ilk_load_lut_10(struct intel_crtc *crtc,
 			    const struct drm_property_blob *blob)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	const struct drm_color_lut *lut = blob->data;
 	int i, lut_size = drm_color_lut_size(blob);
 	enum pipe pipe = crtc->pipe;
 
 	for (i = 0; i < lut_size; i++)
-		intel_de_write_fw(dev_priv, PREC_PALETTE(pipe, i),
+		intel_de_write_fw(i915, PREC_PALETTE(pipe, i),
 				  ilk_lut_10(&lut[i]));
 }
 
@@ -708,27 +749,22 @@ static void ivb_load_lut_10(struct intel_crtc *crtc,
 			    const struct drm_property_blob *blob,
 			    u32 prec_index)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	int hw_lut_size = ivb_lut_10_size(prec_index);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	const struct drm_color_lut *lut = blob->data;
 	int i, lut_size = drm_color_lut_size(blob);
 	enum pipe pipe = crtc->pipe;
 
-	for (i = 0; i < hw_lut_size; i++) {
-		/* We discard half the user entries in split gamma mode */
-		const struct drm_color_lut *entry =
-			&lut[i * (lut_size - 1) / (hw_lut_size - 1)];
-
-		intel_de_write_fw(dev_priv, PREC_PAL_INDEX(pipe), prec_index++);
-		intel_de_write_fw(dev_priv, PREC_PAL_DATA(pipe),
-				  ilk_lut_10(entry));
+	for (i = 0; i < lut_size; i++) {
+		intel_de_write_fw(i915, PREC_PAL_INDEX(pipe), prec_index++);
+		intel_de_write_fw(i915, PREC_PAL_DATA(pipe),
+				  ilk_lut_10(&lut[i]));
 	}
 
 	/*
 	 * Reset the index, otherwise it prevents the legacy palette to be
 	 * written properly.
 	 */
-	intel_de_write_fw(dev_priv, PREC_PAL_INDEX(pipe), 0);
+	intel_de_write_fw(i915, PREC_PAL_INDEX(pipe), 0);
 }
 
 /* On BDW+ the index auto increment mode actually works */
@@ -736,55 +772,45 @@ static void bdw_load_lut_10(struct intel_crtc *crtc,
 			    const struct drm_property_blob *blob,
 			    u32 prec_index)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	int hw_lut_size = ivb_lut_10_size(prec_index);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	const struct drm_color_lut *lut = blob->data;
 	int i, lut_size = drm_color_lut_size(blob);
 	enum pipe pipe = crtc->pipe;
 
-	intel_de_write_fw(dev_priv, PREC_PAL_INDEX(pipe),
+	intel_de_write_fw(i915, PREC_PAL_INDEX(pipe),
 			  prec_index | PAL_PREC_AUTO_INCREMENT);
 
-	for (i = 0; i < hw_lut_size; i++) {
-		/* We discard half the user entries in split gamma mode */
-		const struct drm_color_lut *entry =
-			&lut[i * (lut_size - 1) / (hw_lut_size - 1)];
-
-		intel_de_write_fw(dev_priv, PREC_PAL_DATA(pipe),
-				  ilk_lut_10(entry));
-	}
+	for (i = 0; i < lut_size; i++)
+		intel_de_write_fw(i915, PREC_PAL_DATA(pipe),
+				  ilk_lut_10(&lut[i]));
 
 	/*
 	 * Reset the index, otherwise it prevents the legacy palette to be
 	 * written properly.
 	 */
-	intel_de_write_fw(dev_priv, PREC_PAL_INDEX(pipe), 0);
+	intel_de_write_fw(i915, PREC_PAL_INDEX(pipe), 0);
 }
 
 static void ivb_load_lut_ext_max(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 
 	/* Program the max register to clamp values > 1.0. */
 	intel_dsb_reg_write(crtc_state, PREC_PAL_EXT_GC_MAX(pipe, 0), 1 << 16);
 	intel_dsb_reg_write(crtc_state, PREC_PAL_EXT_GC_MAX(pipe, 1), 1 << 16);
 	intel_dsb_reg_write(crtc_state, PREC_PAL_EXT_GC_MAX(pipe, 2), 1 << 16);
+}
 
-	/*
-	 * Program the gc max 2 register to clamp values > 1.0.
-	 * ToDo: Extend the ABI to be able to program values
-	 * from 3.0 to 7.0
-	 */
-	if (DISPLAY_VER(dev_priv) >= 10) {
-		intel_dsb_reg_write(crtc_state, PREC_PAL_EXT2_GC_MAX(pipe, 0),
-				    1 << 16);
-		intel_dsb_reg_write(crtc_state, PREC_PAL_EXT2_GC_MAX(pipe, 1),
-				    1 << 16);
-		intel_dsb_reg_write(crtc_state, PREC_PAL_EXT2_GC_MAX(pipe, 2),
-				    1 << 16);
-	}
+static void glk_load_lut_ext2_max(const struct intel_crtc_state *crtc_state)
+{
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
+	enum pipe pipe = crtc->pipe;
+
+	/* Program the max register to clamp values > 1.0. */
+	intel_dsb_reg_write(crtc_state, PREC_PAL_EXT2_GC_MAX(pipe, 0), 1 << 16);
+	intel_dsb_reg_write(crtc_state, PREC_PAL_EXT2_GC_MAX(pipe, 1), 1 << 16);
+	intel_dsb_reg_write(crtc_state, PREC_PAL_EXT2_GC_MAX(pipe, 2), 1 << 16);
 }
 
 static void ivb_load_luts(const struct intel_crtc_state *crtc_state)
@@ -858,7 +884,7 @@ static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state,
 				 const struct drm_property_blob *blob)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	const struct drm_color_lut *lut = blob->data;
 	int i, lut_size = drm_color_lut_size(blob);
 	enum pipe pipe = crtc->pipe;
@@ -868,8 +894,8 @@ static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state,
 	 * ignore the index bits, so we need to reset it to index 0
 	 * separately.
 	 */
-	intel_de_write_fw(dev_priv, PRE_CSC_GAMC_INDEX(pipe), 0);
-	intel_de_write_fw(dev_priv, PRE_CSC_GAMC_INDEX(pipe),
+	intel_de_write_fw(i915, PRE_CSC_GAMC_INDEX(pipe), 0);
+	intel_de_write_fw(i915, PRE_CSC_GAMC_INDEX(pipe),
 			  PRE_CSC_GAMC_AUTO_INCREMENT);
 
 	for (i = 0; i < lut_size; i++) {
@@ -886,15 +912,15 @@ static void glk_load_degamma_lut(const struct intel_crtc_state *crtc_state,
 		 * ToDo: Extend to max 7.0. Enable 32 bit input value
 		 * as compared to just 16 to achieve this.
 		 */
-		intel_de_write_fw(dev_priv, PRE_CSC_GAMC_DATA(pipe),
+		intel_de_write_fw(i915, PRE_CSC_GAMC_DATA(pipe),
 				  lut[i].green);
 	}
 
 	/* Clamp values > 1.0. */
-	while (i++ < glk_degamma_lut_size(dev_priv))
-		intel_de_write_fw(dev_priv, PRE_CSC_GAMC_DATA(pipe), 1 << 16);
+	while (i++ < glk_degamma_lut_size(i915))
+		intel_de_write_fw(i915, PRE_CSC_GAMC_DATA(pipe), 1 << 16);
 
-	intel_de_write_fw(dev_priv, PRE_CSC_GAMC_INDEX(pipe), 0);
+	intel_de_write_fw(i915, PRE_CSC_GAMC_INDEX(pipe), 0);
 }
 
 static void glk_load_luts(const struct intel_crtc_state *crtc_state)
@@ -913,6 +939,7 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state)
 	case GAMMA_MODE_MODE_10BIT:
 		bdw_load_lut_10(crtc, post_csc_lut, PAL_PREC_INDEX_VALUE(0));
 		ivb_load_lut_ext_max(crtc_state);
+		glk_load_lut_ext2_max(crtc_state);
 		break;
 	default:
 		MISSING_CASE(crtc_state->gamma_mode);
@@ -920,23 +947,9 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state)
 	}
 }
 
-/* ilk+ "12.4" interpolated format (high 10 bits) */
-static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
-{
-	return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
-		(color->blue >> 6);
-}
-
-/* ilk+ "12.4" interpolated format (low 6 bits) */
-static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
-{
-	return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
-		(color->blue & 0x3f) << 4;
-}
-
 static void
-icl_load_gcmax(const struct intel_crtc_state *crtc_state,
-	       const struct drm_color_lut *color)
+ivb_load_lut_max(const struct intel_crtc_state *crtc_state,
+		 const struct drm_color_lut *color)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
 	enum pipe pipe = crtc->pipe;
@@ -1028,8 +1041,7 @@ icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
 
 	/* The last entry in the LUT is to be programmed in GCMAX */
 	entry = &lut[256 * 8 * 128];
-	icl_load_gcmax(crtc_state, entry);
-	ivb_load_lut_ext_max(crtc_state);
+	ivb_load_lut_max(crtc_state, entry);
 }
 
 static void icl_load_luts(const struct intel_crtc_state *crtc_state)
@@ -1048,10 +1060,13 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state)
 	case GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED:
 		icl_program_gamma_superfine_segment(crtc_state);
 		icl_program_gamma_multi_segment(crtc_state);
+		ivb_load_lut_ext_max(crtc_state);
+		glk_load_lut_ext2_max(crtc_state);
 		break;
 	case GAMMA_MODE_MODE_10BIT:
 		bdw_load_lut_10(crtc, post_csc_lut, PAL_PREC_INDEX_VALUE(0));
 		ivb_load_lut_ext_max(crtc_state);
+		glk_load_lut_ext2_max(crtc_state);
 		break;
 	default:
 		MISSING_CASE(crtc_state->gamma_mode);
@@ -1063,61 +1078,61 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state)
 
 static u32 chv_cgm_degamma_ldw(const struct drm_color_lut *color)
 {
-	return drm_color_lut_extract(color->green, 14) << 16 |
-		drm_color_lut_extract(color->blue, 14);
+	return REG_FIELD_PREP(CGM_PIPE_DEGAMMA_GREEN_LDW_MASK, drm_color_lut_extract(color->green, 14)) |
+		REG_FIELD_PREP(CGM_PIPE_DEGAMMA_BLUE_LDW_MASK, drm_color_lut_extract(color->blue, 14));
 }
 
 static u32 chv_cgm_degamma_udw(const struct drm_color_lut *color)
 {
-	return drm_color_lut_extract(color->red, 14);
+	return REG_FIELD_PREP(CGM_PIPE_DEGAMMA_RED_UDW_MASK, drm_color_lut_extract(color->red, 14));
 }
 
 static void chv_load_cgm_degamma(struct intel_crtc *crtc,
 				 const struct drm_property_blob *blob)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	const struct drm_color_lut *lut = blob->data;
 	int i, lut_size = drm_color_lut_size(blob);
 	enum pipe pipe = crtc->pipe;
 
 	for (i = 0; i < lut_size; i++) {
-		intel_de_write_fw(dev_priv, CGM_PIPE_DEGAMMA(pipe, i, 0),
+		intel_de_write_fw(i915, CGM_PIPE_DEGAMMA(pipe, i, 0),
 				  chv_cgm_degamma_ldw(&lut[i]));
-		intel_de_write_fw(dev_priv, CGM_PIPE_DEGAMMA(pipe, i, 1),
+		intel_de_write_fw(i915, CGM_PIPE_DEGAMMA(pipe, i, 1),
 				  chv_cgm_degamma_udw(&lut[i]));
 	}
 }
 
 static u32 chv_cgm_gamma_ldw(const struct drm_color_lut *color)
 {
-	return drm_color_lut_extract(color->green, 10) << 16 |
-		drm_color_lut_extract(color->blue, 10);
+	return REG_FIELD_PREP(CGM_PIPE_GAMMA_GREEN_LDW_MASK, drm_color_lut_extract(color->green, 10)) |
+		REG_FIELD_PREP(CGM_PIPE_GAMMA_BLUE_LDW_MASK, drm_color_lut_extract(color->blue, 10));
 }
 
 static u32 chv_cgm_gamma_udw(const struct drm_color_lut *color)
 {
-	return drm_color_lut_extract(color->red, 10);
+	return REG_FIELD_PREP(CGM_PIPE_GAMMA_RED_UDW_MASK, drm_color_lut_extract(color->red, 10));
 }
 
 static void chv_cgm_gamma_pack(struct drm_color_lut *entry, u32 ldw, u32 udw)
 {
-	entry->green = intel_color_lut_pack(REG_FIELD_GET(CGM_PIPE_GAMMA_GREEN_MASK, ldw), 10);
-	entry->blue = intel_color_lut_pack(REG_FIELD_GET(CGM_PIPE_GAMMA_BLUE_MASK, ldw), 10);
-	entry->red = intel_color_lut_pack(REG_FIELD_GET(CGM_PIPE_GAMMA_RED_MASK, udw), 10);
+	entry->green = intel_color_lut_pack(REG_FIELD_GET(CGM_PIPE_GAMMA_GREEN_LDW_MASK, ldw), 10);
+	entry->blue = intel_color_lut_pack(REG_FIELD_GET(CGM_PIPE_GAMMA_BLUE_LDW_MASK, ldw), 10);
+	entry->red = intel_color_lut_pack(REG_FIELD_GET(CGM_PIPE_GAMMA_RED_UDW_MASK, udw), 10);
 }
 
 static void chv_load_cgm_gamma(struct intel_crtc *crtc,
 			       const struct drm_property_blob *blob)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	const struct drm_color_lut *lut = blob->data;
 	int i, lut_size = drm_color_lut_size(blob);
 	enum pipe pipe = crtc->pipe;
 
 	for (i = 0; i < lut_size; i++) {
-		intel_de_write_fw(dev_priv, CGM_PIPE_GAMMA(pipe, i, 0),
+		intel_de_write_fw(i915, CGM_PIPE_GAMMA(pipe, i, 0),
 				  chv_cgm_gamma_ldw(&lut[i]));
-		intel_de_write_fw(dev_priv, CGM_PIPE_GAMMA(pipe, i, 1),
+		intel_de_write_fw(i915, CGM_PIPE_GAMMA(pipe, i, 1),
 				  chv_cgm_gamma_udw(&lut[i]));
 	}
 }
@@ -1125,7 +1140,7 @@ static void chv_load_cgm_gamma(struct intel_crtc *crtc,
 static void chv_load_luts(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	const struct drm_property_blob *pre_csc_lut = crtc_state->pre_csc_lut;
 	const struct drm_property_blob *post_csc_lut = crtc_state->post_csc_lut;
 	const struct drm_property_blob *ctm = crtc_state->hw.ctm;
@@ -1141,30 +1156,30 @@ static void chv_load_luts(const struct intel_crtc_state *crtc_state)
 	else
 		i965_load_luts(crtc_state);
 
-	intel_de_write_fw(dev_priv, CGM_PIPE_MODE(crtc->pipe),
+	intel_de_write_fw(i915, CGM_PIPE_MODE(crtc->pipe),
 			  crtc_state->cgm_mode);
 }
 
 void intel_color_load_luts(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+	struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
 
-	dev_priv->display.funcs.color->load_luts(crtc_state);
+	i915->display.funcs.color->load_luts(crtc_state);
 }
 
 void intel_color_commit_noarm(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+	struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
 
-	if (dev_priv->display.funcs.color->color_commit_noarm)
-		dev_priv->display.funcs.color->color_commit_noarm(crtc_state);
+	if (i915->display.funcs.color->color_commit_noarm)
+		i915->display.funcs.color->color_commit_noarm(crtc_state);
 }
 
 void intel_color_commit_arm(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+	struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
 
-	dev_priv->display.funcs.color->color_commit_arm(crtc_state);
+	i915->display.funcs.color->color_commit_arm(crtc_state);
 }
 
 static bool intel_can_preload_luts(const struct intel_crtc_state *new_crtc_state)
@@ -1200,23 +1215,23 @@ static bool chv_can_preload_luts(const struct intel_crtc_state *new_crtc_state)
 
 int intel_color_check(struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+	struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
 
-	return dev_priv->display.funcs.color->color_check(crtc_state);
+	return i915->display.funcs.color->color_check(crtc_state);
 }
 
 void intel_color_get_config(struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+	struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
 
-	if (dev_priv->display.funcs.color->read_luts)
-		dev_priv->display.funcs.color->read_luts(crtc_state);
+	if (i915->display.funcs.color->read_luts)
+		i915->display.funcs.color->read_luts(crtc_state);
 }
 
 static bool need_plane_update(struct intel_plane *plane,
 			      const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(plane->base.dev);
+	struct drm_i915_private *i915 = to_i915(plane->base.dev);
 
 	/*
 	 * On pre-SKL the pipe gamma enable and pipe csc enable for
@@ -1224,7 +1239,7 @@ static bool need_plane_update(struct intel_plane *plane,
 	 * We have to reconfigure that even if the plane is inactive.
 	 */
 	return crtc_state->active_planes & BIT(plane->id) ||
-		(DISPLAY_VER(dev_priv) < 9 &&
+		(DISPLAY_VER(i915) < 9 &&
 		 plane->id == PLANE_PRIMARY);
 }
 
@@ -1232,7 +1247,7 @@ static int
 intel_color_add_affected_planes(struct intel_crtc_state *new_crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(new_crtc_state->uapi.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	struct intel_atomic_state *state =
 		to_intel_atomic_state(new_crtc_state->uapi.state);
 	const struct intel_crtc_state *old_crtc_state =
@@ -1240,14 +1255,14 @@ intel_color_add_affected_planes(struct intel_crtc_state *new_crtc_state)
 	struct intel_plane *plane;
 
 	if (!new_crtc_state->hw.active ||
-	    drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi))
+	    intel_crtc_needs_modeset(new_crtc_state))
 		return 0;
 
 	if (new_crtc_state->gamma_enable == old_crtc_state->gamma_enable &&
 	    new_crtc_state->csc_enable == old_crtc_state->csc_enable)
 		return 0;
 
-	for_each_intel_plane_on_crtc(&dev_priv->drm, crtc, plane) {
+	for_each_intel_plane_on_crtc(&i915->drm, crtc, plane) {
 		struct intel_plane_state *plane_state;
 
 		if (!need_plane_update(plane, new_crtc_state))
@@ -1260,7 +1275,7 @@ intel_color_add_affected_planes(struct intel_crtc_state *new_crtc_state)
 		new_crtc_state->update_planes |= BIT(plane->id);
 
 		/* plane control register changes blocked by CxSR */
-		if (HAS_GMCH(dev_priv))
+		if (HAS_GMCH(i915))
 			new_crtc_state->disable_cxsr = true;
 	}
 
@@ -1286,7 +1301,7 @@ static int check_lut_size(const struct drm_property_blob *lut, int expected)
 
 static int check_luts(const struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+	struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
 	const struct drm_property_blob *gamma_lut = crtc_state->hw.gamma_lut;
 	const struct drm_property_blob *degamma_lut = crtc_state->hw.degamma_lut;
 	int gamma_length, degamma_length;
@@ -1298,15 +1313,15 @@ static int check_luts(const struct intel_crtc_state *crtc_state)
 
 	/* C8 relies on its palette being stored in the legacy LUT */
 	if (crtc_state->c8_planes) {
-		drm_dbg_kms(&dev_priv->drm,
+		drm_dbg_kms(&i915->drm,
 			    "C8 pixelformat requires the legacy LUT\n");
 		return -EINVAL;
 	}
 
-	degamma_length = INTEL_INFO(dev_priv)->display.color.degamma_lut_size;
-	gamma_length = INTEL_INFO(dev_priv)->display.color.gamma_lut_size;
-	degamma_tests = INTEL_INFO(dev_priv)->display.color.degamma_lut_tests;
-	gamma_tests = INTEL_INFO(dev_priv)->display.color.gamma_lut_tests;
+	degamma_length = INTEL_INFO(i915)->display.color.degamma_lut_size;
+	gamma_length = INTEL_INFO(i915)->display.color.gamma_lut_size;
+	degamma_tests = INTEL_INFO(i915)->display.color.degamma_lut_tests;
+	gamma_tests = INTEL_INFO(i915)->display.color.gamma_lut_tests;
 
 	if (check_lut_size(degamma_lut, degamma_length) ||
 	    check_lut_size(gamma_lut, gamma_length))
@@ -1344,7 +1359,7 @@ void intel_color_assert_luts(const struct intel_crtc_state *crtc_state)
 			    crtc_state->pre_csc_lut != i915->display.color.glk_linear_degamma_lut);
 		drm_WARN_ON(&i915->drm,
 			    crtc_state->post_csc_lut != crtc_state->hw.gamma_lut);
-	} else {
+	} else if (crtc_state->gamma_mode != GAMMA_MODE_MODE_SPLIT) {
 		drm_WARN_ON(&i915->drm,
 			    crtc_state->pre_csc_lut != crtc_state->hw.degamma_lut &&
 			    crtc_state->pre_csc_lut != crtc_state->hw.gamma_lut);
@@ -1443,6 +1458,20 @@ static int chv_color_check(struct intel_crtc_state *crtc_state)
 	return 0;
 }
 
+static bool ilk_gamma_enable(const struct intel_crtc_state *crtc_state)
+{
+	return (crtc_state->hw.gamma_lut ||
+		crtc_state->hw.degamma_lut) &&
+		!crtc_state->c8_planes;
+}
+
+static bool ilk_csc_enable(const struct intel_crtc_state *crtc_state)
+{
+	return crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB ||
+		ilk_csc_limited_range(crtc_state) ||
+		crtc_state->hw.ctm;
+}
+
 static u32 ilk_gamma_mode(const struct intel_crtc_state *crtc_state)
 {
 	if (!crtc_state->gamma_enable ||
@@ -1488,22 +1517,29 @@ static void ilk_assign_luts(struct intel_crtc_state *crtc_state)
 
 static int ilk_color_check(struct intel_crtc_state *crtc_state)
 {
+	struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
 	int ret;
 
 	ret = check_luts(crtc_state);
 	if (ret)
 		return ret;
 
-	crtc_state->gamma_enable =
-		crtc_state->hw.gamma_lut &&
-		!crtc_state->c8_planes;
+	if (crtc_state->hw.degamma_lut && crtc_state->hw.gamma_lut) {
+		drm_dbg_kms(&i915->drm,
+			    "Degamma and gamma together are not possible\n");
+		return -EINVAL;
+	}
 
-	/*
-	 * We don't expose the ctm on ilk/snb currently, also RGB
-	 * limited range output is handled by the hw automagically.
-	 */
-	crtc_state->csc_enable =
-		crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB;
+	if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB &&
+	    crtc_state->hw.ctm) {
+		drm_dbg_kms(&i915->drm,
+			    "YCbCr and CTM together are not possible\n");
+		return -EINVAL;
+	}
+
+	crtc_state->gamma_enable = ilk_gamma_enable(crtc_state);
+
+	crtc_state->csc_enable = ilk_csc_enable(crtc_state);
 
 	crtc_state->gamma_mode = ilk_gamma_mode(crtc_state);
 
@@ -1522,14 +1558,10 @@ static int ilk_color_check(struct intel_crtc_state *crtc_state)
 
 static u32 ivb_gamma_mode(const struct intel_crtc_state *crtc_state)
 {
-	if (!crtc_state->gamma_enable ||
-	    crtc_state_is_legacy_gamma(crtc_state))
-		return GAMMA_MODE_MODE_8BIT;
-	else if (crtc_state->hw.gamma_lut &&
-		 crtc_state->hw.degamma_lut)
+	if (crtc_state->hw.degamma_lut && crtc_state->hw.gamma_lut)
 		return GAMMA_MODE_MODE_SPLIT;
-	else
-		return GAMMA_MODE_MODE_10BIT;
+
+	return ilk_gamma_mode(crtc_state);
 }
 
 static u32 ivb_csc_mode(const struct intel_crtc_state *crtc_state)
@@ -1548,10 +1580,41 @@ static u32 ivb_csc_mode(const struct intel_crtc_state *crtc_state)
 	return CSC_POSITION_BEFORE_GAMMA;
 }
 
+static int ivb_assign_luts(struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
+	struct drm_property_blob *degamma_lut, *gamma_lut;
+
+	if (crtc_state->gamma_mode != GAMMA_MODE_MODE_SPLIT) {
+		ilk_assign_luts(crtc_state);
+		return 0;
+	}
+
+	drm_WARN_ON(&i915->drm, drm_color_lut_size(crtc_state->hw.degamma_lut) != 1024);
+	drm_WARN_ON(&i915->drm, drm_color_lut_size(crtc_state->hw.gamma_lut) != 1024);
+
+	degamma_lut = create_resized_lut(i915, crtc_state->hw.degamma_lut, 512);
+	if (IS_ERR(degamma_lut))
+		return PTR_ERR(degamma_lut);
+
+	gamma_lut = create_resized_lut(i915, crtc_state->hw.gamma_lut, 512);
+	if (IS_ERR(gamma_lut)) {
+		drm_property_blob_put(degamma_lut);
+		return PTR_ERR(gamma_lut);
+	}
+
+	drm_property_replace_blob(&crtc_state->pre_csc_lut, degamma_lut);
+	drm_property_replace_blob(&crtc_state->post_csc_lut, gamma_lut);
+
+	drm_property_blob_put(degamma_lut);
+	drm_property_blob_put(gamma_lut);
+
+	return 0;
+}
+
 static int ivb_color_check(struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
-	bool limited_color_range = ilk_csc_limited_range(crtc_state);
+	struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
 	int ret;
 
 	ret = check_luts(crtc_state);
@@ -1560,19 +1623,21 @@ static int ivb_color_check(struct intel_crtc_state *crtc_state)
 
 	if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB &&
 	    crtc_state->hw.ctm) {
-		drm_dbg_kms(&dev_priv->drm,
-			    "YCBCR and CTM together are not possible\n");
+		drm_dbg_kms(&i915->drm,
+			    "YCbCr and CTM together are not possible\n");
 		return -EINVAL;
 	}
 
-	crtc_state->gamma_enable =
-		(crtc_state->hw.gamma_lut ||
-		 crtc_state->hw.degamma_lut) &&
-		!crtc_state->c8_planes;
+	if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB &&
+	    crtc_state->hw.degamma_lut && crtc_state->hw.gamma_lut) {
+		drm_dbg_kms(&i915->drm,
+			    "YCbCr and degamma+gamma together are not possible\n");
+		return -EINVAL;
+	}
 
-	crtc_state->csc_enable =
-		crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB ||
-		crtc_state->hw.ctm || limited_color_range;
+	crtc_state->gamma_enable = ilk_gamma_enable(crtc_state);
+
+	crtc_state->csc_enable = ilk_csc_enable(crtc_state);
 
 	crtc_state->gamma_mode = ivb_gamma_mode(crtc_state);
 
@@ -1582,7 +1647,9 @@ static int ivb_color_check(struct intel_crtc_state *crtc_state)
 	if (ret)
 		return ret;
 
-	ilk_assign_luts(crtc_state);
+	ret = ivb_assign_luts(crtc_state);
+	if (ret)
+		return ret;
 
 	crtc_state->preload_luts = intel_can_preload_luts(crtc_state);
 
@@ -1617,7 +1684,7 @@ static void glk_assign_luts(struct intel_crtc_state *crtc_state)
 
 static int glk_color_check(struct intel_crtc_state *crtc_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc_state->uapi.crtc->dev);
+	struct drm_i915_private *i915 = to_i915(crtc_state->uapi.crtc->dev);
 	int ret;
 
 	ret = check_luts(crtc_state);
@@ -1626,8 +1693,15 @@ static int glk_color_check(struct intel_crtc_state *crtc_state)
 
 	if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB &&
 	    crtc_state->hw.ctm) {
-		drm_dbg_kms(&dev_priv->drm,
-			    "YCBCR and CTM together are not possible\n");
+		drm_dbg_kms(&i915->drm,
+			    "YCbCr and CTM together are not possible\n");
+		return -EINVAL;
+	}
+
+	if (crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB &&
+	    crtc_state->hw.degamma_lut && crtc_state->hw.gamma_lut) {
+		drm_dbg_kms(&i915->drm,
+			    "YCbCr and degamma+gamma together are not possible\n");
 		return -EINVAL;
 	}
 
@@ -1798,19 +1872,19 @@ static int icl_gamma_precision(const struct intel_crtc_state *crtc_state)
 int intel_color_get_gamma_bit_precision(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->uapi.crtc);
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 
-	if (HAS_GMCH(dev_priv)) {
-		if (IS_CHERRYVIEW(dev_priv))
+	if (HAS_GMCH(i915)) {
+		if (IS_CHERRYVIEW(i915))
 			return chv_gamma_precision(crtc_state);
 		else
 			return i9xx_gamma_precision(crtc_state);
 	} else {
-		if (DISPLAY_VER(dev_priv) >= 11)
+		if (DISPLAY_VER(i915) >= 11)
 			return icl_gamma_precision(crtc_state);
-		else if (DISPLAY_VER(dev_priv) == 10)
+		else if (DISPLAY_VER(i915) == 10)
 			return glk_gamma_precision(crtc_state);
-		else if (IS_IRONLAKE(dev_priv))
+		else if (IS_IRONLAKE(i915))
 			return ilk_gamma_precision(crtc_state);
 	}
 
@@ -1895,7 +1969,7 @@ static struct drm_property_blob *i9xx_read_lut_8(struct intel_crtc *crtc)
 	int i;
 
 	blob = drm_property_create_blob(&dev_priv->drm,
-					sizeof(struct drm_color_lut) * LEGACY_LUT_LENGTH,
+					sizeof(lut[0]) * LEGACY_LUT_LENGTH,
 					NULL);
 	if (IS_ERR(blob))
 		return NULL;
@@ -1930,7 +2004,7 @@ static struct drm_property_blob *i965_read_lut_10p6(struct intel_crtc *crtc)
 	struct drm_color_lut *lut;
 
 	blob = drm_property_create_blob(&dev_priv->drm,
-					sizeof(struct drm_color_lut) * lut_size,
+					sizeof(lut[0]) * lut_size,
 					NULL);
 	if (IS_ERR(blob))
 		return NULL;
@@ -1966,14 +2040,14 @@ static void i965_read_luts(struct intel_crtc_state *crtc_state)
 
 static struct drm_property_blob *chv_read_cgm_gamma(struct intel_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	int i, lut_size = INTEL_INFO(dev_priv)->display.color.gamma_lut_size;
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	int i, lut_size = INTEL_INFO(i915)->display.color.gamma_lut_size;
 	enum pipe pipe = crtc->pipe;
 	struct drm_property_blob *blob;
 	struct drm_color_lut *lut;
 
-	blob = drm_property_create_blob(&dev_priv->drm,
-					sizeof(struct drm_color_lut) * lut_size,
+	blob = drm_property_create_blob(&i915->drm,
+					sizeof(lut[0]) * lut_size,
 					NULL);
 	if (IS_ERR(blob))
 		return NULL;
@@ -1981,8 +2055,8 @@ static struct drm_property_blob *chv_read_cgm_gamma(struct intel_crtc *crtc)
 	lut = blob->data;
 
 	for (i = 0; i < lut_size; i++) {
-		u32 ldw = intel_de_read_fw(dev_priv, CGM_PIPE_GAMMA(pipe, i, 0));
-		u32 udw = intel_de_read_fw(dev_priv, CGM_PIPE_GAMMA(pipe, i, 1));
+		u32 ldw = intel_de_read_fw(i915, CGM_PIPE_GAMMA(pipe, i, 0));
+		u32 udw = intel_de_read_fw(i915, CGM_PIPE_GAMMA(pipe, i, 1));
 
 		chv_cgm_gamma_pack(&lut[i], ldw, udw);
 	}
@@ -2002,14 +2076,14 @@ static void chv_read_luts(struct intel_crtc_state *crtc_state)
 
 static struct drm_property_blob *ilk_read_lut_8(struct intel_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	enum pipe pipe = crtc->pipe;
 	struct drm_property_blob *blob;
 	struct drm_color_lut *lut;
 	int i;
 
-	blob = drm_property_create_blob(&dev_priv->drm,
-					sizeof(struct drm_color_lut) * LEGACY_LUT_LENGTH,
+	blob = drm_property_create_blob(&i915->drm,
+					sizeof(lut[0]) * LEGACY_LUT_LENGTH,
 					NULL);
 	if (IS_ERR(blob))
 		return NULL;
@@ -2017,7 +2091,7 @@ static struct drm_property_blob *ilk_read_lut_8(struct intel_crtc *crtc)
 	lut = blob->data;
 
 	for (i = 0; i < LEGACY_LUT_LENGTH; i++) {
-		u32 val = intel_de_read_fw(dev_priv, LGC_PALETTE(pipe, i));
+		u32 val = intel_de_read_fw(i915, LGC_PALETTE(pipe, i));
 
 		i9xx_lut_8_pack(&lut[i], val);
 	}
@@ -2027,14 +2101,14 @@ static struct drm_property_blob *ilk_read_lut_8(struct intel_crtc *crtc)
 
 static struct drm_property_blob *ilk_read_lut_10(struct intel_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	int i, lut_size = INTEL_INFO(dev_priv)->display.color.gamma_lut_size;
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	int i, lut_size = INTEL_INFO(i915)->display.color.gamma_lut_size;
 	enum pipe pipe = crtc->pipe;
 	struct drm_property_blob *blob;
 	struct drm_color_lut *lut;
 
-	blob = drm_property_create_blob(&dev_priv->drm,
-					sizeof(struct drm_color_lut) * lut_size,
+	blob = drm_property_create_blob(&i915->drm,
+					sizeof(lut[0]) * lut_size,
 					NULL);
 	if (IS_ERR(blob))
 		return NULL;
@@ -2042,7 +2116,7 @@ static struct drm_property_blob *ilk_read_lut_10(struct intel_crtc *crtc)
 	lut = blob->data;
 
 	for (i = 0; i < lut_size; i++) {
-		u32 val = intel_de_read_fw(dev_priv, PREC_PALETTE(pipe, i));
+		u32 val = intel_de_read_fw(i915, PREC_PALETTE(pipe, i));
 
 		ilk_lut_10_pack(&lut[i], val);
 	}
@@ -2077,33 +2151,33 @@ static void ilk_read_luts(struct intel_crtc_state *crtc_state)
 static struct drm_property_blob *bdw_read_lut_10(struct intel_crtc *crtc,
 						 u32 prec_index)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
 	int i, hw_lut_size = ivb_lut_10_size(prec_index);
-	int lut_size = INTEL_INFO(dev_priv)->display.color.gamma_lut_size;
+	int lut_size = INTEL_INFO(i915)->display.color.gamma_lut_size;
 	enum pipe pipe = crtc->pipe;
 	struct drm_property_blob *blob;
 	struct drm_color_lut *lut;
 
-	drm_WARN_ON(&dev_priv->drm, lut_size != hw_lut_size);
+	drm_WARN_ON(&i915->drm, lut_size != hw_lut_size);
 
-	blob = drm_property_create_blob(&dev_priv->drm,
-					sizeof(struct drm_color_lut) * lut_size,
+	blob = drm_property_create_blob(&i915->drm,
+					sizeof(lut[0]) * lut_size,
 					NULL);
 	if (IS_ERR(blob))
 		return NULL;
 
 	lut = blob->data;
 
-	intel_de_write_fw(dev_priv, PREC_PAL_INDEX(pipe),
+	intel_de_write_fw(i915, PREC_PAL_INDEX(pipe),
 			  prec_index | PAL_PREC_AUTO_INCREMENT);
 
 	for (i = 0; i < lut_size; i++) {
-		u32 val = intel_de_read_fw(dev_priv, PREC_PAL_DATA(pipe));
+		u32 val = intel_de_read_fw(i915, PREC_PAL_DATA(pipe));
 
 		ilk_lut_10_pack(&lut[i], val);
 	}
 
-	intel_de_write_fw(dev_priv, PREC_PAL_INDEX(pipe), 0);
+	intel_de_write_fw(i915, PREC_PAL_INDEX(pipe), 0);
 
 	return blob;
 }
@@ -2131,31 +2205,31 @@ static void glk_read_luts(struct intel_crtc_state *crtc_state)
 static struct drm_property_blob *
 icl_read_lut_multi_segment(struct intel_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	int i, lut_size = INTEL_INFO(dev_priv)->display.color.gamma_lut_size;
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	int i, lut_size = INTEL_INFO(i915)->display.color.gamma_lut_size;
 	enum pipe pipe = crtc->pipe;
 	struct drm_property_blob *blob;
 	struct drm_color_lut *lut;
 
-	blob = drm_property_create_blob(&dev_priv->drm,
-					sizeof(struct drm_color_lut) * lut_size,
+	blob = drm_property_create_blob(&i915->drm,
+					sizeof(lut[0]) * lut_size,
 					NULL);
 	if (IS_ERR(blob))
 		return NULL;
 
 	lut = blob->data;
 
-	intel_de_write_fw(dev_priv, PREC_PAL_MULTI_SEG_INDEX(pipe),
+	intel_de_write_fw(i915, PREC_PAL_MULTI_SEG_INDEX(pipe),
 			  PAL_PREC_AUTO_INCREMENT);
 
 	for (i = 0; i < 9; i++) {
-		u32 ldw = intel_de_read_fw(dev_priv, PREC_PAL_MULTI_SEG_DATA(pipe));
-		u32 udw = intel_de_read_fw(dev_priv, PREC_PAL_MULTI_SEG_DATA(pipe));
+		u32 ldw = intel_de_read_fw(i915, PREC_PAL_MULTI_SEG_DATA(pipe));
+		u32 udw = intel_de_read_fw(i915, PREC_PAL_MULTI_SEG_DATA(pipe));
 
-		icl_lut_multi_seg_pack(&lut[i], ldw, udw);
+		ilk_lut_12p4_pack(&lut[i], ldw, udw);
 	}
 
-	intel_de_write_fw(dev_priv, PREC_PAL_MULTI_SEG_INDEX(pipe), 0);
+	intel_de_write_fw(i915, PREC_PAL_MULTI_SEG_INDEX(pipe), 0);
 
 	/*
 	 * FIXME readouts from PAL_PREC_DATA register aren't giving
@@ -2268,15 +2342,15 @@ static const struct intel_color_funcs ilk_color_funcs = {
 
 void intel_color_crtc_init(struct intel_crtc *crtc)
 {
-	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	bool has_ctm = INTEL_INFO(dev_priv)->display.color.degamma_lut_size != 0;
+	struct drm_i915_private *i915 = to_i915(crtc->base.dev);
+	bool has_ctm = INTEL_INFO(i915)->display.color.degamma_lut_size != 0;
 
 	drm_mode_crtc_set_gamma_size(&crtc->base, 256);
 
 	drm_crtc_enable_color_mgmt(&crtc->base,
-				   INTEL_INFO(dev_priv)->display.color.degamma_lut_size,
+				   INTEL_INFO(i915)->display.color.degamma_lut_size,
 				   has_ctm,
-				   INTEL_INFO(dev_priv)->display.color.gamma_lut_size);
+				   INTEL_INFO(i915)->display.color.gamma_lut_size);
 }
 
 int intel_color_init(struct drm_i915_private *i915)
diff --git a/drivers/gpu/drm/i915/display/intel_combo_phy.c b/drivers/gpu/drm/i915/display/intel_combo_phy.c
index 71d7aece1dc6..8b870b2dd4f9 100644
--- a/drivers/gpu/drm/i915/display/intel_combo_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_combo_phy.c
@@ -3,6 +3,7 @@
  * Copyright © 2018 Intel Corporation
  */
 
+#include "i915_reg.h"
 #include "intel_combo_phy.h"
 #include "intel_combo_phy_regs.h"
 #include "intel_de.h"
diff --git a/drivers/gpu/drm/i915/display/intel_connector.h b/drivers/gpu/drm/i915/display/intel_connector.h
index 7d7b588d2286..9d2bc261b204 100644
--- a/drivers/gpu/drm/i915/display/intel_connector.h
+++ b/drivers/gpu/drm/i915/display/intel_connector.h
@@ -6,7 +6,7 @@
 #ifndef __INTEL_CONNECTOR_H__
 #define __INTEL_CONNECTOR_H__
 
-#include "intel_display.h"
+#include <linux/types.h>
 
 struct drm_connector;
 struct edid;
diff --git a/drivers/gpu/drm/i915/display/intel_crt.c b/drivers/gpu/drm/i915/display/intel_crt.c
index 94d0a5e1dd03..797ad9489f7e 100644
--- a/drivers/gpu/drm/i915/display/intel_crt.c
+++ b/drivers/gpu/drm/i915/display/intel_crt.c
@@ -34,6 +34,8 @@
 #include <drm/drm_probe_helper.h>
 
 #include "i915_drv.h"
+#include "i915_irq.h"
+#include "i915_reg.h"
 #include "intel_connector.h"
 #include "intel_crt.h"
 #include "intel_crtc.h"
diff --git a/drivers/gpu/drm/i915/display/intel_cursor.c b/drivers/gpu/drm/i915/display/intel_cursor.c
index 96422c98656a..d190fa0d393b 100644
--- a/drivers/gpu/drm/i915/display/intel_cursor.c
+++ b/drivers/gpu/drm/i915/display/intel_cursor.c
@@ -10,12 +10,13 @@
 #include <drm/drm_damage_helper.h>
 #include <drm/drm_fourcc.h>
 
+#include "i915_reg.h"
 #include "intel_atomic.h"
 #include "intel_atomic_plane.h"
 #include "intel_cursor.h"
 #include "intel_de.h"
-#include "intel_display_types.h"
 #include "intel_display.h"
+#include "intel_display_types.h"
 #include "intel_fb.h"
 #include "intel_fb_pin.h"
 #include "intel_frontbuffer.h"
diff --git a/drivers/gpu/drm/i915/display/intel_ddi.c b/drivers/gpu/drm/i915/display/intel_ddi.c
index e95bde5cf060..0f1ec2a98cc8 100644
--- a/drivers/gpu/drm/i915/display/intel_ddi.c
+++ b/drivers/gpu/drm/i915/display/intel_ddi.c
@@ -31,6 +31,7 @@
 #include <drm/drm_privacy_screen_consumer.h>
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_audio.h"
 #include "intel_audio_regs.h"
 #include "intel_backlight.h"
@@ -56,6 +57,7 @@
 #include "intel_hdcp.h"
 #include "intel_hdmi.h"
 #include "intel_hotplug.h"
+#include "intel_hti.h"
 #include "intel_lspcon.h"
 #include "intel_mg_phy_regs.h"
 #include "intel_pps.h"
@@ -846,22 +848,65 @@ bool intel_ddi_get_hw_state(struct intel_encoder *encoder,
 }
 
 static enum intel_display_power_domain
-intel_ddi_main_link_aux_domain(struct intel_digital_port *dig_port)
+intel_ddi_main_link_aux_domain(struct intel_digital_port *dig_port,
+			       const struct intel_crtc_state *crtc_state)
 {
-	/* ICL+ HW requires corresponding AUX IOs to be powered up for PSR with
+	struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
+	enum phy phy = intel_port_to_phy(i915, dig_port->base.port);
+
+	/*
+	 * ICL+ HW requires corresponding AUX IOs to be powered up for PSR with
 	 * DC states enabled at the same time, while for driver initiated AUX
 	 * transfers we need the same AUX IOs to be powered but with DC states
-	 * disabled. Accordingly use the AUX power domain here which leaves DC
-	 * states enabled.
-	 * However, for non-A AUX ports the corresponding non-EDP transcoders
-	 * would have already enabled power well 2 and DC_OFF. This means we can
-	 * acquire a wider POWER_DOMAIN_AUX_{B,C,D,F} reference instead of a
-	 * specific AUX_IO reference without powering up any extra wells.
-	 * Note that PSR is enabled only on Port A even though this function
-	 * returns the correct domain for other ports too.
+	 * disabled. Accordingly use the AUX_IO_<port> power domain here which
+	 * leaves DC states enabled.
+	 *
+	 * Before MTL TypeC PHYs (in all TypeC modes and both DP/HDMI) also require
+	 * AUX IO to be enabled, but all these require DC_OFF to be enabled as
+	 * well, so we can acquire a wider AUX_<port> power domain reference
+	 * instead of a specific AUX_IO_<port> reference without powering up any
+	 * extra wells.
 	 */
-	return dig_port->aux_ch == AUX_CH_A ? POWER_DOMAIN_AUX_IO_A :
-					      intel_aux_power_domain(dig_port);
+	if (intel_encoder_can_psr(&dig_port->base))
+		return intel_display_power_aux_io_domain(i915, dig_port->aux_ch);
+	else if (DISPLAY_VER(i915) < 14 &&
+		 (intel_crtc_has_dp_encoder(crtc_state) ||
+		  intel_phy_is_tc(i915, phy)))
+		return intel_aux_power_domain(dig_port);
+	else
+		return POWER_DOMAIN_INVALID;
+}
+
+static void
+main_link_aux_power_domain_get(struct intel_digital_port *dig_port,
+			       const struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
+	enum intel_display_power_domain domain =
+		intel_ddi_main_link_aux_domain(dig_port, crtc_state);
+
+	drm_WARN_ON(&i915->drm, dig_port->aux_wakeref);
+
+	if (domain == POWER_DOMAIN_INVALID)
+		return;
+
+	dig_port->aux_wakeref = intel_display_power_get(i915, domain);
+}
+
+static void
+main_link_aux_power_domain_put(struct intel_digital_port *dig_port,
+			       const struct intel_crtc_state *crtc_state)
+{
+	struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
+	enum intel_display_power_domain domain =
+		intel_ddi_main_link_aux_domain(dig_port, crtc_state);
+	intel_wakeref_t wf;
+
+	wf = fetch_and_zero(&dig_port->aux_wakeref);
+	if (!wf)
+		return;
+
+	intel_display_power_put(i915, domain, wf);
 }
 
 static void intel_ddi_get_power_domains(struct intel_encoder *encoder,
@@ -869,7 +914,6 @@ static void intel_ddi_get_power_domains(struct intel_encoder *encoder,
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct intel_digital_port *dig_port;
-	enum phy phy = intel_port_to_phy(dev_priv, encoder->port);
 
 	/*
 	 * TODO: Add support for MST encoders. Atm, the following should never
@@ -888,17 +932,7 @@ static void intel_ddi_get_power_domains(struct intel_encoder *encoder,
 								   dig_port->ddi_io_power_domain);
 	}
 
-	/*
-	 * AUX power is only needed for (e)DP mode, and for HDMI mode on TC
-	 * ports.
-	 */
-	if (intel_crtc_has_dp_encoder(crtc_state) ||
-	    intel_phy_is_tc(dev_priv, phy)) {
-		drm_WARN_ON(&dev_priv->drm, dig_port->aux_wakeref);
-		dig_port->aux_wakeref =
-			intel_display_power_get(dev_priv,
-						intel_ddi_main_link_aux_domain(dig_port));
-	}
+	main_link_aux_power_domain_get(dig_port, crtc_state);
 }
 
 void intel_ddi_enable_pipe_clock(struct intel_encoder *encoder,
@@ -2737,10 +2771,7 @@ static void intel_ddi_post_disable(struct intel_atomic_state *state,
 		intel_ddi_post_disable_dp(state, encoder, old_crtc_state,
 					  old_conn_state);
 
-	if (intel_crtc_has_dp_encoder(old_crtc_state) || is_tc_port)
-		intel_display_power_put(dev_priv,
-					intel_ddi_main_link_aux_domain(dig_port),
-					fetch_and_zero(&dig_port->aux_wakeref));
+	main_link_aux_power_domain_put(dig_port, old_crtc_state);
 
 	if (is_tc_port)
 		intel_tc_port_put_link(dig_port);
@@ -3061,12 +3092,7 @@ intel_ddi_pre_pll_enable(struct intel_atomic_state *state,
 	if (is_tc_port)
 		intel_tc_port_get_link(dig_port, crtc_state->lane_count);
 
-	if (intel_crtc_has_dp_encoder(crtc_state) || is_tc_port) {
-		drm_WARN_ON(&dev_priv->drm, dig_port->aux_wakeref);
-		dig_port->aux_wakeref =
-			intel_display_power_get(dev_priv,
-						intel_ddi_main_link_aux_domain(dig_port));
-	}
+	main_link_aux_power_domain_get(dig_port, crtc_state);
 
 	if (is_tc_port && !intel_tc_port_in_tbt_alt_mode(dig_port))
 		/*
@@ -4113,12 +4139,6 @@ intel_ddi_max_lanes(struct intel_digital_port *dig_port)
 	return max_lanes;
 }
 
-static bool hti_uses_phy(struct drm_i915_private *i915, enum phy phy)
-{
-	return i915->hti_state & HDPORT_ENABLED &&
-	       i915->hti_state & HDPORT_DDI_USED(phy);
-}
-
 static enum hpd_pin xelpd_hpd_pin(struct drm_i915_private *dev_priv,
 				  enum port port)
 {
@@ -4247,7 +4267,7 @@ void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port)
 	 * driver.  In that case we should skip initializing the corresponding
 	 * outputs.
 	 */
-	if (hti_uses_phy(dev_priv, phy)) {
+	if (intel_hti_uses_phy(dev_priv, phy)) {
 		drm_dbg_kms(&dev_priv->drm, "PORT %c / PHY %c reserved by HTI\n",
 			    port_name(port), phy_name(phy));
 		return;
diff --git a/drivers/gpu/drm/i915/display/intel_display.c b/drivers/gpu/drm/i915/display/intel_display.c
index b9393f9fc764..6c2686ecb62a 100644
--- a/drivers/gpu/drm/i915/display/intel_display.c
+++ b/drivers/gpu/drm/i915/display/intel_display.c
@@ -76,6 +76,7 @@
 #include "g4x_hdmi.h"
 #include "hsw_ips.h"
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "i915_utils.h"
 #include "icl_dsi.h"
 #include "intel_acpi.h"
@@ -90,6 +91,7 @@
 #include "intel_display_types.h"
 #include "intel_dmc.h"
 #include "intel_dp_link_training.h"
+#include "intel_dpio_phy.h"
 #include "intel_dpt.h"
 #include "intel_dsb.h"
 #include "intel_fbc.h"
@@ -99,6 +101,7 @@
 #include "intel_frontbuffer.h"
 #include "intel_hdcp.h"
 #include "intel_hotplug.h"
+#include "intel_hti.h"
 #include "intel_modeset_verify.h"
 #include "intel_modeset_setup.h"
 #include "intel_overlay.h"
@@ -888,7 +891,7 @@ static bool gpu_reset_clobbers_display(struct drm_i915_private *dev_priv)
 
 void intel_display_prepare_reset(struct drm_i915_private *dev_priv)
 {
-	struct drm_modeset_acquire_ctx *ctx = &dev_priv->reset_ctx;
+	struct drm_modeset_acquire_ctx *ctx = &dev_priv->display.restore.reset_ctx;
 	struct drm_atomic_state *state;
 	int ret;
 
@@ -944,13 +947,13 @@ void intel_display_prepare_reset(struct drm_i915_private *dev_priv)
 		return;
 	}
 
-	dev_priv->modeset_restore_state = state;
+	dev_priv->display.restore.modeset_state = state;
 	state->acquire_ctx = ctx;
 }
 
 void intel_display_finish_reset(struct drm_i915_private *i915)
 {
-	struct drm_modeset_acquire_ctx *ctx = &i915->reset_ctx;
+	struct drm_modeset_acquire_ctx *ctx = &i915->display.restore.reset_ctx;
 	struct drm_atomic_state *state;
 	int ret;
 
@@ -961,7 +964,7 @@ void intel_display_finish_reset(struct drm_i915_private *i915)
 	if (!test_bit(I915_RESET_MODESET, &to_gt(i915)->reset.flags))
 		return;
 
-	state = fetch_and_zero(&i915->modeset_restore_state);
+	state = fetch_and_zero(&i915->display.restore.modeset_state);
 	if (!state)
 		goto unlock;
 
@@ -2441,7 +2444,7 @@ int intel_display_suspend(struct drm_device *dev)
 		drm_err(&dev_priv->drm, "Suspending crtc's failed with %i\n",
 			ret);
 	else
-		dev_priv->modeset_restore_state = state;
+		dev_priv->display.restore.modeset_state = state;
 	return ret;
 }
 
@@ -3730,12 +3733,16 @@ out:
 
 static u8 bigjoiner_pipes(struct drm_i915_private *i915)
 {
+	u8 pipes;
+
 	if (DISPLAY_VER(i915) >= 12)
-		return BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D);
+		pipes = BIT(PIPE_A) | BIT(PIPE_B) | BIT(PIPE_C) | BIT(PIPE_D);
 	else if (DISPLAY_VER(i915) >= 11)
-		return BIT(PIPE_B) | BIT(PIPE_C);
+		pipes = BIT(PIPE_B) | BIT(PIPE_C);
 	else
-		return 0;
+		pipes = 0;
+
+	return pipes & RUNTIME_INFO(i915)->pipe_mask;
 }
 
 static bool transcoder_ddi_func_is_enabled(struct drm_i915_private *dev_priv,
@@ -4048,20 +4055,19 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc,
 				struct intel_crtc_state *pipe_config)
 {
 	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
-	struct intel_display_power_domain_set power_domain_set = { };
 	bool active;
 	u32 tmp;
 
-	if (!intel_display_power_get_in_set_if_enabled(dev_priv, &power_domain_set,
+	if (!intel_display_power_get_in_set_if_enabled(dev_priv, &crtc->hw_readout_power_domains,
 						       POWER_DOMAIN_PIPE(crtc->pipe)))
 		return false;
 
 	pipe_config->shared_dpll = NULL;
 
-	active = hsw_get_transcoder_state(crtc, pipe_config, &power_domain_set);
+	active = hsw_get_transcoder_state(crtc, pipe_config, &crtc->hw_readout_power_domains);
 
 	if ((IS_GEMINILAKE(dev_priv) || IS_BROXTON(dev_priv)) &&
-	    bxt_get_dsi_transcoder_state(crtc, pipe_config, &power_domain_set)) {
+	    bxt_get_dsi_transcoder_state(crtc, pipe_config, &crtc->hw_readout_power_domains)) {
 		drm_WARN_ON(&dev_priv->drm, active);
 		active = true;
 	}
@@ -4120,7 +4126,7 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc,
 		pipe_config->ips_linetime =
 			REG_FIELD_GET(HSW_IPS_LINETIME_MASK, tmp);
 
-	if (intel_display_power_get_in_set_if_enabled(dev_priv, &power_domain_set,
+	if (intel_display_power_get_in_set_if_enabled(dev_priv, &crtc->hw_readout_power_domains,
 						      POWER_DOMAIN_PIPE_PANEL_FITTER(crtc->pipe))) {
 		if (DISPLAY_VER(dev_priv) >= 9)
 			skl_get_pfit_config(pipe_config);
@@ -4151,7 +4157,7 @@ static bool hsw_get_pipe_config(struct intel_crtc *crtc,
 	}
 
 out:
-	intel_display_power_put_all_in_set(dev_priv, &power_domain_set);
+	intel_display_power_put_all_in_set(dev_priv, &crtc->hw_readout_power_domains);
 
 	return active;
 }
@@ -5930,7 +5936,7 @@ int intel_modeset_all_pipes(struct intel_atomic_state *state,
 			return PTR_ERR(crtc_state);
 
 		if (!crtc_state->hw.active ||
-		    drm_atomic_crtc_needs_modeset(&crtc_state->uapi))
+		    intel_crtc_needs_modeset(crtc_state))
 			continue;
 
 		drm_dbg_kms(&dev_priv->drm, "[CRTC:%d:%s] Full modeset due to %s\n",
@@ -8569,7 +8575,7 @@ static void intel_mode_config_init(struct drm_i915_private *i915)
 	struct drm_mode_config *mode_config = &i915->drm.mode_config;
 
 	drm_mode_config_init(&i915->drm);
-	INIT_LIST_HEAD(&i915->global_obj_list);
+	INIT_LIST_HEAD(&i915->display.global.obj_list);
 
 	mode_config->min_width = 0;
 	mode_config->min_height = 0;
@@ -8734,12 +8740,7 @@ int intel_modeset_init_nogem(struct drm_i915_private *i915)
 	if (i915->display.cdclk.max_cdclk_freq == 0)
 		intel_update_max_cdclk(i915);
 
-	/*
-	 * If the platform has HTI, we need to find out whether it has reserved
-	 * any display resources before we create our display outputs.
-	 */
-	if (INTEL_INFO(i915)->display.has_hti)
-		i915->hti_state = intel_de_read(i915, HDPORT_STATE);
+	intel_hti_init(i915);
 
 	/* Just disable it once at startup */
 	intel_vga_disable(i915);
@@ -8902,14 +8903,14 @@ void i830_disable_pipe(struct drm_i915_private *dev_priv, enum pipe pipe)
 void intel_display_resume(struct drm_device *dev)
 {
 	struct drm_i915_private *i915 = to_i915(dev);
-	struct drm_atomic_state *state = i915->modeset_restore_state;
+	struct drm_atomic_state *state = i915->display.restore.modeset_state;
 	struct drm_modeset_acquire_ctx ctx;
 	int ret;
 
 	if (!HAS_DISPLAY(i915))
 		return;
 
-	i915->modeset_restore_state = NULL;
+	i915->display.restore.modeset_state = NULL;
 	if (state)
 		state->acquire_ctx = &ctx;
 
diff --git a/drivers/gpu/drm/i915/display/intel_display.h b/drivers/gpu/drm/i915/display/intel_display.h
index c803330a276d..714030136b7f 100644
--- a/drivers/gpu/drm/i915/display/intel_display.h
+++ b/drivers/gpu/drm/i915/display/intel_display.h
@@ -53,6 +53,7 @@ struct intel_digital_port;
 struct intel_dp;
 struct intel_encoder;
 struct intel_initial_plane_config;
+struct intel_link_m_n;
 struct intel_load_detect_pipe;
 struct intel_plane;
 struct intel_plane_state;
@@ -61,24 +62,6 @@ struct intel_remapped_info;
 struct intel_rotation_info;
 struct pci_dev;
 
-enum i915_gpio {
-	GPIOA,
-	GPIOB,
-	GPIOC,
-	GPIOD,
-	GPIOE,
-	GPIOF,
-	GPIOG,
-	GPIOH,
-	__GPIOI_UNUSED,
-	GPIOJ,
-	GPIOK,
-	GPIOL,
-	GPIOM,
-	GPION,
-	GPIOO,
-};
-
 /*
  * Keep the pipe enum values fixed: the code assumes that PIPE_A=0, the
  * rest have consecutive values and match the enum values of transcoders
@@ -279,17 +262,6 @@ enum tc_port_mode {
 	TC_PORT_LEGACY,
 };
 
-enum dpio_channel {
-	DPIO_CH0,
-	DPIO_CH1
-};
-
-enum dpio_phy {
-	DPIO_PHY0,
-	DPIO_PHY1,
-	DPIO_PHY2,
-};
-
 enum aux_ch {
 	AUX_CH_A,
 	AUX_CH_B,
@@ -316,15 +288,6 @@ enum aux_ch {
 
 #define aux_ch_name(a) ((a) + 'A')
 
-/* Used by dp and fdi links */
-struct intel_link_m_n {
-	u32 tu;
-	u32 data_m;
-	u32 data_n;
-	u32 link_m;
-	u32 link_n;
-};
-
 enum phy {
 	PHY_NONE = -1,
 
diff --git a/drivers/gpu/drm/i915/display/intel_display_core.h b/drivers/gpu/drm/i915/display/intel_display_core.h
index 337d8e08ba43..57ddce3ba02b 100644
--- a/drivers/gpu/drm/i915/display/intel_display_core.h
+++ b/drivers/gpu/drm/i915/display/intel_display_core.h
@@ -14,6 +14,7 @@
 #include <linux/workqueue.h>
 
 #include <drm/drm_connector.h>
+#include <drm/drm_modeset_lock.h>
 
 #include "intel_cdclk.h"
 #include "intel_display.h"
@@ -345,6 +346,10 @@ struct intel_display {
 	} fdi;
 
 	struct {
+		struct list_head obj_list;
+	} global;
+
+	struct {
 		/*
 		 * Base address of where the gmbus and gpio blocks are located
 		 * (either on PCH or on SoC for platforms without PCH).
@@ -371,6 +376,16 @@ struct intel_display {
 	} hdcp;
 
 	struct {
+		/*
+		 * HTI (aka HDPORT) state read during initial hw readout. Most
+		 * platforms don't have HTI, so this will just stay 0. Those
+		 * that do will use this later to figure out which PLLs and PHYs
+		 * are unavailable for driver usage.
+		 */
+		u32 state;
+	} hti;
+
+	struct {
 		struct i915_power_domains domains;
 
 		/* Shadow for DISPLAY_PHY_CONTROL which can't be safely read */
@@ -397,6 +412,12 @@ struct intel_display {
 	} quirks;
 
 	struct {
+		/* restore state for suspend/resume and display reset */
+		struct drm_atomic_state *modeset_state;
+		struct drm_modeset_acquire_ctx reset_ctx;
+	} restore;
+
+	struct {
 		enum {
 			I915_SAGV_UNKNOWN = 0,
 			I915_SAGV_DISABLED,
diff --git a/drivers/gpu/drm/i915/display/intel_display_debugfs.c b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
index cfc056a05bbf..7bcd90384a46 100644
--- a/drivers/gpu/drm/i915/display/intel_display_debugfs.c
+++ b/drivers/gpu/drm/i915/display/intel_display_debugfs.c
@@ -9,6 +9,8 @@
 #include <drm/drm_fourcc.h>
 
 #include "i915_debugfs.h"
+#include "i915_irq.h"
+#include "i915_reg.h"
 #include "intel_de.h"
 #include "intel_display_debugfs.h"
 #include "intel_display_power.h"
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.c b/drivers/gpu/drm/i915/display/intel_display_power.c
index 4c1de91e56ff..3adba64937de 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power.c
@@ -129,6 +129,18 @@ intel_display_power_domain_str(enum intel_display_power_domain domain)
 		return "AUDIO_MMIO";
 	case POWER_DOMAIN_AUDIO_PLAYBACK:
 		return "AUDIO_PLAYBACK";
+	case POWER_DOMAIN_AUX_IO_A:
+		return "AUX_IO_A";
+	case POWER_DOMAIN_AUX_IO_B:
+		return "AUX_IO_B";
+	case POWER_DOMAIN_AUX_IO_C:
+		return "AUX_IO_C";
+	case POWER_DOMAIN_AUX_IO_D:
+		return "AUX_IO_D";
+	case POWER_DOMAIN_AUX_IO_E:
+		return "AUX_IO_E";
+	case POWER_DOMAIN_AUX_IO_F:
+		return "AUX_IO_F";
 	case POWER_DOMAIN_AUX_A:
 		return "AUX_A";
 	case POWER_DOMAIN_AUX_B:
@@ -153,8 +165,6 @@ intel_display_power_domain_str(enum intel_display_power_domain domain)
 		return "AUX_USBC5";
 	case POWER_DOMAIN_AUX_USBC6:
 		return "AUX_USBC6";
-	case POWER_DOMAIN_AUX_IO_A:
-		return "AUX_IO_A";
 	case POWER_DOMAIN_AUX_TBT1:
 		return "AUX_TBT1";
 	case POWER_DOMAIN_AUX_TBT2:
@@ -2289,6 +2299,7 @@ struct intel_ddi_port_domains {
 
 	enum intel_display_power_domain ddi_lanes;
 	enum intel_display_power_domain ddi_io;
+	enum intel_display_power_domain aux_io;
 	enum intel_display_power_domain aux_legacy_usbc;
 	enum intel_display_power_domain aux_tbt;
 };
@@ -2303,6 +2314,7 @@ i9xx_port_domains[] = {
 
 		.ddi_lanes = POWER_DOMAIN_PORT_DDI_LANES_A,
 		.ddi_io = POWER_DOMAIN_PORT_DDI_IO_A,
+		.aux_io = POWER_DOMAIN_AUX_IO_A,
 		.aux_legacy_usbc = POWER_DOMAIN_AUX_A,
 		.aux_tbt = POWER_DOMAIN_INVALID,
 	},
@@ -2318,6 +2330,7 @@ d11_port_domains[] = {
 
 		.ddi_lanes = POWER_DOMAIN_PORT_DDI_LANES_A,
 		.ddi_io = POWER_DOMAIN_PORT_DDI_IO_A,
+		.aux_io = POWER_DOMAIN_AUX_IO_A,
 		.aux_legacy_usbc = POWER_DOMAIN_AUX_A,
 		.aux_tbt = POWER_DOMAIN_INVALID,
 	}, {
@@ -2328,6 +2341,7 @@ d11_port_domains[] = {
 
 		.ddi_lanes = POWER_DOMAIN_PORT_DDI_LANES_C,
 		.ddi_io = POWER_DOMAIN_PORT_DDI_IO_C,
+		.aux_io = POWER_DOMAIN_AUX_IO_C,
 		.aux_legacy_usbc = POWER_DOMAIN_AUX_C,
 		.aux_tbt = POWER_DOMAIN_AUX_TBT1,
 	},
@@ -2343,6 +2357,7 @@ d12_port_domains[] = {
 
 		.ddi_lanes = POWER_DOMAIN_PORT_DDI_LANES_A,
 		.ddi_io = POWER_DOMAIN_PORT_DDI_IO_A,
+		.aux_io = POWER_DOMAIN_AUX_IO_A,
 		.aux_legacy_usbc = POWER_DOMAIN_AUX_A,
 		.aux_tbt = POWER_DOMAIN_INVALID,
 	}, {
@@ -2353,6 +2368,7 @@ d12_port_domains[] = {
 
 		.ddi_lanes = POWER_DOMAIN_PORT_DDI_LANES_TC1,
 		.ddi_io = POWER_DOMAIN_PORT_DDI_IO_TC1,
+		.aux_io = POWER_DOMAIN_INVALID,
 		.aux_legacy_usbc = POWER_DOMAIN_AUX_USBC1,
 		.aux_tbt = POWER_DOMAIN_AUX_TBT1,
 	},
@@ -2368,6 +2384,7 @@ d13_port_domains[] = {
 
 		.ddi_lanes = POWER_DOMAIN_PORT_DDI_LANES_A,
 		.ddi_io = POWER_DOMAIN_PORT_DDI_IO_A,
+		.aux_io = POWER_DOMAIN_AUX_IO_A,
 		.aux_legacy_usbc = POWER_DOMAIN_AUX_A,
 		.aux_tbt = POWER_DOMAIN_INVALID,
 	}, {
@@ -2378,6 +2395,7 @@ d13_port_domains[] = {
 
 		.ddi_lanes = POWER_DOMAIN_PORT_DDI_LANES_TC1,
 		.ddi_io = POWER_DOMAIN_PORT_DDI_IO_TC1,
+		.aux_io = POWER_DOMAIN_INVALID,
 		.aux_legacy_usbc = POWER_DOMAIN_AUX_USBC1,
 		.aux_tbt = POWER_DOMAIN_AUX_TBT1,
 	}, {
@@ -2388,6 +2406,7 @@ d13_port_domains[] = {
 
 		.ddi_lanes = POWER_DOMAIN_PORT_DDI_LANES_D,
 		.ddi_io = POWER_DOMAIN_PORT_DDI_IO_D,
+		.aux_io = POWER_DOMAIN_AUX_IO_D,
 		.aux_legacy_usbc = POWER_DOMAIN_AUX_D,
 		.aux_tbt = POWER_DOMAIN_INVALID,
 	},
@@ -2433,7 +2452,7 @@ intel_display_power_ddi_io_domain(struct drm_i915_private *i915, enum port port)
 {
 	const struct intel_ddi_port_domains *domains = intel_port_domains_for_port(i915, port);
 
-	if (drm_WARN_ON(&i915->drm, !domains) || domains->ddi_io == POWER_DOMAIN_INVALID)
+	if (drm_WARN_ON(&i915->drm, !domains || domains->ddi_io == POWER_DOMAIN_INVALID))
 		return POWER_DOMAIN_PORT_DDI_IO_A;
 
 	return domains->ddi_io + (int)(port - domains->port_start);
@@ -2444,7 +2463,7 @@ intel_display_power_ddi_lanes_domain(struct drm_i915_private *i915, enum port po
 {
 	const struct intel_ddi_port_domains *domains = intel_port_domains_for_port(i915, port);
 
-	if (drm_WARN_ON(&i915->drm, !domains) || domains->ddi_lanes == POWER_DOMAIN_INVALID)
+	if (drm_WARN_ON(&i915->drm, !domains || domains->ddi_lanes == POWER_DOMAIN_INVALID))
 		return POWER_DOMAIN_PORT_DDI_LANES_A;
 
 	return domains->ddi_lanes + (int)(port - domains->port_start);
@@ -2466,11 +2485,22 @@ intel_port_domains_for_aux_ch(struct drm_i915_private *i915, enum aux_ch aux_ch)
 }
 
 enum intel_display_power_domain
+intel_display_power_aux_io_domain(struct drm_i915_private *i915, enum aux_ch aux_ch)
+{
+	const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(i915, aux_ch);
+
+	if (drm_WARN_ON(&i915->drm, !domains || domains->aux_io == POWER_DOMAIN_INVALID))
+		return POWER_DOMAIN_AUX_IO_A;
+
+	return domains->aux_io + (int)(aux_ch - domains->aux_ch_start);
+}
+
+enum intel_display_power_domain
 intel_display_power_legacy_aux_domain(struct drm_i915_private *i915, enum aux_ch aux_ch)
 {
 	const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(i915, aux_ch);
 
-	if (drm_WARN_ON(&i915->drm, !domains) || domains->aux_legacy_usbc == POWER_DOMAIN_INVALID)
+	if (drm_WARN_ON(&i915->drm, !domains || domains->aux_legacy_usbc == POWER_DOMAIN_INVALID))
 		return POWER_DOMAIN_AUX_A;
 
 	return domains->aux_legacy_usbc + (int)(aux_ch - domains->aux_ch_start);
@@ -2481,7 +2511,7 @@ intel_display_power_tbt_aux_domain(struct drm_i915_private *i915, enum aux_ch au
 {
 	const struct intel_ddi_port_domains *domains = intel_port_domains_for_aux_ch(i915, aux_ch);
 
-	if (drm_WARN_ON(&i915->drm, !domains) || domains->aux_tbt == POWER_DOMAIN_INVALID)
+	if (drm_WARN_ON(&i915->drm, !domains || domains->aux_tbt == POWER_DOMAIN_INVALID))
 		return POWER_DOMAIN_AUX_TBT1;
 
 	return domains->aux_tbt + (int)(aux_ch - domains->aux_ch_start);
diff --git a/drivers/gpu/drm/i915/display/intel_display_power.h b/drivers/gpu/drm/i915/display/intel_display_power.h
index 7136ea3f233e..2154d900b1aa 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power.h
+++ b/drivers/gpu/drm/i915/display/intel_display_power.h
@@ -6,11 +6,12 @@
 #ifndef __INTEL_DISPLAY_POWER_H__
 #define __INTEL_DISPLAY_POWER_H__
 
-#include "intel_runtime_pm.h"
+#include "intel_wakeref.h"
 
 enum aux_ch;
 enum dpio_channel;
 enum dpio_phy;
+enum i915_drm_suspend_mode;
 enum port;
 struct drm_i915_private;
 struct i915_power_well;
@@ -76,6 +77,14 @@ enum intel_display_power_domain {
 	POWER_DOMAIN_VGA,
 	POWER_DOMAIN_AUDIO_MMIO,
 	POWER_DOMAIN_AUDIO_PLAYBACK,
+
+	POWER_DOMAIN_AUX_IO_A,
+	POWER_DOMAIN_AUX_IO_B,
+	POWER_DOMAIN_AUX_IO_C,
+	POWER_DOMAIN_AUX_IO_D,
+	POWER_DOMAIN_AUX_IO_E,
+	POWER_DOMAIN_AUX_IO_F,
+
 	POWER_DOMAIN_AUX_A,
 	POWER_DOMAIN_AUX_B,
 	POWER_DOMAIN_AUX_C,
@@ -90,8 +99,6 @@ enum intel_display_power_domain {
 	POWER_DOMAIN_AUX_USBC5,
 	POWER_DOMAIN_AUX_USBC6,
 
-	POWER_DOMAIN_AUX_IO_A,
-
 	POWER_DOMAIN_AUX_TBT1,
 	POWER_DOMAIN_AUX_TBT2,
 	POWER_DOMAIN_AUX_TBT3,
@@ -249,6 +256,8 @@ intel_display_power_ddi_lanes_domain(struct drm_i915_private *i915, enum port po
 enum intel_display_power_domain
 intel_display_power_ddi_io_domain(struct drm_i915_private *i915, enum port port);
 enum intel_display_power_domain
+intel_display_power_aux_io_domain(struct drm_i915_private *i915, enum aux_ch aux_ch);
+enum intel_display_power_domain
 intel_display_power_legacy_aux_domain(struct drm_i915_private *i915, enum aux_ch aux_ch);
 enum intel_display_power_domain
 intel_display_power_tbt_aux_domain(struct drm_i915_private *i915, enum aux_ch aux_ch);
diff --git a/drivers/gpu/drm/i915/display/intel_display_power_map.c b/drivers/gpu/drm/i915/display/intel_display_power_map.c
index dc04afc6cc8f..f5d66ca85b19 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power_map.c
+++ b/drivers/gpu/drm/i915/display/intel_display_power_map.c
@@ -170,6 +170,8 @@ I915_DECL_PW_DOMAINS(vlv_pwdoms_display,
 	POWER_DOMAIN_VGA,
 	POWER_DOMAIN_AUDIO_MMIO,
 	POWER_DOMAIN_AUDIO_PLAYBACK,
+	POWER_DOMAIN_AUX_IO_B,
+	POWER_DOMAIN_AUX_IO_C,
 	POWER_DOMAIN_AUX_B,
 	POWER_DOMAIN_AUX_C,
 	POWER_DOMAIN_GMBUS,
@@ -179,6 +181,8 @@ I915_DECL_PW_DOMAINS(vlv_pwdoms_dpio_cmn_bc,
 	POWER_DOMAIN_PORT_DDI_LANES_B,
 	POWER_DOMAIN_PORT_DDI_LANES_C,
 	POWER_DOMAIN_PORT_CRT,
+	POWER_DOMAIN_AUX_IO_B,
+	POWER_DOMAIN_AUX_IO_C,
 	POWER_DOMAIN_AUX_B,
 	POWER_DOMAIN_AUX_C,
 	POWER_DOMAIN_INIT);
@@ -186,6 +190,8 @@ I915_DECL_PW_DOMAINS(vlv_pwdoms_dpio_cmn_bc,
 I915_DECL_PW_DOMAINS(vlv_pwdoms_dpio_tx_bc_lanes,
 	POWER_DOMAIN_PORT_DDI_LANES_B,
 	POWER_DOMAIN_PORT_DDI_LANES_C,
+	POWER_DOMAIN_AUX_IO_B,
+	POWER_DOMAIN_AUX_IO_C,
 	POWER_DOMAIN_AUX_B,
 	POWER_DOMAIN_AUX_C,
 	POWER_DOMAIN_INIT);
@@ -243,6 +249,9 @@ I915_DECL_PW_DOMAINS(chv_pwdoms_display,
 	POWER_DOMAIN_VGA,
 	POWER_DOMAIN_AUDIO_MMIO,
 	POWER_DOMAIN_AUDIO_PLAYBACK,
+	POWER_DOMAIN_AUX_IO_B,
+	POWER_DOMAIN_AUX_IO_C,
+	POWER_DOMAIN_AUX_IO_D,
 	POWER_DOMAIN_AUX_B,
 	POWER_DOMAIN_AUX_C,
 	POWER_DOMAIN_AUX_D,
@@ -252,12 +261,15 @@ I915_DECL_PW_DOMAINS(chv_pwdoms_display,
 I915_DECL_PW_DOMAINS(chv_pwdoms_dpio_cmn_bc,
 	POWER_DOMAIN_PORT_DDI_LANES_B,
 	POWER_DOMAIN_PORT_DDI_LANES_C,
+	POWER_DOMAIN_AUX_IO_B,
+	POWER_DOMAIN_AUX_IO_C,
 	POWER_DOMAIN_AUX_B,
 	POWER_DOMAIN_AUX_C,
 	POWER_DOMAIN_INIT);
 
 I915_DECL_PW_DOMAINS(chv_pwdoms_dpio_cmn_d,
 	POWER_DOMAIN_PORT_DDI_LANES_D,
+	POWER_DOMAIN_AUX_IO_D,
 	POWER_DOMAIN_AUX_D,
 	POWER_DOMAIN_INIT);
 
@@ -305,6 +317,9 @@ static const struct i915_power_well_desc_list chv_power_wells[] = {
 	POWER_DOMAIN_VGA, \
 	POWER_DOMAIN_AUDIO_MMIO, \
 	POWER_DOMAIN_AUDIO_PLAYBACK, \
+	POWER_DOMAIN_AUX_IO_B, \
+	POWER_DOMAIN_AUX_IO_C, \
+	POWER_DOMAIN_AUX_IO_D, \
 	POWER_DOMAIN_AUX_B, \
 	POWER_DOMAIN_AUX_C, \
 	POWER_DOMAIN_AUX_D
@@ -318,6 +333,7 @@ I915_DECL_PW_DOMAINS(skl_pwdoms_dc_off,
 	POWER_DOMAIN_AUX_A,
 	POWER_DOMAIN_MODESET,
 	POWER_DOMAIN_GT_IRQ,
+	POWER_DOMAIN_DC_OFF,
 	POWER_DOMAIN_INIT);
 
 I915_DECL_PW_DOMAINS(skl_pwdoms_ddi_io_a_e,
@@ -407,6 +423,8 @@ static const struct i915_power_well_desc_list skl_power_wells[] = {
 	POWER_DOMAIN_VGA, \
 	POWER_DOMAIN_AUDIO_MMIO, \
 	POWER_DOMAIN_AUDIO_PLAYBACK, \
+	POWER_DOMAIN_AUX_IO_B, \
+	POWER_DOMAIN_AUX_IO_C, \
 	POWER_DOMAIN_AUX_B, \
 	POWER_DOMAIN_AUX_C
 
@@ -420,16 +438,20 @@ I915_DECL_PW_DOMAINS(bxt_pwdoms_dc_off,
 	POWER_DOMAIN_GMBUS,
 	POWER_DOMAIN_MODESET,
 	POWER_DOMAIN_GT_IRQ,
+	POWER_DOMAIN_DC_OFF,
 	POWER_DOMAIN_INIT);
 
 I915_DECL_PW_DOMAINS(bxt_pwdoms_dpio_cmn_a,
 	POWER_DOMAIN_PORT_DDI_LANES_A,
+	POWER_DOMAIN_AUX_IO_A,
 	POWER_DOMAIN_AUX_A,
 	POWER_DOMAIN_INIT);
 
 I915_DECL_PW_DOMAINS(bxt_pwdoms_dpio_cmn_bc,
 	POWER_DOMAIN_PORT_DDI_LANES_B,
 	POWER_DOMAIN_PORT_DDI_LANES_C,
+	POWER_DOMAIN_AUX_IO_B,
+	POWER_DOMAIN_AUX_IO_C,
 	POWER_DOMAIN_AUX_B,
 	POWER_DOMAIN_AUX_C,
 	POWER_DOMAIN_INIT);
@@ -483,6 +505,8 @@ static const struct i915_power_well_desc_list bxt_power_wells[] = {
 	POWER_DOMAIN_VGA, \
 	POWER_DOMAIN_AUDIO_MMIO, \
 	POWER_DOMAIN_AUDIO_PLAYBACK, \
+	POWER_DOMAIN_AUX_IO_B, \
+	POWER_DOMAIN_AUX_IO_C, \
 	POWER_DOMAIN_AUX_B, \
 	POWER_DOMAIN_AUX_C
 
@@ -496,6 +520,7 @@ I915_DECL_PW_DOMAINS(glk_pwdoms_dc_off,
 	POWER_DOMAIN_GMBUS,
 	POWER_DOMAIN_MODESET,
 	POWER_DOMAIN_GT_IRQ,
+	POWER_DOMAIN_DC_OFF,
 	POWER_DOMAIN_INIT);
 
 I915_DECL_PW_DOMAINS(glk_pwdoms_ddi_io_a,	POWER_DOMAIN_PORT_DDI_IO_A);
@@ -504,29 +529,34 @@ I915_DECL_PW_DOMAINS(glk_pwdoms_ddi_io_c,	POWER_DOMAIN_PORT_DDI_IO_C);
 
 I915_DECL_PW_DOMAINS(glk_pwdoms_dpio_cmn_a,
 	POWER_DOMAIN_PORT_DDI_LANES_A,
+	POWER_DOMAIN_AUX_IO_A,
 	POWER_DOMAIN_AUX_A,
 	POWER_DOMAIN_INIT);
 
 I915_DECL_PW_DOMAINS(glk_pwdoms_dpio_cmn_b,
 	POWER_DOMAIN_PORT_DDI_LANES_B,
+	POWER_DOMAIN_AUX_IO_B,
 	POWER_DOMAIN_AUX_B,
 	POWER_DOMAIN_INIT);
 
 I915_DECL_PW_DOMAINS(glk_pwdoms_dpio_cmn_c,
 	POWER_DOMAIN_PORT_DDI_LANES_C,
+	POWER_DOMAIN_AUX_IO_C,
 	POWER_DOMAIN_AUX_C,
 	POWER_DOMAIN_INIT);
 
 I915_DECL_PW_DOMAINS(glk_pwdoms_aux_a,
-	POWER_DOMAIN_AUX_A,
 	POWER_DOMAIN_AUX_IO_A,
+	POWER_DOMAIN_AUX_A,
 	POWER_DOMAIN_INIT);
 
 I915_DECL_PW_DOMAINS(glk_pwdoms_aux_b,
+	POWER_DOMAIN_AUX_IO_B,
 	POWER_DOMAIN_AUX_B,
 	POWER_DOMAIN_INIT);
 
 I915_DECL_PW_DOMAINS(glk_pwdoms_aux_c,
+	POWER_DOMAIN_AUX_IO_C,
 	POWER_DOMAIN_AUX_C,
 	POWER_DOMAIN_INIT);
 
@@ -617,6 +647,11 @@ I915_DECL_PW_DOMAINS(icl_pwdoms_pw_4,
 	POWER_DOMAIN_VGA, \
 	POWER_DOMAIN_AUDIO_MMIO, \
 	POWER_DOMAIN_AUDIO_PLAYBACK, \
+	POWER_DOMAIN_AUX_IO_B, \
+	POWER_DOMAIN_AUX_IO_C, \
+	POWER_DOMAIN_AUX_IO_D, \
+	POWER_DOMAIN_AUX_IO_E, \
+	POWER_DOMAIN_AUX_IO_F, \
 	POWER_DOMAIN_AUX_B, \
 	POWER_DOMAIN_AUX_C, \
 	POWER_DOMAIN_AUX_D, \
@@ -658,13 +693,23 @@ I915_DECL_PW_DOMAINS(icl_pwdoms_ddi_io_e,	POWER_DOMAIN_PORT_DDI_IO_E);
 I915_DECL_PW_DOMAINS(icl_pwdoms_ddi_io_f,	POWER_DOMAIN_PORT_DDI_IO_F);
 
 I915_DECL_PW_DOMAINS(icl_pwdoms_aux_a,
-	POWER_DOMAIN_AUX_A,
-	POWER_DOMAIN_AUX_IO_A);
-I915_DECL_PW_DOMAINS(icl_pwdoms_aux_b,		POWER_DOMAIN_AUX_B);
-I915_DECL_PW_DOMAINS(icl_pwdoms_aux_c,		POWER_DOMAIN_AUX_C);
-I915_DECL_PW_DOMAINS(icl_pwdoms_aux_d,		POWER_DOMAIN_AUX_D);
-I915_DECL_PW_DOMAINS(icl_pwdoms_aux_e,		POWER_DOMAIN_AUX_E);
-I915_DECL_PW_DOMAINS(icl_pwdoms_aux_f,		POWER_DOMAIN_AUX_F);
+	POWER_DOMAIN_AUX_IO_A,
+	POWER_DOMAIN_AUX_A);
+I915_DECL_PW_DOMAINS(icl_pwdoms_aux_b,
+	POWER_DOMAIN_AUX_IO_B,
+	POWER_DOMAIN_AUX_B);
+I915_DECL_PW_DOMAINS(icl_pwdoms_aux_c,
+	POWER_DOMAIN_AUX_IO_C,
+	POWER_DOMAIN_AUX_C);
+I915_DECL_PW_DOMAINS(icl_pwdoms_aux_d,
+	POWER_DOMAIN_AUX_IO_D,
+	POWER_DOMAIN_AUX_D);
+I915_DECL_PW_DOMAINS(icl_pwdoms_aux_e,
+	POWER_DOMAIN_AUX_IO_E,
+	POWER_DOMAIN_AUX_E);
+I915_DECL_PW_DOMAINS(icl_pwdoms_aux_f,
+	POWER_DOMAIN_AUX_IO_F,
+	POWER_DOMAIN_AUX_F);
 I915_DECL_PW_DOMAINS(icl_pwdoms_aux_tbt1,	POWER_DOMAIN_AUX_TBT1);
 I915_DECL_PW_DOMAINS(icl_pwdoms_aux_tbt2,	POWER_DOMAIN_AUX_TBT2);
 I915_DECL_PW_DOMAINS(icl_pwdoms_aux_tbt3,	POWER_DOMAIN_AUX_TBT3);
@@ -816,6 +861,7 @@ I915_DECL_PW_DOMAINS(tgl_pwdoms_dc_off,
 	POWER_DOMAIN_AUX_B,
 	POWER_DOMAIN_AUX_C,
 	POWER_DOMAIN_MODESET,
+	POWER_DOMAIN_DC_OFF,
 	POWER_DOMAIN_INIT);
 
 I915_DECL_PW_DOMAINS(tgl_pwdoms_ddi_io_tc1,	POWER_DOMAIN_PORT_DDI_IO_TC1);
@@ -1012,6 +1058,7 @@ I915_DECL_PW_DOMAINS(rkl_pwdoms_dc_off,
 	POWER_DOMAIN_AUX_A,
 	POWER_DOMAIN_AUX_B,
 	POWER_DOMAIN_MODESET,
+	POWER_DOMAIN_DC_OFF,
 	POWER_DOMAIN_INIT);
 
 static const struct i915_power_well_desc rkl_power_wells_main[] = {
@@ -1094,6 +1141,7 @@ I915_DECL_PW_DOMAINS(dg1_pwdoms_dc_off,
 	POWER_DOMAIN_AUX_A,
 	POWER_DOMAIN_AUX_B,
 	POWER_DOMAIN_MODESET,
+	POWER_DOMAIN_DC_OFF,
 	POWER_DOMAIN_INIT);
 
 I915_DECL_PW_DOMAINS(dg1_pwdoms_pw_2,
@@ -1215,6 +1263,9 @@ I915_DECL_PW_DOMAINS(xelpd_pwdoms_pw_a,
 	POWER_DOMAIN_PORT_DDI_LANES_TC4, \
 	POWER_DOMAIN_VGA, \
 	POWER_DOMAIN_AUDIO_PLAYBACK, \
+	POWER_DOMAIN_AUX_IO_C, \
+	POWER_DOMAIN_AUX_IO_D, \
+	POWER_DOMAIN_AUX_IO_E, \
 	POWER_DOMAIN_AUX_C, \
 	POWER_DOMAIN_AUX_D, \
 	POWER_DOMAIN_AUX_E, \
@@ -1255,6 +1306,7 @@ I915_DECL_PW_DOMAINS(xelpd_pwdoms_dc_off,
 	POWER_DOMAIN_AUX_A,
 	POWER_DOMAIN_AUX_B,
 	POWER_DOMAIN_MODESET,
+	POWER_DOMAIN_DC_OFF,
 	POWER_DOMAIN_INIT);
 
 static const struct i915_power_well_desc xelpd_power_wells_main[] = {
@@ -1376,6 +1428,7 @@ I915_DECL_PW_DOMAINS(xelpdp_pwdoms_dc_off,
 	POWER_DOMAIN_MODESET,
 	POWER_DOMAIN_AUX_A,
 	POWER_DOMAIN_AUX_B,
+	POWER_DOMAIN_DC_OFF,
 	POWER_DOMAIN_INIT);
 
 I915_DECL_PW_DOMAINS(xelpdp_pwdoms_aux_tc1,
diff --git a/drivers/gpu/drm/i915/display/intel_display_power_well.h b/drivers/gpu/drm/i915/display/intel_display_power_well.h
index e13b521e322a..ba7cb977e7c7 100644
--- a/drivers/gpu/drm/i915/display/intel_display_power_well.h
+++ b/drivers/gpu/drm/i915/display/intel_display_power_well.h
@@ -7,8 +7,8 @@
 
 #include <linux/types.h>
 
-#include "intel_display.h"
 #include "intel_display_power.h"
+#include "intel_dpio_phy.h"
 
 struct drm_i915_private;
 struct i915_power_well;
diff --git a/drivers/gpu/drm/i915/display/intel_display_reg_defs.h b/drivers/gpu/drm/i915/display/intel_display_reg_defs.h
new file mode 100644
index 000000000000..02605418ff08
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_display_reg_defs.h
@@ -0,0 +1,53 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __INTEL_DISPLAY_REG_DEFS_H__
+#define __INTEL_DISPLAY_REG_DEFS_H__
+
+#include "i915_reg_defs.h"
+
+#define DISPLAY_MMIO_BASE(dev_priv)	(INTEL_INFO(dev_priv)->display.mmio_offset)
+
+#define VLV_DISPLAY_BASE		0x180000
+
+/*
+ * Named helper wrappers around _PICK_EVEN() and _PICK().
+ */
+#define _PIPE(pipe, a, b)		_PICK_EVEN(pipe, a, b)
+#define _PLANE(plane, a, b)		_PICK_EVEN(plane, a, b)
+#define _TRANS(tran, a, b)		_PICK_EVEN(tran, a, b)
+#define _PORT(port, a, b)		_PICK_EVEN(port, a, b)
+#define _PLL(pll, a, b)			_PICK_EVEN(pll, a, b)
+#define _PHY(phy, a, b)			_PICK_EVEN(phy, a, b)
+
+#define _MMIO_PIPE(pipe, a, b)		_MMIO(_PIPE(pipe, a, b))
+#define _MMIO_PLANE(plane, a, b)	_MMIO(_PLANE(plane, a, b))
+#define _MMIO_TRANS(tran, a, b)		_MMIO(_TRANS(tran, a, b))
+#define _MMIO_PORT(port, a, b)		_MMIO(_PORT(port, a, b))
+#define _MMIO_PLL(pll, a, b)		_MMIO(_PLL(pll, a, b))
+#define _MMIO_PHY(phy, a, b)		_MMIO(_PHY(phy, a, b))
+
+#define _PHY3(phy, ...)			_PICK(phy, __VA_ARGS__)
+
+#define _MMIO_PIPE3(pipe, a, b, c)	_MMIO(_PICK(pipe, a, b, c))
+#define _MMIO_PORT3(pipe, a, b, c)	_MMIO(_PICK(pipe, a, b, c))
+#define _MMIO_PHY3(phy, a, b, c)	_MMIO(_PHY3(phy, a, b, c))
+#define _MMIO_PLL3(pll, ...)		_MMIO(_PICK(pll, __VA_ARGS__))
+
+/*
+ * Device info offset array based helpers for groups of registers with unevenly
+ * spaced base offsets.
+ */
+#define _MMIO_PIPE2(pipe, reg)		_MMIO(INTEL_INFO(dev_priv)->display.pipe_offsets[(pipe)] - \
+					      INTEL_INFO(dev_priv)->display.pipe_offsets[PIPE_A] + \
+					      DISPLAY_MMIO_BASE(dev_priv) + (reg))
+#define _MMIO_TRANS2(tran, reg)		_MMIO(INTEL_INFO(dev_priv)->display.trans_offsets[(tran)] - \
+					      INTEL_INFO(dev_priv)->display.trans_offsets[TRANSCODER_A] + \
+					      DISPLAY_MMIO_BASE(dev_priv) + (reg))
+#define _MMIO_CURSOR2(pipe, reg)	_MMIO(INTEL_INFO(dev_priv)->display.cursor_offsets[(pipe)] - \
+					      INTEL_INFO(dev_priv)->display.cursor_offsets[PIPE_A] + \
+					      DISPLAY_MMIO_BASE(dev_priv) + (reg))
+
+#endif /* __INTEL_DISPLAY_REG_DEFS_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_display_trace.h b/drivers/gpu/drm/i915/display/intel_display_trace.h
index 2dd5a4b7f5d8..725aba3fa531 100644
--- a/drivers/gpu/drm/i915/display/intel_display_trace.h
+++ b/drivers/gpu/drm/i915/display/intel_display_trace.h
@@ -18,11 +18,15 @@
 #include "intel_crtc.h"
 #include "intel_display_types.h"
 
+#define __dev_name_i915(i915) dev_name((i915)->drm.dev)
+#define __dev_name_kms(obj) dev_name((obj)->base.dev->dev)
+
 TRACE_EVENT(intel_pipe_enable,
 	    TP_PROTO(struct intel_crtc *crtc),
 	    TP_ARGS(crtc),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(crtc))
 			     __array(u32, frame, 3)
 			     __array(u32, scanline, 3)
 			     __field(enum pipe, pipe)
@@ -30,6 +34,7 @@ TRACE_EVENT(intel_pipe_enable,
 	    TP_fast_assign(
 			   struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 			   struct intel_crtc *it__;
+			   __assign_str(dev, __dev_name_kms(crtc));
 			   for_each_intel_crtc(&dev_priv->drm, it__) {
 				   __entry->frame[it__->pipe] = intel_crtc_get_vblank_counter(it__);
 				   __entry->scanline[it__->pipe] = intel_get_crtc_scanline(it__);
@@ -37,8 +42,8 @@ TRACE_EVENT(intel_pipe_enable,
 			   __entry->pipe = crtc->pipe;
 			   ),
 
-	    TP_printk("pipe %c enable, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u",
-		      pipe_name(__entry->pipe),
+	    TP_printk("dev %s, pipe %c enable, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u",
+		      __get_str(dev), pipe_name(__entry->pipe),
 		      __entry->frame[PIPE_A], __entry->scanline[PIPE_A],
 		      __entry->frame[PIPE_B], __entry->scanline[PIPE_B],
 		      __entry->frame[PIPE_C], __entry->scanline[PIPE_C])
@@ -49,6 +54,7 @@ TRACE_EVENT(intel_pipe_disable,
 	    TP_ARGS(crtc),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(crtc))
 			     __array(u32, frame, 3)
 			     __array(u32, scanline, 3)
 			     __field(enum pipe, pipe)
@@ -57,6 +63,7 @@ TRACE_EVENT(intel_pipe_disable,
 	    TP_fast_assign(
 			   struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
 			   struct intel_crtc *it__;
+			   __assign_str(dev, __dev_name_kms(crtc));
 			   for_each_intel_crtc(&dev_priv->drm, it__) {
 				   __entry->frame[it__->pipe] = intel_crtc_get_vblank_counter(it__);
 				   __entry->scanline[it__->pipe] = intel_get_crtc_scanline(it__);
@@ -64,8 +71,8 @@ TRACE_EVENT(intel_pipe_disable,
 			   __entry->pipe = crtc->pipe;
 			   ),
 
-	    TP_printk("pipe %c disable, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u",
-		      pipe_name(__entry->pipe),
+	    TP_printk("dev %s, pipe %c disable, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u",
+		      __get_str(dev), pipe_name(__entry->pipe),
 		      __entry->frame[PIPE_A], __entry->scanline[PIPE_A],
 		      __entry->frame[PIPE_B], __entry->scanline[PIPE_B],
 		      __entry->frame[PIPE_C], __entry->scanline[PIPE_C])
@@ -76,6 +83,7 @@ TRACE_EVENT(intel_pipe_crc,
 	    TP_ARGS(crtc, crcs),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(crtc))
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
@@ -83,16 +91,19 @@ TRACE_EVENT(intel_pipe_crc,
 			     ),
 
 	    TP_fast_assign(
+			   __assign_str(dev, __dev_name_kms(crtc));
 			   __entry->pipe = crtc->pipe;
 			   __entry->frame = intel_crtc_get_vblank_counter(crtc);
 			   __entry->scanline = intel_get_crtc_scanline(crtc);
 			   memcpy(__entry->crcs, crcs, sizeof(__entry->crcs));
 			   ),
 
-	    TP_printk("pipe %c, frame=%u, scanline=%u crc=%08x %08x %08x %08x %08x",
-		      pipe_name(__entry->pipe), __entry->frame, __entry->scanline,
-		      __entry->crcs[0], __entry->crcs[1], __entry->crcs[2],
-		      __entry->crcs[3], __entry->crcs[4])
+	    TP_printk("dev %s, pipe %c, frame=%u, scanline=%u crc=%08x %08x %08x %08x %08x",
+		      __get_str(dev), pipe_name(__entry->pipe),
+		      __entry->frame, __entry->scanline,
+		      __entry->crcs[0], __entry->crcs[1],
+		      __entry->crcs[2], __entry->crcs[3],
+		      __entry->crcs[4])
 );
 
 TRACE_EVENT(intel_cpu_fifo_underrun,
@@ -100,6 +111,7 @@ TRACE_EVENT(intel_cpu_fifo_underrun,
 	    TP_ARGS(dev_priv, pipe),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_i915(dev_priv))
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
@@ -107,13 +119,14 @@ TRACE_EVENT(intel_cpu_fifo_underrun,
 
 	    TP_fast_assign(
 			    struct intel_crtc *crtc = intel_crtc_for_pipe(dev_priv, pipe);
+			   __assign_str(dev, __dev_name_kms(crtc));
 			   __entry->pipe = pipe;
 			   __entry->frame = intel_crtc_get_vblank_counter(crtc);
 			   __entry->scanline = intel_get_crtc_scanline(crtc);
 			   ),
 
-	    TP_printk("pipe %c, frame=%u, scanline=%u",
-		      pipe_name(__entry->pipe),
+	    TP_printk("dev %s, pipe %c, frame=%u, scanline=%u",
+		      __get_str(dev), pipe_name(__entry->pipe),
 		      __entry->frame, __entry->scanline)
 );
 
@@ -122,6 +135,7 @@ TRACE_EVENT(intel_pch_fifo_underrun,
 	    TP_ARGS(dev_priv, pch_transcoder),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_i915(dev_priv))
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
@@ -130,13 +144,14 @@ TRACE_EVENT(intel_pch_fifo_underrun,
 	    TP_fast_assign(
 			   enum pipe pipe = pch_transcoder;
 			   struct intel_crtc *crtc = intel_crtc_for_pipe(dev_priv, pipe);
+			   __assign_str(dev, __dev_name_i915(dev_priv));
 			   __entry->pipe = pipe;
 			   __entry->frame = intel_crtc_get_vblank_counter(crtc);
 			   __entry->scanline = intel_get_crtc_scanline(crtc);
 			   ),
 
-	    TP_printk("pch transcoder %c, frame=%u, scanline=%u",
-		      pipe_name(__entry->pipe),
+	    TP_printk("dev %s, pch transcoder %c, frame=%u, scanline=%u",
+		      __get_str(dev), pipe_name(__entry->pipe),
 		      __entry->frame, __entry->scanline)
 );
 
@@ -145,6 +160,7 @@ TRACE_EVENT(intel_memory_cxsr,
 	    TP_ARGS(dev_priv, old, new),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_i915(dev_priv))
 			     __array(u32, frame, 3)
 			     __array(u32, scanline, 3)
 			     __field(bool, old)
@@ -153,6 +169,7 @@ TRACE_EVENT(intel_memory_cxsr,
 
 	    TP_fast_assign(
 			   struct intel_crtc *crtc;
+			   __assign_str(dev, __dev_name_i915(dev_priv));
 			   for_each_intel_crtc(&dev_priv->drm, crtc) {
 				   __entry->frame[crtc->pipe] = intel_crtc_get_vblank_counter(crtc);
 				   __entry->scanline[crtc->pipe] = intel_get_crtc_scanline(crtc);
@@ -161,8 +178,8 @@ TRACE_EVENT(intel_memory_cxsr,
 			   __entry->new = new;
 			   ),
 
-	    TP_printk("%s->%s, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u",
-		      str_on_off(__entry->old), str_on_off(__entry->new),
+	    TP_printk("dev %s, cxsr %s->%s, pipe A: frame=%u, scanline=%u, pipe B: frame=%u, scanline=%u, pipe C: frame=%u, scanline=%u",
+		      __get_str(dev), str_on_off(__entry->old), str_on_off(__entry->new),
 		      __entry->frame[PIPE_A], __entry->scanline[PIPE_A],
 		      __entry->frame[PIPE_B], __entry->scanline[PIPE_B],
 		      __entry->frame[PIPE_C], __entry->scanline[PIPE_C])
@@ -173,6 +190,7 @@ TRACE_EVENT(g4x_wm,
 	    TP_ARGS(crtc, wm),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(crtc))
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
@@ -191,6 +209,7 @@ TRACE_EVENT(g4x_wm,
 			     ),
 
 	    TP_fast_assign(
+			   __assign_str(dev, __dev_name_kms(crtc));
 			   __entry->pipe = crtc->pipe;
 			   __entry->frame = intel_crtc_get_vblank_counter(crtc);
 			   __entry->scanline = intel_get_crtc_scanline(crtc);
@@ -208,8 +227,9 @@ TRACE_EVENT(g4x_wm,
 			   __entry->fbc = wm->fbc_en;
 			   ),
 
-	    TP_printk("pipe %c, frame=%u, scanline=%u, wm %d/%d/%d, sr %s/%d/%d/%d, hpll %s/%d/%d/%d, fbc %s",
-		      pipe_name(__entry->pipe), __entry->frame, __entry->scanline,
+	    TP_printk("dev %s, pipe %c, frame=%u, scanline=%u, wm %d/%d/%d, sr %s/%d/%d/%d, hpll %s/%d/%d/%d, fbc %s",
+		      __get_str(dev), pipe_name(__entry->pipe),
+		      __entry->frame, __entry->scanline,
 		      __entry->primary, __entry->sprite, __entry->cursor,
 		      str_yes_no(__entry->cxsr), __entry->sr_plane, __entry->sr_cursor, __entry->sr_fbc,
 		      str_yes_no(__entry->hpll), __entry->hpll_plane, __entry->hpll_cursor, __entry->hpll_fbc,
@@ -221,6 +241,7 @@ TRACE_EVENT(vlv_wm,
 	    TP_ARGS(crtc, wm),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(crtc))
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
@@ -235,6 +256,7 @@ TRACE_EVENT(vlv_wm,
 			     ),
 
 	    TP_fast_assign(
+			   __assign_str(dev, __dev_name_kms(crtc));
 			   __entry->pipe = crtc->pipe;
 			   __entry->frame = intel_crtc_get_vblank_counter(crtc);
 			   __entry->scanline = intel_get_crtc_scanline(crtc);
@@ -248,9 +270,10 @@ TRACE_EVENT(vlv_wm,
 			   __entry->sr_cursor = wm->sr.cursor;
 			   ),
 
-	    TP_printk("pipe %c, frame=%u, scanline=%u, level=%d, cxsr=%d, wm %d/%d/%d/%d, sr %d/%d",
-		      pipe_name(__entry->pipe), __entry->frame,
-		      __entry->scanline, __entry->level, __entry->cxsr,
+	    TP_printk("dev %s, pipe %c, frame=%u, scanline=%u, level=%d, cxsr=%d, wm %d/%d/%d/%d, sr %d/%d",
+		      __get_str(dev), pipe_name(__entry->pipe),
+		      __entry->frame, __entry->scanline,
+		      __entry->level, __entry->cxsr,
 		      __entry->primary, __entry->sprite0, __entry->sprite1, __entry->cursor,
 		      __entry->sr_plane, __entry->sr_cursor)
 );
@@ -260,6 +283,7 @@ TRACE_EVENT(vlv_fifo_size,
 	    TP_ARGS(crtc, sprite0_start, sprite1_start, fifo_size),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(crtc))
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
@@ -269,6 +293,7 @@ TRACE_EVENT(vlv_fifo_size,
 			     ),
 
 	    TP_fast_assign(
+			   __assign_str(dev, __dev_name_kms(crtc));
 			   __entry->pipe = crtc->pipe;
 			   __entry->frame = intel_crtc_get_vblank_counter(crtc);
 			   __entry->scanline = intel_get_crtc_scanline(crtc);
@@ -277,90 +302,96 @@ TRACE_EVENT(vlv_fifo_size,
 			   __entry->fifo_size = fifo_size;
 			   ),
 
-	    TP_printk("pipe %c, frame=%u, scanline=%u, %d/%d/%d",
-		      pipe_name(__entry->pipe), __entry->frame,
-		      __entry->scanline, __entry->sprite0_start,
-		      __entry->sprite1_start, __entry->fifo_size)
+	    TP_printk("dev %s, pipe %c, frame=%u, scanline=%u, %d/%d/%d",
+		      __get_str(dev), pipe_name(__entry->pipe),
+		      __entry->frame, __entry->scanline,
+		      __entry->sprite0_start, __entry->sprite1_start, __entry->fifo_size)
 );
 
 TRACE_EVENT(intel_plane_update_noarm,
-	    TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc),
+	    TP_PROTO(struct intel_plane *plane, struct intel_crtc *crtc),
 	    TP_ARGS(plane, crtc),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(plane))
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
 			     __array(int, src, 4)
 			     __array(int, dst, 4)
-			     __string(name, plane->name)
+			     __string(name, plane->base.name)
 			     ),
 
 	    TP_fast_assign(
-			   __assign_str(name, plane->name);
+			   __assign_str(dev, __dev_name_kms(plane));
+			   __assign_str(name, plane->base.name);
 			   __entry->pipe = crtc->pipe;
 			   __entry->frame = intel_crtc_get_vblank_counter(crtc);
 			   __entry->scanline = intel_get_crtc_scanline(crtc);
-			   memcpy(__entry->src, &plane->state->src, sizeof(__entry->src));
-			   memcpy(__entry->dst, &plane->state->dst, sizeof(__entry->dst));
+			   memcpy(__entry->src, &plane->base.state->src, sizeof(__entry->src));
+			   memcpy(__entry->dst, &plane->base.state->dst, sizeof(__entry->dst));
 			   ),
 
-	    TP_printk("pipe %c, plane %s, frame=%u, scanline=%u, " DRM_RECT_FP_FMT " -> " DRM_RECT_FMT,
-		      pipe_name(__entry->pipe), __get_str(name),
+	    TP_printk("dev %s, pipe %c, plane %s, frame=%u, scanline=%u, " DRM_RECT_FP_FMT " -> " DRM_RECT_FMT,
+		      __get_str(dev), pipe_name(__entry->pipe), __get_str(name),
 		      __entry->frame, __entry->scanline,
 		      DRM_RECT_FP_ARG((const struct drm_rect *)__entry->src),
 		      DRM_RECT_ARG((const struct drm_rect *)__entry->dst))
 );
 
 TRACE_EVENT(intel_plane_update_arm,
-	    TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc),
+	    TP_PROTO(struct intel_plane *plane, struct intel_crtc *crtc),
 	    TP_ARGS(plane, crtc),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(plane))
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
 			     __array(int, src, 4)
 			     __array(int, dst, 4)
-			     __string(name, plane->name)
+			     __string(name, plane->base.name)
 			     ),
 
 	    TP_fast_assign(
-			   __assign_str(name, plane->name);
+			   __assign_str(dev, __dev_name_kms(plane));
+			   __assign_str(name, plane->base.name);
 			   __entry->pipe = crtc->pipe;
 			   __entry->frame = intel_crtc_get_vblank_counter(crtc);
 			   __entry->scanline = intel_get_crtc_scanline(crtc);
-			   memcpy(__entry->src, &plane->state->src, sizeof(__entry->src));
-			   memcpy(__entry->dst, &plane->state->dst, sizeof(__entry->dst));
+			   memcpy(__entry->src, &plane->base.state->src, sizeof(__entry->src));
+			   memcpy(__entry->dst, &plane->base.state->dst, sizeof(__entry->dst));
 			   ),
 
-	    TP_printk("pipe %c, plane %s, frame=%u, scanline=%u, " DRM_RECT_FP_FMT " -> " DRM_RECT_FMT,
-		      pipe_name(__entry->pipe), __get_str(name),
+	    TP_printk("dev %s, pipe %c, plane %s, frame=%u, scanline=%u, " DRM_RECT_FP_FMT " -> " DRM_RECT_FMT,
+		      __get_str(dev), pipe_name(__entry->pipe), __get_str(name),
 		      __entry->frame, __entry->scanline,
 		      DRM_RECT_FP_ARG((const struct drm_rect *)__entry->src),
 		      DRM_RECT_ARG((const struct drm_rect *)__entry->dst))
 );
 
 TRACE_EVENT(intel_plane_disable_arm,
-	    TP_PROTO(struct drm_plane *plane, struct intel_crtc *crtc),
+	    TP_PROTO(struct intel_plane *plane, struct intel_crtc *crtc),
 	    TP_ARGS(plane, crtc),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(plane))
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
-			     __string(name, plane->name)
+			     __string(name, plane->base.name)
 			     ),
 
 	    TP_fast_assign(
-			   __assign_str(name, plane->name);
+			   __assign_str(dev, __dev_name_kms(plane));
+			   __assign_str(name, plane->base.name);
 			   __entry->pipe = crtc->pipe;
 			   __entry->frame = intel_crtc_get_vblank_counter(crtc);
 			   __entry->scanline = intel_get_crtc_scanline(crtc);
 			   ),
 
-	    TP_printk("pipe %c, plane %s, frame=%u, scanline=%u",
-		      pipe_name(__entry->pipe), __get_str(name),
+	    TP_printk("dev %s, pipe %c, plane %s, frame=%u, scanline=%u",
+		      __get_str(dev), pipe_name(__entry->pipe), __get_str(name),
 		      __entry->frame, __entry->scanline)
 );
 
@@ -369,6 +400,8 @@ TRACE_EVENT(intel_fbc_activate,
 	    TP_ARGS(plane),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(plane))
+			     __string(name, plane->base.name)
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
@@ -377,13 +410,16 @@ TRACE_EVENT(intel_fbc_activate,
 	    TP_fast_assign(
 			   struct intel_crtc *crtc = intel_crtc_for_pipe(to_i915(plane->base.dev),
 									 plane->pipe);
+			   __assign_str(dev, __dev_name_kms(plane));
+			   __assign_str(name, plane->base.name)
 			   __entry->pipe = crtc->pipe;
 			   __entry->frame = intel_crtc_get_vblank_counter(crtc);
 			   __entry->scanline = intel_get_crtc_scanline(crtc);
 			   ),
 
-	    TP_printk("pipe %c, frame=%u, scanline=%u",
-		      pipe_name(__entry->pipe), __entry->frame, __entry->scanline)
+	    TP_printk("dev %s, pipe %c, plane %s, frame=%u, scanline=%u",
+		      __get_str(dev), pipe_name(__entry->pipe), __get_str(name),
+		      __entry->frame, __entry->scanline)
 );
 
 TRACE_EVENT(intel_fbc_deactivate,
@@ -391,6 +427,8 @@ TRACE_EVENT(intel_fbc_deactivate,
 	    TP_ARGS(plane),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(plane))
+			     __string(name, plane->base.name)
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
@@ -399,13 +437,16 @@ TRACE_EVENT(intel_fbc_deactivate,
 	    TP_fast_assign(
 			   struct intel_crtc *crtc = intel_crtc_for_pipe(to_i915(plane->base.dev),
 									 plane->pipe);
+			   __assign_str(dev, __dev_name_kms(plane));
+			   __assign_str(name, plane->base.name)
 			   __entry->pipe = crtc->pipe;
 			   __entry->frame = intel_crtc_get_vblank_counter(crtc);
 			   __entry->scanline = intel_get_crtc_scanline(crtc);
 			   ),
 
-	    TP_printk("pipe %c, frame=%u, scanline=%u",
-		      pipe_name(__entry->pipe), __entry->frame, __entry->scanline)
+	    TP_printk("dev %s, pipe %c, plane %s, frame=%u, scanline=%u",
+		      __get_str(dev), pipe_name(__entry->pipe), __get_str(name),
+		      __entry->frame, __entry->scanline)
 );
 
 TRACE_EVENT(intel_fbc_nuke,
@@ -413,6 +454,8 @@ TRACE_EVENT(intel_fbc_nuke,
 	    TP_ARGS(plane),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(plane))
+			     __string(name, plane->base.name)
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
@@ -421,13 +464,16 @@ TRACE_EVENT(intel_fbc_nuke,
 	    TP_fast_assign(
 			   struct intel_crtc *crtc = intel_crtc_for_pipe(to_i915(plane->base.dev),
 									 plane->pipe);
+			   __assign_str(dev, __dev_name_kms(plane));
+			   __assign_str(name, plane->base.name)
 			   __entry->pipe = crtc->pipe;
 			   __entry->frame = intel_crtc_get_vblank_counter(crtc);
 			   __entry->scanline = intel_get_crtc_scanline(crtc);
 			   ),
 
-	    TP_printk("pipe %c, frame=%u, scanline=%u",
-		      pipe_name(__entry->pipe), __entry->frame, __entry->scanline)
+	    TP_printk("dev %s, pipe %c, plane %s, frame=%u, scanline=%u",
+		      __get_str(dev), pipe_name(__entry->pipe), __get_str(name),
+		      __entry->frame, __entry->scanline)
 );
 
 TRACE_EVENT(intel_crtc_vblank_work_start,
@@ -435,20 +481,22 @@ TRACE_EVENT(intel_crtc_vblank_work_start,
 	    TP_ARGS(crtc),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(crtc))
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
 			     ),
 
 	    TP_fast_assign(
+			   __assign_str(dev, __dev_name_kms(crtc));
 			   __entry->pipe = crtc->pipe;
 			   __entry->frame = intel_crtc_get_vblank_counter(crtc);
 			   __entry->scanline = intel_get_crtc_scanline(crtc);
 			   ),
 
-	    TP_printk("pipe %c, frame=%u, scanline=%u",
-		      pipe_name(__entry->pipe), __entry->frame,
-		       __entry->scanline)
+	    TP_printk("dev %s, pipe %c, frame=%u, scanline=%u",
+		      __get_str(dev), pipe_name(__entry->pipe),
+		      __entry->frame, __entry->scanline)
 );
 
 TRACE_EVENT(intel_crtc_vblank_work_end,
@@ -456,20 +504,22 @@ TRACE_EVENT(intel_crtc_vblank_work_end,
 	    TP_ARGS(crtc),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(crtc))
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
 			     ),
 
 	    TP_fast_assign(
+			   __assign_str(dev, __dev_name_kms(crtc));
 			   __entry->pipe = crtc->pipe;
 			   __entry->frame = intel_crtc_get_vblank_counter(crtc);
 			   __entry->scanline = intel_get_crtc_scanline(crtc);
 			   ),
 
-	    TP_printk("pipe %c, frame=%u, scanline=%u",
-		      pipe_name(__entry->pipe), __entry->frame,
-		       __entry->scanline)
+	    TP_printk("dev %s, pipe %c, frame=%u, scanline=%u",
+		      __get_str(dev), pipe_name(__entry->pipe),
+		      __entry->frame, __entry->scanline)
 );
 
 TRACE_EVENT(intel_pipe_update_start,
@@ -477,6 +527,7 @@ TRACE_EVENT(intel_pipe_update_start,
 	    TP_ARGS(crtc),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(crtc))
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
@@ -485,6 +536,7 @@ TRACE_EVENT(intel_pipe_update_start,
 			     ),
 
 	    TP_fast_assign(
+			   __assign_str(dev, __dev_name_kms(crtc));
 			   __entry->pipe = crtc->pipe;
 			   __entry->frame = intel_crtc_get_vblank_counter(crtc);
 			   __entry->scanline = intel_get_crtc_scanline(crtc);
@@ -492,9 +544,10 @@ TRACE_EVENT(intel_pipe_update_start,
 			   __entry->max = crtc->debug.max_vbl;
 			   ),
 
-	    TP_printk("pipe %c, frame=%u, scanline=%u, min=%u, max=%u",
-		      pipe_name(__entry->pipe), __entry->frame,
-		       __entry->scanline, __entry->min, __entry->max)
+	    TP_printk("dev %s, pipe %c, frame=%u, scanline=%u, min=%u, max=%u",
+		      __get_str(dev), pipe_name(__entry->pipe),
+		      __entry->frame, __entry->scanline,
+		      __entry->min, __entry->max)
 );
 
 TRACE_EVENT(intel_pipe_update_vblank_evaded,
@@ -502,6 +555,7 @@ TRACE_EVENT(intel_pipe_update_vblank_evaded,
 	    TP_ARGS(crtc),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(crtc))
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
@@ -510,6 +564,7 @@ TRACE_EVENT(intel_pipe_update_vblank_evaded,
 			     ),
 
 	    TP_fast_assign(
+			   __assign_str(dev, __dev_name_kms(crtc));
 			   __entry->pipe = crtc->pipe;
 			   __entry->frame = crtc->debug.start_vbl_count;
 			   __entry->scanline = crtc->debug.scanline_start;
@@ -517,9 +572,10 @@ TRACE_EVENT(intel_pipe_update_vblank_evaded,
 			   __entry->max = crtc->debug.max_vbl;
 			   ),
 
-	    TP_printk("pipe %c, frame=%u, scanline=%u, min=%u, max=%u",
-		      pipe_name(__entry->pipe), __entry->frame,
-		       __entry->scanline, __entry->min, __entry->max)
+	    TP_printk("dev %s, pipe %c, frame=%u, scanline=%u, min=%u, max=%u",
+		      __get_str(dev), pipe_name(__entry->pipe),
+		      __entry->frame, __entry->scanline,
+		      __entry->min, __entry->max)
 );
 
 TRACE_EVENT(intel_pipe_update_end,
@@ -527,56 +583,64 @@ TRACE_EVENT(intel_pipe_update_end,
 	    TP_ARGS(crtc, frame, scanline_end),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_kms(crtc))
 			     __field(enum pipe, pipe)
 			     __field(u32, frame)
 			     __field(u32, scanline)
 			     ),
 
 	    TP_fast_assign(
+			   __assign_str(dev, __dev_name_kms(crtc));
 			   __entry->pipe = crtc->pipe;
 			   __entry->frame = frame;
 			   __entry->scanline = scanline_end;
 			   ),
 
-	    TP_printk("pipe %c, frame=%u, scanline=%u",
-		      pipe_name(__entry->pipe), __entry->frame,
-		      __entry->scanline)
+	    TP_printk("dev %s, pipe %c, frame=%u, scanline=%u",
+		      __get_str(dev), pipe_name(__entry->pipe),
+		      __entry->frame, __entry->scanline)
 );
 
 TRACE_EVENT(intel_frontbuffer_invalidate,
-	    TP_PROTO(unsigned int frontbuffer_bits, unsigned int origin),
-	    TP_ARGS(frontbuffer_bits, origin),
+	    TP_PROTO(struct drm_i915_private *i915,
+		     unsigned int frontbuffer_bits, unsigned int origin),
+	    TP_ARGS(i915, frontbuffer_bits, origin),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_i915(i915))
 			     __field(unsigned int, frontbuffer_bits)
 			     __field(unsigned int, origin)
 			     ),
 
 	    TP_fast_assign(
+			   __assign_str(dev, __dev_name_i915(i915));
 			   __entry->frontbuffer_bits = frontbuffer_bits;
 			   __entry->origin = origin;
 			   ),
 
-	    TP_printk("frontbuffer_bits=0x%08x, origin=%u",
-		      __entry->frontbuffer_bits, __entry->origin)
+	    TP_printk("dev %s, frontbuffer_bits=0x%08x, origin=%u",
+		      __get_str(dev), __entry->frontbuffer_bits, __entry->origin)
 );
 
 TRACE_EVENT(intel_frontbuffer_flush,
-	    TP_PROTO(unsigned int frontbuffer_bits, unsigned int origin),
-	    TP_ARGS(frontbuffer_bits, origin),
+	    TP_PROTO(struct drm_i915_private *i915,
+		     unsigned int frontbuffer_bits, unsigned int origin),
+	    TP_ARGS(i915, frontbuffer_bits, origin),
 
 	    TP_STRUCT__entry(
+			     __string(dev, __dev_name_i915(i915))
 			     __field(unsigned int, frontbuffer_bits)
 			     __field(unsigned int, origin)
 			     ),
 
 	    TP_fast_assign(
+			   __assign_str(dev, __dev_name_i915(i915));
 			   __entry->frontbuffer_bits = frontbuffer_bits;
 			   __entry->origin = origin;
 			   ),
 
-	    TP_printk("frontbuffer_bits=0x%08x, origin=%u",
-		      __entry->frontbuffer_bits, __entry->origin)
+	    TP_printk("dev %s, frontbuffer_bits=0x%08x, origin=%u",
+		      __get_str(dev), __entry->frontbuffer_bits, __entry->origin)
 );
 
 #endif /* __INTEL_DISPLAY_TRACE_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_display_types.h b/drivers/gpu/drm/i915/display/intel_display_types.h
index 7f18c052ec16..f07395065a69 100644
--- a/drivers/gpu/drm/i915/display/intel_display_types.h
+++ b/drivers/gpu/drm/i915/display/intel_display_types.h
@@ -969,6 +969,15 @@ struct intel_mpllb_state {
 	u32 mpllb_sscstep;
 };
 
+/* Used by dp and fdi links */
+struct intel_link_m_n {
+	u32 tu;
+	u32 data_m;
+	u32 data_n;
+	u32 link_m;
+	u32 link_n;
+};
+
 struct intel_crtc_state {
 	/*
 	 * uapi (drm) state. This is the software state shown to userspace.
@@ -1366,6 +1375,7 @@ struct intel_crtc {
 	u16 vmax_vblank_start;
 
 	struct intel_display_power_domain_set enabled_power_domains;
+	struct intel_display_power_domain_set hw_readout_power_domains;
 	struct intel_overlay *overlay;
 
 	struct intel_crtc_state *config;
@@ -1803,51 +1813,6 @@ struct intel_dp_mst_encoder {
 	struct intel_connector *connector;
 };
 
-static inline enum dpio_channel
-vlv_dig_port_to_channel(struct intel_digital_port *dig_port)
-{
-	switch (dig_port->base.port) {
-	default:
-		MISSING_CASE(dig_port->base.port);
-		fallthrough;
-	case PORT_B:
-	case PORT_D:
-		return DPIO_CH0;
-	case PORT_C:
-		return DPIO_CH1;
-	}
-}
-
-static inline enum dpio_phy
-vlv_dig_port_to_phy(struct intel_digital_port *dig_port)
-{
-	switch (dig_port->base.port) {
-	default:
-		MISSING_CASE(dig_port->base.port);
-		fallthrough;
-	case PORT_B:
-	case PORT_C:
-		return DPIO_PHY0;
-	case PORT_D:
-		return DPIO_PHY1;
-	}
-}
-
-static inline enum dpio_channel
-vlv_pipe_to_channel(enum pipe pipe)
-{
-	switch (pipe) {
-	default:
-		MISSING_CASE(pipe);
-		fallthrough;
-	case PIPE_A:
-	case PIPE_C:
-		return DPIO_CH0;
-	case PIPE_B:
-		return DPIO_CH1;
-	}
-}
-
 struct intel_load_detect_pipe {
 	struct drm_atomic_state *restore_state;
 };
diff --git a/drivers/gpu/drm/i915/display/intel_dmc.c b/drivers/gpu/drm/i915/display/intel_dmc.c
index 081a4d0083b1..eff3add70611 100644
--- a/drivers/gpu/drm/i915/display/intel_dmc.c
+++ b/drivers/gpu/drm/i915/display/intel_dmc.c
@@ -52,8 +52,8 @@
 
 #define DISPLAY_VER12_DMC_MAX_FW_SIZE	ICL_DMC_MAX_FW_SIZE
 
-#define DG2_DMC_PATH			DMC_PATH(dg2, 2, 07)
-#define DG2_DMC_VERSION_REQUIRED	DMC_VERSION(2, 07)
+#define DG2_DMC_PATH			DMC_PATH(dg2, 2, 08)
+#define DG2_DMC_VERSION_REQUIRED	DMC_VERSION(2, 8)
 MODULE_FIRMWARE(DG2_DMC_PATH);
 
 #define ADLP_DMC_PATH			DMC_PATH(adlp, 2, 16)
diff --git a/drivers/gpu/drm/i915/display/intel_dp.c b/drivers/gpu/drm/i915/display/intel_dp.c
index 7400d6b4c587..75070eb07d4b 100644
--- a/drivers/gpu/drm/i915/display/intel_dp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp.c
@@ -46,6 +46,7 @@
 #include "g4x_dp.h"
 #include "i915_debugfs.h"
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_atomic.h"
 #include "intel_audio.h"
 #include "intel_backlight.h"
@@ -3678,61 +3679,6 @@ static void intel_dp_phy_pattern_update(struct intel_dp *intel_dp,
 	}
 }
 
-static void
-intel_dp_autotest_phy_ddi_disable(struct intel_dp *intel_dp,
-				  const struct intel_crtc_state *crtc_state)
-{
-	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
-	struct drm_device *dev = dig_port->base.base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc);
-	enum pipe pipe = crtc->pipe;
-	u32 trans_ddi_func_ctl_value, trans_conf_value, dp_tp_ctl_value;
-
-	trans_ddi_func_ctl_value = intel_de_read(dev_priv,
-						 TRANS_DDI_FUNC_CTL(pipe));
-	trans_conf_value = intel_de_read(dev_priv, PIPECONF(pipe));
-	dp_tp_ctl_value = intel_de_read(dev_priv, TGL_DP_TP_CTL(pipe));
-
-	trans_ddi_func_ctl_value &= ~(TRANS_DDI_FUNC_ENABLE |
-				      TGL_TRANS_DDI_PORT_MASK);
-	trans_conf_value &= ~PIPECONF_ENABLE;
-	dp_tp_ctl_value &= ~DP_TP_CTL_ENABLE;
-
-	intel_de_write(dev_priv, PIPECONF(pipe), trans_conf_value);
-	intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(pipe),
-		       trans_ddi_func_ctl_value);
-	intel_de_write(dev_priv, TGL_DP_TP_CTL(pipe), dp_tp_ctl_value);
-}
-
-static void
-intel_dp_autotest_phy_ddi_enable(struct intel_dp *intel_dp,
-				 const struct intel_crtc_state *crtc_state)
-{
-	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
-	struct drm_device *dev = dig_port->base.base.dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	enum port port = dig_port->base.port;
-	struct intel_crtc *crtc = to_intel_crtc(dig_port->base.base.crtc);
-	enum pipe pipe = crtc->pipe;
-	u32 trans_ddi_func_ctl_value, trans_conf_value, dp_tp_ctl_value;
-
-	trans_ddi_func_ctl_value = intel_de_read(dev_priv,
-						 TRANS_DDI_FUNC_CTL(pipe));
-	trans_conf_value = intel_de_read(dev_priv, PIPECONF(pipe));
-	dp_tp_ctl_value = intel_de_read(dev_priv, TGL_DP_TP_CTL(pipe));
-
-	trans_ddi_func_ctl_value |= TRANS_DDI_FUNC_ENABLE |
-				    TGL_TRANS_DDI_SELECT_PORT(port);
-	trans_conf_value |= PIPECONF_ENABLE;
-	dp_tp_ctl_value |= DP_TP_CTL_ENABLE;
-
-	intel_de_write(dev_priv, PIPECONF(pipe), trans_conf_value);
-	intel_de_write(dev_priv, TGL_DP_TP_CTL(pipe), dp_tp_ctl_value);
-	intel_de_write(dev_priv, TRANS_DDI_FUNC_CTL(pipe),
-		       trans_ddi_func_ctl_value);
-}
-
 static void intel_dp_process_phy_request(struct intel_dp *intel_dp,
 					 const struct intel_crtc_state *crtc_state)
 {
@@ -3751,14 +3697,10 @@ static void intel_dp_process_phy_request(struct intel_dp *intel_dp,
 	intel_dp_get_adjust_train(intel_dp, crtc_state, DP_PHY_DPRX,
 				  link_status);
 
-	intel_dp_autotest_phy_ddi_disable(intel_dp, crtc_state);
-
 	intel_dp_set_signal_levels(intel_dp, crtc_state, DP_PHY_DPRX);
 
 	intel_dp_phy_pattern_update(intel_dp, crtc_state);
 
-	intel_dp_autotest_phy_ddi_enable(intel_dp, crtc_state);
-
 	drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_LANE0_SET,
 			  intel_dp->train_set, crtc_state->lane_count);
 
@@ -4876,6 +4818,12 @@ void intel_dp_encoder_flush_work(struct drm_encoder *encoder)
 
 	intel_pps_vdd_off_sync(intel_dp);
 
+	/*
+	 * Ensure power off delay is respected on module remove, so that we can
+	 * reduce delays at driver probe. See pps_init_timestamps().
+	 */
+	intel_pps_wait_power_cycle(intel_dp);
+
 	intel_dp_aux_fini(intel_dp);
 }
 
diff --git a/drivers/gpu/drm/i915/display/intel_dp_aux.c b/drivers/gpu/drm/i915/display/intel_dp_aux.c
index 48c375c65a41..664bebdecea7 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_aux.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_aux.c
@@ -4,6 +4,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "i915_trace.h"
 #include "intel_display_types.h"
 #include "intel_dp_aux.h"
diff --git a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
index 35360dd543ac..e0c177161407 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_hdcp.c
@@ -11,6 +11,7 @@
 #include <drm/display/drm_hdcp_helper.h>
 #include <drm/drm_print.h>
 
+#include "i915_reg.h"
 #include "intel_ddi.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
diff --git a/drivers/gpu/drm/i915/display/intel_dp_mst.c b/drivers/gpu/drm/i915/display/intel_dp_mst.c
index cd4e61026d98..4077a979a924 100644
--- a/drivers/gpu/drm/i915/display/intel_dp_mst.c
+++ b/drivers/gpu/drm/i915/display/intel_dp_mst.c
@@ -29,6 +29,7 @@
 #include <drm/drm_probe_helper.h>
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_atomic.h"
 #include "intel_audio.h"
 #include "intel_connector.h"
diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.c b/drivers/gpu/drm/i915/display/intel_dpio_phy.c
index 8732b8722ed7..7eb7440b3180 100644
--- a/drivers/gpu/drm/i915/display/intel_dpio_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.c
@@ -21,6 +21,7 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
+#include "i915_reg.h"
 #include "intel_ddi.h"
 #include "intel_ddi_buf_trans.h"
 #include "intel_de.h"
@@ -655,6 +656,48 @@ bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder)
 	return mask;
 }
 
+enum dpio_channel vlv_dig_port_to_channel(struct intel_digital_port *dig_port)
+{
+	switch (dig_port->base.port) {
+	default:
+		MISSING_CASE(dig_port->base.port);
+		fallthrough;
+	case PORT_B:
+	case PORT_D:
+		return DPIO_CH0;
+	case PORT_C:
+		return DPIO_CH1;
+	}
+}
+
+enum dpio_phy vlv_dig_port_to_phy(struct intel_digital_port *dig_port)
+{
+	switch (dig_port->base.port) {
+	default:
+		MISSING_CASE(dig_port->base.port);
+		fallthrough;
+	case PORT_B:
+	case PORT_C:
+		return DPIO_PHY0;
+	case PORT_D:
+		return DPIO_PHY1;
+	}
+}
+
+enum dpio_channel vlv_pipe_to_channel(enum pipe pipe)
+{
+	switch (pipe) {
+	default:
+		MISSING_CASE(pipe);
+		fallthrough;
+	case PIPE_A:
+	case PIPE_C:
+		return DPIO_CH0;
+	case PIPE_B:
+		return DPIO_CH1;
+	}
+}
+
 void chv_set_phy_signal_level(struct intel_encoder *encoder,
 			      const struct intel_crtc_state *crtc_state,
 			      u32 deemph_reg_value, u32 margin_reg_value,
diff --git a/drivers/gpu/drm/i915/display/intel_dpio_phy.h b/drivers/gpu/drm/i915/display/intel_dpio_phy.h
index 9c3d008e8e1a..9c7725dacb47 100644
--- a/drivers/gpu/drm/i915/display/intel_dpio_phy.h
+++ b/drivers/gpu/drm/i915/display/intel_dpio_phy.h
@@ -8,13 +8,24 @@
 
 #include <linux/types.h>
 
-enum dpio_channel;
-enum dpio_phy;
+enum pipe;
 enum port;
 struct drm_i915_private;
 struct intel_crtc_state;
+struct intel_digital_port;
 struct intel_encoder;
 
+enum dpio_channel {
+	DPIO_CH0,
+	DPIO_CH1,
+};
+
+enum dpio_phy {
+	DPIO_PHY0,
+	DPIO_PHY1,
+	DPIO_PHY2,
+};
+
 void bxt_port_to_phy_channel(struct drm_i915_private *dev_priv, enum port port,
 			     enum dpio_phy *phy, enum dpio_channel *ch);
 void bxt_ddi_phy_set_signal_levels(struct intel_encoder *encoder,
@@ -30,6 +41,10 @@ void bxt_ddi_phy_set_lane_optim_mask(struct intel_encoder *encoder,
 				     u8 lane_lat_optim_mask);
 u8 bxt_ddi_phy_get_lane_lat_optim_mask(struct intel_encoder *encoder);
 
+enum dpio_channel vlv_dig_port_to_channel(struct intel_digital_port *dig_port);
+enum dpio_phy vlv_dig_port_to_phy(struct intel_digital_port *dig_port);
+enum dpio_channel vlv_pipe_to_channel(enum pipe pipe);
+
 void chv_set_phy_signal_level(struct intel_encoder *encoder,
 			      const struct intel_crtc_state *crtc_state,
 			      u32 deemph_reg_value, u32 margin_reg_value,
diff --git a/drivers/gpu/drm/i915/display/intel_dpll.c b/drivers/gpu/drm/i915/display/intel_dpll.c
index b15ba78d64d6..c236aafe9be0 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll.c
@@ -6,10 +6,12 @@
 #include <linux/kernel.h>
 #include <linux/string_helpers.h>
 
+#include "i915_reg.h"
 #include "intel_crtc.h"
 #include "intel_de.h"
 #include "intel_display.h"
 #include "intel_display_types.h"
+#include "intel_dpio_phy.h"
 #include "intel_dpll.h"
 #include "intel_lvds.h"
 #include "intel_panel.h"
diff --git a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
index 7c6c094a0a01..1974eb580ed1 100644
--- a/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/display/intel_dpll_mgr.c
@@ -23,6 +23,7 @@
 
 #include <linux/string_helpers.h>
 
+#include "i915_reg.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
 #include "intel_dkl_phy.h"
@@ -30,6 +31,7 @@
 #include "intel_dpio_phy.h"
 #include "intel_dpll.h"
 #include "intel_dpll_mgr.h"
+#include "intel_hti.h"
 #include "intel_mg_phy_regs.h"
 #include "intel_pch_refclk.h"
 #include "intel_tc.h"
@@ -3163,14 +3165,6 @@ static void icl_update_active_dpll(struct intel_atomic_state *state,
 	icl_set_active_port_dpll(crtc_state, port_dpll_id);
 }
 
-static u32 intel_get_hti_plls(struct drm_i915_private *i915)
-{
-	if (!(i915->hti_state & HDPORT_ENABLED))
-		return 0;
-
-	return REG_FIELD_GET(HDPORT_DPLL_USED_MASK, i915->hti_state);
-}
-
 static int icl_compute_combo_phy_dpll(struct intel_atomic_state *state,
 				      struct intel_crtc *crtc)
 {
@@ -3245,7 +3239,7 @@ static int icl_get_combo_phy_dpll(struct intel_atomic_state *state,
 	}
 
 	/* Eliminate DPLLs from consideration if reserved by HTI */
-	dpll_mask &= ~intel_get_hti_plls(dev_priv);
+	dpll_mask &= ~intel_hti_dpll_mask(dev_priv);
 
 	port_dpll->pll = intel_find_shared_dpll(state, crtc,
 						&port_dpll->hw_state,
diff --git a/drivers/gpu/drm/i915/display/intel_drrs.c b/drivers/gpu/drm/i915/display/intel_drrs.c
index e27408efaae2..5b9e44443814 100644
--- a/drivers/gpu/drm/i915/display/intel_drrs.c
+++ b/drivers/gpu/drm/i915/display/intel_drrs.c
@@ -4,6 +4,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_atomic.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
diff --git a/drivers/gpu/drm/i915/display/intel_dsb.c b/drivers/gpu/drm/i915/display/intel_dsb.c
index fc9c3e41c333..1e1c6107d51b 100644
--- a/drivers/gpu/drm/i915/display/intel_dsb.c
+++ b/drivers/gpu/drm/i915/display/intel_dsb.c
@@ -7,6 +7,7 @@
 #include "gem/i915_gem_internal.h"
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
 #include "intel_dsb.h"
diff --git a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
index 75e8cc4337c9..fce69fa446d5 100644
--- a/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
+++ b/drivers/gpu/drm/i915/display/intel_dsi_vbt.c
@@ -137,9 +137,9 @@ static enum port intel_dsi_seq_port_to_port(struct intel_dsi *intel_dsi,
 		return ffs(intel_dsi->ports) - 1;
 
 	if (seq_port) {
-		if (intel_dsi->ports & PORT_B)
+		if (intel_dsi->ports & BIT(PORT_B))
 			return PORT_B;
-		else if (intel_dsi->ports & PORT_C)
+		else if (intel_dsi->ports & BIT(PORT_C))
 			return PORT_C;
 	}
 
diff --git a/drivers/gpu/drm/i915/display/intel_dvo.c b/drivers/gpu/drm/i915/display/intel_dvo.c
index 595087288922..c86f9890754d 100644
--- a/drivers/gpu/drm/i915/display/intel_dvo.c
+++ b/drivers/gpu/drm/i915/display/intel_dvo.c
@@ -32,6 +32,7 @@
 #include <drm/drm_crtc.h>
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_connector.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
diff --git a/drivers/gpu/drm/i915/display/intel_dvo_dev.h b/drivers/gpu/drm/i915/display/intel_dvo_dev.h
index 50205f064d93..ecff7b190856 100644
--- a/drivers/gpu/drm/i915/display/intel_dvo_dev.h
+++ b/drivers/gpu/drm/i915/display/intel_dvo_dev.h
@@ -23,12 +23,12 @@
 #ifndef __INTEL_DVO_DEV_H__
 #define __INTEL_DVO_DEV_H__
 
-#include <linux/i2c.h>
-
-#include <drm/drm_crtc.h>
-
 #include "i915_reg_defs.h"
 
+enum drm_connector_status;
+struct drm_display_mode;
+struct i2c_adapter;
+
 struct intel_dvo_device {
 	const char *name;
 	int type;
diff --git a/drivers/gpu/drm/i915/display/intel_fbc.c b/drivers/gpu/drm/i915/display/intel_fbc.c
index 3f24f326b989..b5ee5ea0d010 100644
--- a/drivers/gpu/drm/i915/display/intel_fbc.c
+++ b/drivers/gpu/drm/i915/display/intel_fbc.c
@@ -1183,7 +1183,7 @@ static bool intel_fbc_can_flip_nuke(struct intel_atomic_state *state,
 	const struct drm_framebuffer *old_fb = old_plane_state->hw.fb;
 	const struct drm_framebuffer *new_fb = new_plane_state->hw.fb;
 
-	if (drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi))
+	if (intel_crtc_needs_modeset(new_crtc_state))
 		return false;
 
 	if (!intel_fbc_is_ok(old_plane_state) ||
diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
index 05c3d4739c85..03ed4607a46d 100644
--- a/drivers/gpu/drm/i915/display/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
@@ -124,6 +124,8 @@ static const struct fb_ops intelfb_ops = {
 	.owner = THIS_MODULE,
 	DRM_FB_HELPER_DEFAULT_OPS,
 	.fb_set_par = intel_fbdev_set_par,
+	.fb_read = drm_fb_helper_cfb_read,
+	.fb_write = drm_fb_helper_cfb_write,
 	.fb_fillrect = drm_fb_helper_cfb_fillrect,
 	.fb_copyarea = drm_fb_helper_cfb_copyarea,
 	.fb_imageblit = drm_fb_helper_cfb_imageblit,
@@ -254,7 +256,7 @@ static int intelfb_create(struct drm_fb_helper *helper,
 		goto out_unlock;
 	}
 
-	info = drm_fb_helper_alloc_fbi(helper);
+	info = drm_fb_helper_alloc_info(helper);
 	if (IS_ERR(info)) {
 		drm_err(&dev_priv->drm, "Failed to allocate fb_info (%pe)\n", info);
 		ret = PTR_ERR(info);
@@ -584,7 +586,7 @@ void intel_fbdev_unregister(struct drm_i915_private *dev_priv)
 	if (!current_is_async())
 		intel_fbdev_sync(ifbdev);
 
-	drm_fb_helper_unregister_fbi(&ifbdev->helper);
+	drm_fb_helper_unregister_info(&ifbdev->helper);
 }
 
 void intel_fbdev_fini(struct drm_i915_private *dev_priv)
@@ -627,7 +629,7 @@ void intel_fbdev_set_suspend(struct drm_device *dev, int state, bool synchronous
 	if (!ifbdev || !ifbdev->vma)
 		goto set_suspend;
 
-	info = ifbdev->helper.fbdev;
+	info = ifbdev->helper.info;
 
 	if (synchronous) {
 		/* Flush any pending work to turn the console on, and then
diff --git a/drivers/gpu/drm/i915/display/intel_fdi.c b/drivers/gpu/drm/i915/display/intel_fdi.c
index 7f47e5c85c81..063f1da4f229 100644
--- a/drivers/gpu/drm/i915/display/intel_fdi.c
+++ b/drivers/gpu/drm/i915/display/intel_fdi.c
@@ -5,6 +5,7 @@
 
 #include <linux/string_helpers.h>
 
+#include "i915_reg.h"
 #include "intel_atomic.h"
 #include "intel_crtc.h"
 #include "intel_ddi.h"
diff --git a/drivers/gpu/drm/i915/display/intel_fifo_underrun.h b/drivers/gpu/drm/i915/display/intel_fifo_underrun.h
index e04f22ac1f49..2e47d7d3c101 100644
--- a/drivers/gpu/drm/i915/display/intel_fifo_underrun.h
+++ b/drivers/gpu/drm/i915/display/intel_fifo_underrun.h
@@ -8,9 +8,8 @@
 
 #include <linux/types.h>
 
-#include "intel_display.h"
-
 struct drm_i915_private;
+enum pipe;
 
 bool intel_set_cpu_fifo_underrun_reporting(struct drm_i915_private *dev_priv,
 					   enum pipe pipe, bool enable);
diff --git a/drivers/gpu/drm/i915/display/intel_frontbuffer.c b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
index d80e3e8a9b01..17a7aa8b28c2 100644
--- a/drivers/gpu/drm/i915/display/intel_frontbuffer.c
+++ b/drivers/gpu/drm/i915/display/intel_frontbuffer.c
@@ -88,7 +88,7 @@ static void frontbuffer_flush(struct drm_i915_private *i915,
 	if (!frontbuffer_bits)
 		return;
 
-	trace_intel_frontbuffer_flush(frontbuffer_bits, origin);
+	trace_intel_frontbuffer_flush(i915, frontbuffer_bits, origin);
 
 	might_sleep();
 	intel_drrs_flush(i915, frontbuffer_bits);
@@ -176,7 +176,7 @@ void __intel_fb_invalidate(struct intel_frontbuffer *front,
 		spin_unlock(&i915->display.fb_tracking.lock);
 	}
 
-	trace_intel_frontbuffer_invalidate(frontbuffer_bits, origin);
+	trace_intel_frontbuffer_invalidate(i915, frontbuffer_bits, origin);
 
 	might_sleep();
 	intel_psr_invalidate(i915, frontbuffer_bits, origin);
diff --git a/drivers/gpu/drm/i915/display/intel_global_state.c b/drivers/gpu/drm/i915/display/intel_global_state.c
index 7a19215ad844..02b593b1e2ea 100644
--- a/drivers/gpu/drm/i915/display/intel_global_state.c
+++ b/drivers/gpu/drm/i915/display/intel_global_state.c
@@ -45,14 +45,14 @@ void intel_atomic_global_obj_init(struct drm_i915_private *dev_priv,
 
 	obj->state = state;
 	obj->funcs = funcs;
-	list_add_tail(&obj->head, &dev_priv->global_obj_list);
+	list_add_tail(&obj->head, &dev_priv->display.global.obj_list);
 }
 
 void intel_atomic_global_obj_cleanup(struct drm_i915_private *dev_priv)
 {
 	struct intel_global_obj *obj, *next;
 
-	list_for_each_entry_safe(obj, next, &dev_priv->global_obj_list, head) {
+	list_for_each_entry_safe(obj, next, &dev_priv->display.global.obj_list, head) {
 		list_del(&obj->head);
 
 		drm_WARN_ON(&dev_priv->drm, kref_read(&obj->state->ref) != 1);
diff --git a/drivers/gpu/drm/i915/display/intel_global_state.h b/drivers/gpu/drm/i915/display/intel_global_state.h
index 1f16fa3073c9..f01ee0bb3e5a 100644
--- a/drivers/gpu/drm/i915/display/intel_global_state.h
+++ b/drivers/gpu/drm/i915/display/intel_global_state.h
@@ -27,7 +27,7 @@ struct intel_global_obj {
 };
 
 #define intel_for_each_global_obj(obj, dev_priv) \
-	list_for_each_entry(obj, &(dev_priv)->global_obj_list, head)
+	list_for_each_entry(obj, &(dev_priv)->display.global.obj_list, head)
 
 #define for_each_new_global_obj_in_state(__state, obj, new_obj_state, __i) \
 	for ((__i) = 0; \
diff --git a/drivers/gpu/drm/i915/display/intel_gmbus.c b/drivers/gpu/drm/i915/display/intel_gmbus.c
index 74443f57f62d..a5840a28a69d 100644
--- a/drivers/gpu/drm/i915/display/intel_gmbus.c
+++ b/drivers/gpu/drm/i915/display/intel_gmbus.c
@@ -34,6 +34,8 @@
 #include <drm/display/drm_hdcp_helper.h>
 
 #include "i915_drv.h"
+#include "i915_irq.h"
+#include "i915_reg.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
 #include "intel_gmbus.h"
@@ -49,9 +51,27 @@ struct intel_gmbus {
 	struct drm_i915_private *i915;
 };
 
+enum gmbus_gpio {
+	GPIOA,
+	GPIOB,
+	GPIOC,
+	GPIOD,
+	GPIOE,
+	GPIOF,
+	GPIOG,
+	GPIOH,
+	__GPIOI_UNUSED,
+	GPIOJ,
+	GPIOK,
+	GPIOL,
+	GPIOM,
+	GPION,
+	GPIOO,
+};
+
 struct gmbus_pin {
 	const char *name;
-	enum i915_gpio gpio;
+	enum gmbus_gpio gpio;
 };
 
 /* Map gmbus pin pairs to names and registers. */
diff --git a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h
index 2a3733e8966c..8023c85c7fa0 100644
--- a/drivers/gpu/drm/i915/display/intel_hdcp_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_hdcp_regs.h
@@ -6,7 +6,7 @@
 #ifndef __INTEL_HDCP_REGS_H__
 #define __INTEL_HDCP_REGS_H__
 
-#include "i915_reg_defs.h"
+#include "intel_display_reg_defs.h"
 
 /* HDCP Key Registers */
 #define HDCP_KEY_CONF			_MMIO(0x66c00)
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.c b/drivers/gpu/drm/i915/display/intel_hdmi.c
index 02f8374ea51f..bac85d88054f 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.c
@@ -42,6 +42,7 @@
 
 #include "i915_debugfs.h"
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_atomic.h"
 #include "intel_connector.h"
 #include "intel_ddi.h"
@@ -2057,13 +2058,6 @@ static bool hdmi_bpc_possible(const struct intel_crtc_state *crtc_state, int bpc
 	if (!intel_hdmi_source_bpc_possible(dev_priv, bpc))
 		return false;
 
-	/*
-	 * HDMI deep color affects the clocks, so it's only possible
-	 * when not cloning with other encoder types.
-	 */
-	if (bpc > 8 && crtc_state->output_types != BIT(INTEL_OUTPUT_HDMI))
-		return false;
-
 	/* Display Wa_1405510057:icl,ehl */
 	if (intel_hdmi_is_ycbcr420(crtc_state) &&
 	    bpc == 10 && DISPLAY_VER(dev_priv) == 11 &&
@@ -2190,9 +2184,13 @@ static bool intel_hdmi_has_audio(struct intel_encoder *encoder,
 }
 
 static enum intel_output_format
-intel_hdmi_output_format(struct intel_connector *connector,
+intel_hdmi_output_format(const struct intel_crtc_state *crtc_state,
+			 struct intel_connector *connector,
 			 bool ycbcr_420_output)
 {
+	if (!crtc_state->has_hdmi_sink)
+		return INTEL_OUTPUT_FORMAT_RGB;
+
 	if (connector->base.ycbcr_420_allowed && ycbcr_420_output)
 		return INTEL_OUTPUT_FORMAT_YCBCR420;
 	else
@@ -2211,7 +2209,8 @@ static int intel_hdmi_compute_output_format(struct intel_encoder *encoder,
 	bool ycbcr_420_only = drm_mode_is_420_only(info, adjusted_mode);
 	int ret;
 
-	crtc_state->output_format = intel_hdmi_output_format(connector, ycbcr_420_only);
+	crtc_state->output_format =
+		intel_hdmi_output_format(crtc_state, connector, ycbcr_420_only);
 
 	if (ycbcr_420_only && !intel_hdmi_is_ycbcr420(crtc_state)) {
 		drm_dbg_kms(&i915->drm,
@@ -2226,13 +2225,19 @@ static int intel_hdmi_compute_output_format(struct intel_encoder *encoder,
 		    !drm_mode_is_420_also(info, adjusted_mode))
 			return ret;
 
-		crtc_state->output_format = intel_hdmi_output_format(connector, true);
+		crtc_state->output_format = intel_hdmi_output_format(crtc_state, connector, true);
 		ret = intel_hdmi_compute_clock(encoder, crtc_state, respect_downstream_limits);
 	}
 
 	return ret;
 }
 
+static bool intel_hdmi_is_cloned(const struct intel_crtc_state *crtc_state)
+{
+	return crtc_state->uapi.encoder_mask &&
+		!is_power_of_2(crtc_state->uapi.encoder_mask);
+}
+
 int intel_hdmi_compute_config(struct intel_encoder *encoder,
 			      struct intel_crtc_state *pipe_config,
 			      struct drm_connector_state *conn_state)
@@ -2248,8 +2253,9 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder,
 		return -EINVAL;
 
 	pipe_config->output_format = INTEL_OUTPUT_FORMAT_RGB;
-	pipe_config->has_hdmi_sink = intel_has_hdmi_sink(intel_hdmi,
-							 conn_state);
+	pipe_config->has_hdmi_sink =
+		intel_has_hdmi_sink(intel_hdmi, conn_state) &&
+		!intel_hdmi_is_cloned(pipe_config);
 
 	if (pipe_config->has_hdmi_sink)
 		pipe_config->has_infoframe = true;
@@ -2257,9 +2263,6 @@ int intel_hdmi_compute_config(struct intel_encoder *encoder,
 	if (adjusted_mode->flags & DRM_MODE_FLAG_DBLCLK)
 		pipe_config->pixel_multiplier = 2;
 
-	if (HAS_PCH_SPLIT(dev_priv) && !HAS_DDI(dev_priv))
-		pipe_config->has_pch_encoder = true;
-
 	pipe_config->has_audio =
 		intel_hdmi_has_audio(encoder, pipe_config, conn_state);
 
diff --git a/drivers/gpu/drm/i915/display/intel_hdmi.h b/drivers/gpu/drm/i915/display/intel_hdmi.h
index 93f65a917c36..774dda2376ed 100644
--- a/drivers/gpu/drm/i915/display/intel_hdmi.h
+++ b/drivers/gpu/drm/i915/display/intel_hdmi.h
@@ -6,20 +6,20 @@
 #ifndef __INTEL_HDMI_H__
 #define __INTEL_HDMI_H__
 
-#include <linux/hdmi.h>
 #include <linux/types.h>
 
+enum hdmi_infoframe_type;
+enum port;
 struct drm_connector;
+struct drm_connector_state;
 struct drm_encoder;
 struct drm_i915_private;
 struct intel_connector;
+struct intel_crtc_state;
 struct intel_digital_port;
 struct intel_encoder;
-struct intel_crtc_state;
 struct intel_hdmi;
-struct drm_connector_state;
 union hdmi_infoframe;
-enum port;
 
 void intel_hdmi_init_connector(struct intel_digital_port *dig_port,
 			       struct intel_connector *intel_connector);
diff --git a/drivers/gpu/drm/i915/display/intel_hti.c b/drivers/gpu/drm/i915/display/intel_hti.c
new file mode 100644
index 000000000000..12a1f4ce1a77
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_hti.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#include "i915_drv.h"
+#include "intel_de.h"
+#include "intel_display.h"
+#include "intel_hti.h"
+#include "intel_hti_regs.h"
+
+void intel_hti_init(struct drm_i915_private *i915)
+{
+	/*
+	 * If the platform has HTI, we need to find out whether it has reserved
+	 * any display resources before we create our display outputs.
+	 */
+	if (INTEL_INFO(i915)->display.has_hti)
+		i915->display.hti.state = intel_de_read(i915, HDPORT_STATE);
+}
+
+bool intel_hti_uses_phy(struct drm_i915_private *i915, enum phy phy)
+{
+	return i915->display.hti.state & HDPORT_ENABLED &&
+		i915->display.hti.state & HDPORT_DDI_USED(phy);
+}
+
+u32 intel_hti_dpll_mask(struct drm_i915_private *i915)
+{
+	if (!(i915->display.hti.state & HDPORT_ENABLED))
+		return 0;
+
+	/*
+	 * Note: This is subtle. The values must coincide with what's defined
+	 * for the platform.
+	 */
+	return REG_FIELD_GET(HDPORT_DPLL_USED_MASK, i915->display.hti.state);
+}
diff --git a/drivers/gpu/drm/i915/display/intel_hti.h b/drivers/gpu/drm/i915/display/intel_hti.h
new file mode 100644
index 000000000000..2893d6668657
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_hti.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __INTEL_HTI_H__
+#define __INTEL_HTI_H__
+
+#include <linux/types.h>
+
+struct drm_i915_private;
+enum phy;
+
+void intel_hti_init(struct drm_i915_private *i915);
+bool intel_hti_uses_phy(struct drm_i915_private *i915, enum phy phy);
+u32 intel_hti_dpll_mask(struct drm_i915_private *i915);
+
+#endif /* __INTEL_HTI_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_hti_regs.h b/drivers/gpu/drm/i915/display/intel_hti_regs.h
new file mode 100644
index 000000000000..e206f2837fc8
--- /dev/null
+++ b/drivers/gpu/drm/i915/display/intel_hti_regs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2022 Intel Corporation
+ */
+
+#ifndef __INTEL_HTI_REGS_H__
+#define __INTEL_HTI_REGS_H__
+
+#include "i915_reg_defs.h"
+
+#define HDPORT_STATE			_MMIO(0x45050)
+#define   HDPORT_DPLL_USED_MASK		REG_GENMASK(15, 12)
+#define   HDPORT_DDI_USED(phy)		REG_BIT(2 * (phy) + 1)
+#define   HDPORT_ENABLED		REG_BIT(0)
+
+#endif /* __INTEL_HTI_REGS_H__ */
diff --git a/drivers/gpu/drm/i915/display/intel_lpe_audio.c b/drivers/gpu/drm/i915/display/intel_lpe_audio.c
index 22ca8754ea96..8aaaef4d7856 100644
--- a/drivers/gpu/drm/i915/display/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/display/intel_lpe_audio.c
@@ -71,6 +71,8 @@
 #include <drm/intel_lpe_audio.h>
 
 #include "i915_drv.h"
+#include "i915_irq.h"
+#include "i915_reg.h"
 #include "intel_de.h"
 #include "intel_lpe_audio.h"
 #include "intel_pci_config.h"
diff --git a/drivers/gpu/drm/i915/display/intel_lspcon.c b/drivers/gpu/drm/i915/display/intel_lspcon.c
index 15d59de8810e..9ff1c0b223ad 100644
--- a/drivers/gpu/drm/i915/display/intel_lspcon.c
+++ b/drivers/gpu/drm/i915/display/intel_lspcon.c
@@ -28,6 +28,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_edid.h>
 
+#include "i915_reg.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
 #include "intel_dp.h"
diff --git a/drivers/gpu/drm/i915/display/intel_lvds.c b/drivers/gpu/drm/i915/display/intel_lvds.c
index 246787bbf5ef..7bf1bdfd03ec 100644
--- a/drivers/gpu/drm/i915/display/intel_lvds.c
+++ b/drivers/gpu/drm/i915/display/intel_lvds.c
@@ -39,6 +39,7 @@
 #include <drm/drm_edid.h>
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_atomic.h"
 #include "intel_backlight.h"
 #include "intel_connector.h"
diff --git a/drivers/gpu/drm/i915/display/intel_mg_phy_regs.h b/drivers/gpu/drm/i915/display/intel_mg_phy_regs.h
index 07978f8d5fb7..0e8248bce52d 100644
--- a/drivers/gpu/drm/i915/display/intel_mg_phy_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_mg_phy_regs.h
@@ -6,7 +6,7 @@
 #ifndef __INTEL_MG_PHY_REGS__
 #define __INTEL_MG_PHY_REGS__
 
-#include "i915_reg_defs.h"
+#include "intel_display_reg_defs.h"
 
 #define MG_PHY_PORT_LN(ln, tc_port, ln0p1, ln0p2, ln1p1) \
 	_MMIO(_PORT(tc_port, ln0p1, ln0p2) + (ln) * ((ln1p1) - (ln0p1)))
diff --git a/drivers/gpu/drm/i915/display/intel_modeset_setup.c b/drivers/gpu/drm/i915/display/intel_modeset_setup.c
index 9d8ca230be39..96395bfbd41d 100644
--- a/drivers/gpu/drm/i915/display/intel_modeset_setup.c
+++ b/drivers/gpu/drm/i915/display/intel_modeset_setup.c
@@ -10,6 +10,7 @@
 #include <drm/drm_atomic_state_helper.h>
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_atomic.h"
 #include "intel_bw.h"
 #include "intel_color.h"
diff --git a/drivers/gpu/drm/i915/display/intel_panel.c b/drivers/gpu/drm/i915/display/intel_panel.c
index 69ce77711b7c..1640726bfbf6 100644
--- a/drivers/gpu/drm/i915/display/intel_panel.c
+++ b/drivers/gpu/drm/i915/display/intel_panel.c
@@ -31,6 +31,7 @@
 #include <linux/kernel.h>
 #include <linux/pwm.h>
 
+#include "i915_reg.h"
 #include "intel_backlight.h"
 #include "intel_connector.h"
 #include "intel_de.h"
diff --git a/drivers/gpu/drm/i915/display/intel_pch_display.c b/drivers/gpu/drm/i915/display/intel_pch_display.c
index 837152dca063..cecc0d007cf3 100644
--- a/drivers/gpu/drm/i915/display/intel_pch_display.c
+++ b/drivers/gpu/drm/i915/display/intel_pch_display.c
@@ -4,6 +4,7 @@
  */
 
 #include "g4x_dp.h"
+#include "i915_reg.h"
 #include "intel_crt.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
diff --git a/drivers/gpu/drm/i915/display/intel_pch_refclk.c b/drivers/gpu/drm/i915/display/intel_pch_refclk.c
index a66097cdc1e0..08a94365b7d1 100644
--- a/drivers/gpu/drm/i915/display/intel_pch_refclk.c
+++ b/drivers/gpu/drm/i915/display/intel_pch_refclk.c
@@ -3,6 +3,7 @@
  * Copyright © 2021 Intel Corporation
  */
 
+#include "i915_reg.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
 #include "intel_panel.h"
diff --git a/drivers/gpu/drm/i915/display/intel_pipe_crc.c b/drivers/gpu/drm/i915/display/intel_pipe_crc.c
index 1c74388c60d7..e9774670e3f6 100644
--- a/drivers/gpu/drm/i915/display/intel_pipe_crc.c
+++ b/drivers/gpu/drm/i915/display/intel_pipe_crc.c
@@ -24,11 +24,12 @@
  *
  */
 
-#include <linux/circ_buf.h>
 #include <linux/ctype.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
+#include "i915_irq.h"
+#include "i915_reg.h"
 #include "intel_atomic.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
diff --git a/drivers/gpu/drm/i915/display/intel_pps.c b/drivers/gpu/drm/i915/display/intel_pps.c
index 21944f5bf3a8..9bbf41a076f7 100644
--- a/drivers/gpu/drm/i915/display/intel_pps.c
+++ b/drivers/gpu/drm/i915/display/intel_pps.c
@@ -5,10 +5,12 @@
 
 #include "g4x_dp.h"
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_de.h"
 #include "intel_display_power_well.h"
 #include "intel_display_types.h"
 #include "intel_dp.h"
+#include "intel_dpio_phy.h"
 #include "intel_dpll.h"
 #include "intel_lvds.h"
 #include "intel_pps.h"
@@ -1098,7 +1100,13 @@ bool intel_pps_have_panel_power_or_vdd(struct intel_dp *intel_dp)
 
 static void pps_init_timestamps(struct intel_dp *intel_dp)
 {
-	intel_dp->pps.panel_power_off_time = ktime_get_boottime();
+	/*
+	 * Initialize panel power off time to 0, assuming panel power could have
+	 * been toggled between kernel boot and now only by a previously loaded
+	 * and removed i915, which has already ensured sufficient power off
+	 * delay at module remove.
+	 */
+	intel_dp->pps.panel_power_off_time = 0;
 	intel_dp->pps.last_power_on = jiffies;
 	intel_dp->pps.last_backlight_off = jiffies;
 }
diff --git a/drivers/gpu/drm/i915/display/intel_psr.c b/drivers/gpu/drm/i915/display/intel_psr.c
index 904a1049eff3..5b678916e6db 100644
--- a/drivers/gpu/drm/i915/display/intel_psr.c
+++ b/drivers/gpu/drm/i915/display/intel_psr.c
@@ -27,6 +27,7 @@
 #include "display/intel_dp.h"
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_atomic.h"
 #include "intel_crtc.h"
 #include "intel_de.h"
@@ -779,6 +780,7 @@ static bool psr2_granularity_check(struct intel_dp *intel_dp,
 				   struct intel_crtc_state *crtc_state)
 {
 	struct drm_i915_private *dev_priv = dp_to_i915(intel_dp);
+	const struct drm_dsc_config *vdsc_cfg = &crtc_state->dsc.config;
 	const int crtc_hdisplay = crtc_state->hw.adjusted_mode.crtc_hdisplay;
 	const int crtc_vdisplay = crtc_state->hw.adjusted_mode.crtc_vdisplay;
 	u16 y_granularity = 0;
@@ -809,6 +811,10 @@ static bool psr2_granularity_check(struct intel_dp *intel_dp,
 	if (y_granularity == 0 || crtc_vdisplay % y_granularity)
 		return false;
 
+	if (crtc_state->dsc.compression_enable &&
+	    vdsc_cfg->slice_height % y_granularity)
+		return false;
+
 	crtc_state->su_y_granularity = y_granularity;
 	return true;
 }
@@ -1470,7 +1476,8 @@ unlock:
 
 static u32 man_trk_ctl_enable_bit_get(struct drm_i915_private *dev_priv)
 {
-	return IS_ALDERLAKE_P(dev_priv) ? 0 : PSR2_MAN_TRK_CTL_ENABLE;
+	return IS_ALDERLAKE_P(dev_priv) || DISPLAY_VER(dev_priv) >= 14 ? 0 :
+		PSR2_MAN_TRK_CTL_ENABLE;
 }
 
 static u32 man_trk_ctl_single_full_frame_bit_get(struct drm_i915_private *dev_priv)
@@ -1482,14 +1489,14 @@ static u32 man_trk_ctl_single_full_frame_bit_get(struct drm_i915_private *dev_pr
 
 static u32 man_trk_ctl_partial_frame_bit_get(struct drm_i915_private *dev_priv)
 {
-	return IS_ALDERLAKE_P(dev_priv) ?
+	return IS_ALDERLAKE_P(dev_priv) || DISPLAY_VER(dev_priv) >= 14 ?
 	       ADLP_PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE :
 	       PSR2_MAN_TRK_CTL_SF_PARTIAL_FRAME_UPDATE;
 }
 
 static u32 man_trk_ctl_continuos_full_frame(struct drm_i915_private *dev_priv)
 {
-	return IS_ALDERLAKE_P(dev_priv) ?
+	return IS_ALDERLAKE_P(dev_priv) || DISPLAY_VER(dev_priv) >= 14 ?
 	       ADLP_PSR2_MAN_TRK_CTL_SF_CONTINUOS_FULL_FRAME :
 	       PSR2_MAN_TRK_CTL_SF_CONTINUOS_FULL_FRAME;
 }
@@ -1678,9 +1685,6 @@ static void intel_psr2_sel_fetch_pipe_alignment(const struct intel_crtc_state *c
 	pipe_clip->y1 -= pipe_clip->y1 % y_alignment;
 	if (pipe_clip->y2 % y_alignment)
 		pipe_clip->y2 = ((pipe_clip->y2 / y_alignment) + 1) * y_alignment;
-
-	if (IS_ALDERLAKE_P(dev_priv) && crtc_state->dsc.compression_enable)
-		drm_warn(&dev_priv->drm, "Missing PSR2 sel fetch alignment with DSC\n");
 }
 
 /*
@@ -2209,8 +2213,11 @@ static void _psr_invalidate_handle(struct intel_dp *intel_dp)
 	if (intel_dp->psr.psr2_sel_fetch_enabled) {
 		u32 val;
 
-		if (intel_dp->psr.psr2_sel_fetch_cff_enabled)
+		if (intel_dp->psr.psr2_sel_fetch_cff_enabled) {
+			/* Send one update otherwise lag is observed in screen */
+			intel_de_write(dev_priv, CURSURFLIVE(intel_dp->psr.pipe), 0);
 			return;
+		}
 
 		val = man_trk_ctl_enable_bit_get(dev_priv) |
 		      man_trk_ctl_partial_frame_bit_get(dev_priv) |
diff --git a/drivers/gpu/drm/i915/display/intel_sdvo.c b/drivers/gpu/drm/i915/display/intel_sdvo.c
index 48b7b1aa37b2..329b9d9af667 100644
--- a/drivers/gpu/drm/i915/display/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/display/intel_sdvo.c
@@ -37,6 +37,7 @@
 #include <drm/drm_edid.h>
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_atomic.h"
 #include "intel_connector.h"
 #include "intel_crtc.h"
diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy.c b/drivers/gpu/drm/i915/display/intel_snps_phy.c
index 937cefd6f78f..c799e891f8b5 100644
--- a/drivers/gpu/drm/i915/display/intel_snps_phy.c
+++ b/drivers/gpu/drm/i915/display/intel_snps_phy.c
@@ -5,6 +5,7 @@
 
 #include <linux/util_macros.h>
 
+#include "i915_reg.h"
 #include "intel_ddi.h"
 #include "intel_ddi_buf_trans.h"
 #include "intel_de.h"
diff --git a/drivers/gpu/drm/i915/display/intel_snps_phy_regs.h b/drivers/gpu/drm/i915/display/intel_snps_phy_regs.h
index 0543465aaf14..a04d692169d4 100644
--- a/drivers/gpu/drm/i915/display/intel_snps_phy_regs.h
+++ b/drivers/gpu/drm/i915/display/intel_snps_phy_regs.h
@@ -6,7 +6,7 @@
 #ifndef __INTEL_SNPS_PHY_REGS__
 #define __INTEL_SNPS_PHY_REGS__
 
-#include "i915_reg_defs.h"
+#include "intel_display_reg_defs.h"
 
 #define _SNPS_PHY_A_BASE			0x168000
 #define _SNPS_PHY_B_BASE			0x169000
diff --git a/drivers/gpu/drm/i915/display/intel_sprite.c b/drivers/gpu/drm/i915/display/intel_sprite.c
index 7649c50b5445..e6b4d24b9cd0 100644
--- a/drivers/gpu/drm/i915/display/intel_sprite.c
+++ b/drivers/gpu/drm/i915/display/intel_sprite.c
@@ -42,6 +42,7 @@
 #include <drm/drm_rect.h>
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "i915_vgpu.h"
 #include "i9xx_plane.h"
 #include "intel_atomic_plane.h"
diff --git a/drivers/gpu/drm/i915/display/intel_sprite.h b/drivers/gpu/drm/i915/display/intel_sprite.h
index 4f63e4967731..4635c7ad23f9 100644
--- a/drivers/gpu/drm/i915/display/intel_sprite.h
+++ b/drivers/gpu/drm/i915/display/intel_sprite.h
@@ -8,14 +8,13 @@
 
 #include <linux/types.h>
 
-#include "intel_display.h"
-
 struct drm_device;
 struct drm_display_mode;
 struct drm_file;
 struct drm_i915_private;
 struct intel_crtc_state;
 struct intel_plane_state;
+enum pipe;
 
 /*
  * FIXME: We should instead only take spinlocks once for the entire update
@@ -34,12 +33,6 @@ int intel_sprite_set_colorkey_ioctl(struct drm_device *dev, void *data,
 int intel_plane_check_src_coordinates(struct intel_plane_state *plane_state);
 int chv_plane_check_rotation(const struct intel_plane_state *plane_state);
 
-static inline u8 icl_hdr_plane_mask(void)
-{
-	return BIT(PLANE_PRIMARY) |
-		BIT(PLANE_SPRITE0) | BIT(PLANE_SPRITE1);
-}
-
 int ivb_plane_min_cdclk(const struct intel_crtc_state *crtc_state,
 			const struct intel_plane_state *plane_state);
 int hsw_plane_min_cdclk(const struct intel_crtc_state *crtc_state,
diff --git a/drivers/gpu/drm/i915/display/intel_tv.c b/drivers/gpu/drm/i915/display/intel_tv.c
index cf7d5c1ab406..4d2101ca1692 100644
--- a/drivers/gpu/drm/i915/display/intel_tv.c
+++ b/drivers/gpu/drm/i915/display/intel_tv.c
@@ -35,6 +35,8 @@
 #include <drm/drm_edid.h>
 
 #include "i915_drv.h"
+#include "i915_reg.h"
+#include "i915_irq.h"
 #include "intel_connector.h"
 #include "intel_crtc.h"
 #include "intel_de.h"
diff --git a/drivers/gpu/drm/i915/display/intel_vdsc.c b/drivers/gpu/drm/i915/display/intel_vdsc.c
index 269f9792390d..9d3b77b41b5c 100644
--- a/drivers/gpu/drm/i915/display/intel_vdsc.c
+++ b/drivers/gpu/drm/i915/display/intel_vdsc.c
@@ -10,6 +10,7 @@
 #include <drm/display/drm_dsc_helper.h>
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_crtc.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
diff --git a/drivers/gpu/drm/i915/display/intel_vga.c b/drivers/gpu/drm/i915/display/intel_vga.c
index b5d058404c14..a69bfcac9a94 100644
--- a/drivers/gpu/drm/i915/display/intel_vga.c
+++ b/drivers/gpu/drm/i915/display/intel_vga.c
@@ -10,6 +10,7 @@
 #include <video/vga.h>
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_de.h"
 #include "intel_vga.h"
 
diff --git a/drivers/gpu/drm/i915/display/intel_vrr.c b/drivers/gpu/drm/i915/display/intel_vrr.c
index 5eac99021875..7b1357e82b69 100644
--- a/drivers/gpu/drm/i915/display/intel_vrr.c
+++ b/drivers/gpu/drm/i915/display/intel_vrr.c
@@ -5,6 +5,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
 #include "intel_vrr.h"
diff --git a/drivers/gpu/drm/i915/display/skl_scaler.c b/drivers/gpu/drm/i915/display/skl_scaler.c
index 4092679be21e..d7390067b7d4 100644
--- a/drivers/gpu/drm/i915/display/skl_scaler.c
+++ b/drivers/gpu/drm/i915/display/skl_scaler.c
@@ -2,6 +2,8 @@
 /*
  * Copyright © 2020 Intel Corporation
  */
+
+#include "i915_reg.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
 #include "intel_fb.h"
diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.c b/drivers/gpu/drm/i915/display/skl_universal_plane.c
index dab9fdfa9212..4b79c2d2d617 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.c
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.c
@@ -9,6 +9,8 @@
 #include <drm/drm_fourcc.h>
 
 #include "i915_drv.h"
+#include "i915_irq.h"
+#include "i915_reg.h"
 #include "intel_atomic_plane.h"
 #include "intel_de.h"
 #include "intel_display_types.h"
@@ -246,6 +248,11 @@ bool icl_is_nv12_y_plane(struct drm_i915_private *dev_priv,
 		icl_nv12_y_plane_mask(dev_priv) & BIT(plane_id);
 }
 
+u8 icl_hdr_plane_mask(void)
+{
+	return BIT(PLANE_PRIMARY) | BIT(PLANE_SPRITE0) | BIT(PLANE_SPRITE1);
+}
+
 bool icl_is_hdr_plane(struct drm_i915_private *dev_priv, enum plane_id plane_id)
 {
 	return DISPLAY_VER(dev_priv) >= 11 &&
diff --git a/drivers/gpu/drm/i915/display/skl_universal_plane.h b/drivers/gpu/drm/i915/display/skl_universal_plane.h
index 351040b64dc7..be64c201f9b3 100644
--- a/drivers/gpu/drm/i915/display/skl_universal_plane.h
+++ b/drivers/gpu/drm/i915/display/skl_universal_plane.h
@@ -30,6 +30,7 @@ int skl_calc_main_surface_offset(const struct intel_plane_state *plane_state,
 
 bool icl_is_nv12_y_plane(struct drm_i915_private *dev_priv,
 			 enum plane_id plane_id);
+u8 icl_hdr_plane_mask(void);
 bool icl_is_hdr_plane(struct drm_i915_private *dev_priv, enum plane_id plane_id);
 
 #endif
diff --git a/drivers/gpu/drm/i915/display/skl_watermark.c b/drivers/gpu/drm/i915/display/skl_watermark.c
index d58e667016e4..e0766d1be966 100644
--- a/drivers/gpu/drm/i915/display/skl_watermark.c
+++ b/drivers/gpu/drm/i915/display/skl_watermark.c
@@ -2744,7 +2744,7 @@ static int skl_wm_add_affected_planes(struct intel_atomic_state *state,
 		 * power well the hardware state will go out of sync
 		 * with the software state.
 		 */
-		if (!drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi) &&
+		if (!intel_crtc_needs_modeset(new_crtc_state) &&
 		    skl_plane_selected_wm_equals(plane,
 						 &old_crtc_state->wm.skl.optimal,
 						 &new_crtc_state->wm.skl.optimal))
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi.c b/drivers/gpu/drm/i915/display/vlv_dsi.c
index 5a741ea4505f..84481030883a 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi.c
+++ b/drivers/gpu/drm/i915/display/vlv_dsi.c
@@ -31,6 +31,7 @@
 #include <drm/drm_mipi_dsi.h>
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_atomic.h"
 #include "intel_backlight.h"
 #include "intel_connector.h"
diff --git a/drivers/gpu/drm/i915/display/vlv_dsi_regs.h b/drivers/gpu/drm/i915/display/vlv_dsi_regs.h
index e065b8f2ee08..abbe427e462e 100644
--- a/drivers/gpu/drm/i915/display/vlv_dsi_regs.h
+++ b/drivers/gpu/drm/i915/display/vlv_dsi_regs.h
@@ -6,7 +6,7 @@
 #ifndef __VLV_DSI_REGS_H__
 #define __VLV_DSI_REGS_H__
 
-#include "i915_reg_defs.h"
+#include "intel_display_reg_defs.h"
 
 #define VLV_MIPI_BASE			VLV_DISPLAY_BASE
 #define BXT_MIPI_BASE			0x60000
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 1df74f7aa3dc..fd556a076d05 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -97,6 +97,8 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	int ret;
 
+	dma_resv_assert_held(dma_buf->resv);
+
 	if (obj->base.size < vma->vm_end - vma->vm_start)
 		return -EINVAL;
 
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index c0e3e9108fc7..94d86ee24693 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -30,6 +30,7 @@
 #include "i915_gem_context.h"
 #include "i915_gem_evict.h"
 #include "i915_gem_ioctls.h"
+#include "i915_reg.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
 
@@ -2466,7 +2467,7 @@ gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
 	/* Check whether the file_priv has already selected one ring. */
 	if ((int)file_priv->bsd_engine < 0)
 		file_priv->bsd_engine =
-			prandom_u32_max(num_vcs_engines(dev_priv));
+			get_random_u32_below(num_vcs_engines(dev_priv));
 
 	return file_priv->bsd_engine;
 }
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index c04d317df96d..7f6353827735 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -17,6 +17,7 @@
 #include "gt/intel_gt.h"
 #include "gt/intel_gt_pm.h"
 #include "gt/intel_migrate.h"
+#include "i915_reg.h"
 #include "i915_ttm_buddy_manager.h"
 
 #include "huge_gem_object.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 8ede4898905a..99c4b866addd 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -13,6 +13,8 @@
 
 #include "i915_cmd_parser.h"
 #include "i915_drv.h"
+#include "i915_irq.h"
+#include "i915_reg.h"
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
 #include "intel_engine.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index d92512780467..2daffa7c7dfd 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -110,6 +110,7 @@
 #include <linux/string_helpers.h>
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 #include "gen8_engine_cs.h"
@@ -3689,7 +3690,7 @@ static void virtual_engine_initial_hint(struct virtual_engine *ve)
 	 * NB This does not force us to execute on this engine, it will just
 	 * typically be the first we inspect for submission.
 	 */
-	swp = prandom_u32_max(ve->num_siblings);
+	swp = get_random_u32_below(ve->num_siblings);
 	if (swp)
 		swap(ve->siblings[swp], ve->siblings[0]);
 }
diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c b/drivers/gpu/drm/i915/gt/intel_gt.c
index 92b075c72536..7eeee5a7cb33 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt.c
@@ -11,6 +11,7 @@
 
 #include "i915_drv.h"
 #include "i915_perf_oa_regs.h"
+#include "i915_reg.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_regs.h"
@@ -1113,6 +1114,11 @@ static void mmio_invalidate_full(struct intel_gt *gt)
 			if (!i915_mmio_reg_offset(rb.reg))
 				continue;
 
+			if (GRAPHICS_VER(i915) == 12 && (engine->class == VIDEO_DECODE_CLASS ||
+			    engine->class == VIDEO_ENHANCEMENT_CLASS ||
+			    engine->class == COMPUTE_CLASS))
+				rb.bit = _MASKED_BIT_ENABLE(rb.bit);
+
 			intel_uncore_write_fw(uncore, rb.reg, rb.bit);
 		}
 		awake |= engine->mask;
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_pm.c b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
index ffccffadc3ad..c065950d0bad 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/gt/intel_gt_pm.c
@@ -7,6 +7,7 @@
 #include <linux/suspend.h>
 
 #include "i915_drv.h"
+#include "i915_irq.h"
 #include "i915_params.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_gt_regs.h b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
index d255c84b6eef..f8eb807b56f9 100644
--- a/drivers/gpu/drm/i915/gt/intel_gt_regs.h
+++ b/drivers/gpu/drm/i915/gt/intel_gt_regs.h
@@ -7,6 +7,7 @@
 #define __INTEL_GT_REGS__
 
 #include "i915_reg_defs.h"
+#include "display/intel_display_reg_defs.h"	/* VLV_DISPLAY_BASE */
 
 #define MCR_REG(offset)	((const i915_mcr_reg_t){ .reg = (offset) })
 
diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
index 919d2462fbf4..e37164a60d37 100644
--- a/drivers/gpu/drm/i915/gt/intel_gtt.c
+++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
@@ -12,6 +12,7 @@
 
 #include "gem/i915_gem_internal.h"
 #include "gem/i915_gem_lmem.h"
+#include "i915_reg.h"
 #include "i915_trace.h"
 #include "i915_utils.h"
 #include "intel_gt.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_ring_submission.c b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
index 7d6db5fc9245..827adb0cfaea 100644
--- a/drivers/gpu/drm/i915/gt/intel_ring_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_ring_submission.c
@@ -12,7 +12,9 @@
 #include "gen6_ppgtt.h"
 #include "gen7_renderclear.h"
 #include "i915_drv.h"
+#include "i915_irq.h"
 #include "i915_mitigations.h"
+#include "i915_reg.h"
 #include "intel_breadcrumbs.h"
 #include "intel_context.h"
 #include "intel_engine_regs.h"
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index c1c2ea303e9d..bf84efb3f15f 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -4,6 +4,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "intel_context.h"
 #include "intel_engine_pm.h"
 #include "intel_engine_regs.h"
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
index 34bcf44873c6..fe06c93cf6e3 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_submission.c
@@ -30,6 +30,7 @@
 #include "intel_guc_submission.h"
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "i915_trace.h"
 
 /**
diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c
index 357c5b65e097..9bafac1eaf48 100644
--- a/drivers/gpu/drm/i915/gvt/cfg_space.c
+++ b/drivers/gpu/drm/i915/gvt/cfg_space.c
@@ -244,7 +244,7 @@ static void emulate_pci_bar_write(struct intel_vgpu *vgpu, unsigned int offset,
 }
 
 /**
- * intel_vgpu_emulate_cfg_read - emulate vGPU configuration space write
+ * intel_vgpu_emulate_cfg_write - emulate vGPU configuration space write
  * @vgpu: target vgpu
  * @offset: offset
  * @p_data: write data ptr
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index de13f102d4fd..0ebf5fbf0e39 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -37,6 +37,7 @@
 #include <linux/slab.h>
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "gt/intel_engine_regs.h"
 #include "gt/intel_gpu_commands.h"
 #include "gt/intel_gt_regs.h"
diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index c7722c818b4d..c033249e73f4 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -36,6 +36,8 @@
 #include "i915_reg.h"
 #include "gvt.h"
 
+#include "display/intel_dpio_phy.h"
+
 static int get_edp_pipe(struct intel_vgpu *vgpu)
 {
 	u32 data = vgpu_vreg(vgpu, _TRANS_DDI_FUNC_CTL_EDP);
diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.h b/drivers/gpu/drm/i915/gvt/dmabuf.h
index 5f8f03fb1d1b..3dcdb6570eda 100644
--- a/drivers/gpu/drm/i915/gvt/dmabuf.h
+++ b/drivers/gpu/drm/i915/gvt/dmabuf.h
@@ -48,7 +48,7 @@ struct intel_vgpu_fb_info {
 	struct intel_vgpu_dmabuf_obj *obj;
 };
 
-/**
+/*
  * struct intel_vgpu_dmabuf_obj- Intel vGPU device buffer object
  */
 struct intel_vgpu_dmabuf_obj {
diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c
index 54fe442238c6..a683c22d5b64 100644
--- a/drivers/gpu/drm/i915/gvt/firmware.c
+++ b/drivers/gpu/drm/i915/gvt/firmware.c
@@ -104,7 +104,7 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt)
 
 	memcpy(p, gvt->firmware.mmio, info->mmio_size);
 
-	crc32_start = offsetof(struct gvt_firmware_header, crc32) + 4;
+	crc32_start = offsetof(struct gvt_firmware_header, version);
 	h->crc32 = crc32_le(0, firmware + crc32_start, size - crc32_start);
 
 	firmware_attr.size = size;
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index ce0eb03709c3..51e5e8fb505b 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -282,11 +282,6 @@ static inline int get_next_pt_type(int type)
 	return gtt_type_table[type].next_pt_type;
 }
 
-static inline int get_pt_type(int type)
-{
-	return gtt_type_table[type].pt_type;
-}
-
 static inline int get_entry_type(int type)
 {
 	return gtt_type_table[type].entry_type;
@@ -2785,7 +2780,7 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt)
  * intel_gvt_clean_gtt - clean up mm components of a GVT device
  * @gvt: GVT device
  *
- * This function is called at the driver unloading stage, to clean up the
+ * This function is called at the driver unloading stage, to clean up
  * the mm components of a GVT device.
  *
  */
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index dbf8d7470b2c..62823c0e13ab 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -227,8 +227,6 @@ struct intel_vgpu {
 	unsigned long nr_cache_entries;
 	struct mutex cache_lock;
 
-	atomic_t released;
-
 	struct kvm_page_track_notifier_node track_node;
 #define NR_BKT (1 << 18)
 	struct hlist_head ptable[NR_BKT];
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index 1cb388484bf0..735fc83e7026 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -43,6 +43,7 @@
 #include "intel_mchbar_regs.h"
 #include "display/intel_display_types.h"
 #include "display/intel_dmc_regs.h"
+#include "display/intel_dpio_phy.h"
 #include "display/intel_fbc.h"
 #include "display/vlv_dsi_pll_regs.h"
 #include "gt/intel_gt_regs.h"
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 7a45e5360caf..f5451adcd489 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -664,18 +664,14 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
 		return -ESRCH;
 	}
 
-	kvm_get_kvm(vgpu->vfio_device.kvm);
-
 	if (__kvmgt_vgpu_exist(vgpu))
 		return -EEXIST;
 
 	vgpu->attached = true;
 
-	kvmgt_protect_table_init(vgpu);
-	gvt_cache_init(vgpu);
-
 	vgpu->track_node.track_write = kvmgt_page_track_write;
 	vgpu->track_node.track_flush_slot = kvmgt_page_track_flush_slot;
+	kvm_get_kvm(vgpu->vfio_device.kvm);
 	kvm_page_track_register_notifier(vgpu->vfio_device.kvm,
 					 &vgpu->track_node);
 
@@ -684,7 +680,6 @@ static int intel_vgpu_open_device(struct vfio_device *vfio_dev)
 
 	intel_gvt_activate_vgpu(vgpu);
 
-	atomic_set(&vgpu->released, 0);
 	return 0;
 }
 
@@ -706,24 +701,25 @@ static void intel_vgpu_close_device(struct vfio_device *vfio_dev)
 	if (!vgpu->attached)
 		return;
 
-	if (atomic_cmpxchg(&vgpu->released, 0, 1))
-		return;
-
 	intel_gvt_release_vgpu(vgpu);
 
 	debugfs_remove(debugfs_lookup(KVMGT_DEBUGFS_FILENAME, vgpu->debugfs));
 
 	kvm_page_track_unregister_notifier(vgpu->vfio_device.kvm,
 					   &vgpu->track_node);
+	kvm_put_kvm(vgpu->vfio_device.kvm);
+
 	kvmgt_protect_table_destroy(vgpu);
 	gvt_cache_destroy(vgpu);
 
+	WARN_ON(vgpu->nr_cache_entries);
+
+	vgpu->gfn_cache = RB_ROOT;
+	vgpu->dma_addr_cache = RB_ROOT;
+
 	intel_vgpu_release_msi_eventfd_ctx(vgpu);
 
 	vgpu->attached = false;
-
-	if (vgpu->vfio_device.kvm)
-		kvm_put_kvm(vgpu->vfio_device.kvm);
 }
 
 static u64 intel_vgpu_get_bar_addr(struct intel_vgpu *vgpu, int bar)
@@ -1451,9 +1447,17 @@ static int intel_vgpu_init_dev(struct vfio_device *vfio_dev)
 	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
 	struct intel_vgpu_type *type =
 		container_of(mdev->type, struct intel_vgpu_type, type);
+	int ret;
 
 	vgpu->gvt = kdev_to_i915(mdev->type->parent->dev)->gvt;
-	return intel_gvt_create_vgpu(vgpu, type->conf);
+	ret = intel_gvt_create_vgpu(vgpu, type->conf);
+	if (ret)
+		return ret;
+
+	kvmgt_protect_table_init(vgpu);
+	gvt_cache_init(vgpu);
+
+	return 0;
 }
 
 static void intel_vgpu_release_dev(struct vfio_device *vfio_dev)
@@ -1461,7 +1465,6 @@ static void intel_vgpu_release_dev(struct vfio_device *vfio_dev)
 	struct intel_vgpu *vgpu = vfio_dev_to_vgpu(vfio_dev);
 
 	intel_gvt_destroy_vgpu(vgpu);
-	vfio_free_device(vfio_dev);
 }
 
 static const struct vfio_device_ops intel_vgpu_dev_ops = {
@@ -1474,6 +1477,9 @@ static const struct vfio_device_ops intel_vgpu_dev_ops = {
 	.mmap		= intel_vgpu_mmap,
 	.ioctl		= intel_vgpu_ioctl,
 	.dma_unmap	= intel_vgpu_dma_unmap,
+	.bind_iommufd	= vfio_iommufd_emulated_bind,
+	.unbind_iommufd = vfio_iommufd_emulated_unbind,
+	.attach_ioas	= vfio_iommufd_emulated_attach_ioas,
 };
 
 static int intel_vgpu_probe(struct mdev_device *mdev)
diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c
index 9acc00505fde..5b5def6ddef7 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.c
+++ b/drivers/gpu/drm/i915/gvt/mmio.c
@@ -37,6 +37,7 @@
 #include "i915_reg.h"
 #include "gvt.h"
 
+#include "display/intel_dpio_phy.h"
 #include "gt/intel_gt_regs.h"
 
 /**
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 200c1162daa3..490e8ae51228 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -34,6 +34,7 @@
  */
 
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "gt/intel_context.h"
 #include "gt/intel_engine_regs.h"
 #include "gt/intel_gpu_commands.h"
diff --git a/drivers/gpu/drm/i915/gvt/page_track.c b/drivers/gpu/drm/i915/gvt/page_track.c
index 3375b51c75f1..df34e73cba41 100644
--- a/drivers/gpu/drm/i915/gvt/page_track.c
+++ b/drivers/gpu/drm/i915/gvt/page_track.c
@@ -120,7 +120,7 @@ int intel_vgpu_enable_page_track(struct intel_vgpu *vgpu, unsigned long gfn)
 }
 
 /**
- * intel_vgpu_enable_page_track - cancel write-protection on guest page
+ * intel_vgpu_disable_page_track - cancel write-protection on guest page
  * @vgpu: a vGPU
  * @gfn: the gfn of guest page
  *
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 56c71474008a..3c529c2705dd 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -158,7 +158,7 @@ void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt)
 }
 
 /**
- * intel_gvt_active_vgpu - activate a virtual GPU
+ * intel_gvt_activate_vgpu - activate a virtual GPU
  * @vgpu: virtual GPU
  *
  * This function is called when user wants to activate a virtual GPU.
@@ -172,7 +172,7 @@ void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu)
 }
 
 /**
- * intel_gvt_deactive_vgpu - deactivate a virtual GPU
+ * intel_gvt_deactivate_vgpu - deactivate a virtual GPU
  * @vgpu: virtual GPU
  *
  * This function is called when user wants to deactivate a virtual GPU.
@@ -295,7 +295,7 @@ out_free_vgpu:
 }
 
 /**
- * intel_gvt_destroy_vgpu - destroy an idle virtual GPU
+ * intel_gvt_destroy_idle_vgpu - destroy an idle virtual GPU
  * @vgpu: virtual GPU
  *
  * This function is called when user wants to destroy an idle virtual GPU.
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 32b2b85685bb..97a5f197b7ed 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -285,28 +285,13 @@ struct drm_i915_private {
 
 	unsigned long gem_quirks;
 
-	struct drm_atomic_state *modeset_restore_state;
-	struct drm_modeset_acquire_ctx reset_ctx;
-
 	struct i915_gem_mm mm;
 
-	/* Kernel Modesetting */
-
-	struct list_head global_obj_list;
-
 	bool mchbar_need_disable;
 
 	struct intel_l3_parity l3_parity;
 
 	/*
-	 * HTI (aka HDPORT) state read during initial hw readout.  Most
-	 * platforms don't have HTI, so this will just stay 0.  Those that do
-	 * will use this later to figure out which PLLs and PHYs are unavailable
-	 * for driver usage.
-	 */
-	u32 hti_state;
-
-	/*
 	 * edram size in MB.
 	 * Cannot be determined by PCIID. You must always read a register.
 	 */
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f2d53edcd2ee..9d5d5a397b64 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -55,6 +55,7 @@
 #include "i915_drv.h"
 #include "i915_gpu_error.h"
 #include "i915_memcpy.h"
+#include "i915_reg.h"
 #include "i915_scatterlist.h"
 #include "i915_utils.h"
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 1efe5c19fac1..edfe363af838 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -28,7 +28,6 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#include <linux/circ_buf.h>
 #include <linux/slab.h>
 #include <linux/sysrq.h>
 
@@ -248,7 +247,7 @@ void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr,
 	intel_uncore_posting_read(uncore, iir);
 }
 
-void gen2_irq_reset(struct intel_uncore *uncore)
+static void gen2_irq_reset(struct intel_uncore *uncore)
 {
 	intel_uncore_write16(uncore, GEN2_IMR, 0xffff);
 	intel_uncore_posting_read16(uncore, GEN2_IMR);
@@ -309,8 +308,8 @@ void gen3_irq_init(struct intel_uncore *uncore,
 	intel_uncore_posting_read(uncore, imr);
 }
 
-void gen2_irq_init(struct intel_uncore *uncore,
-		   u32 imr_val, u32 ier_val)
+static void gen2_irq_init(struct intel_uncore *uncore,
+			  u32 imr_val, u32 ier_val)
 {
 	gen2_assert_iir_is_zero(uncore);
 
@@ -3873,7 +3872,7 @@ static void i8xx_irq_reset(struct drm_i915_private *dev_priv)
 
 	i9xx_pipestat_irq_reset(dev_priv);
 
-	GEN2_IRQ_RESET(uncore);
+	gen2_irq_reset(uncore);
 	dev_priv->irq_mask = ~0u;
 }
 
@@ -3899,7 +3898,7 @@ static void i8xx_irq_postinstall(struct drm_i915_private *dev_priv)
 		I915_MASTER_ERROR_INTERRUPT |
 		I915_USER_INTERRUPT;
 
-	GEN2_IRQ_INIT(uncore, dev_priv->irq_mask, enable_mask);
+	gen2_irq_init(uncore, dev_priv->irq_mask, enable_mask);
 
 	/* Interrupt setup is already guaranteed to be single-threaded, this is
 	 * just to make the assert_spin_locked check happy. */
diff --git a/drivers/gpu/drm/i915/i915_irq.h b/drivers/gpu/drm/i915/i915_irq.h
index 82639d9d7e82..9b004fc3444e 100644
--- a/drivers/gpu/drm/i915/i915_irq.h
+++ b/drivers/gpu/drm/i915/i915_irq.h
@@ -90,12 +90,9 @@ void i965_disable_vblank(struct drm_crtc *crtc);
 void ilk_disable_vblank(struct drm_crtc *crtc);
 void bdw_disable_vblank(struct drm_crtc *crtc);
 
-void gen2_irq_reset(struct intel_uncore *uncore);
 void gen3_irq_reset(struct intel_uncore *uncore, i915_reg_t imr,
 		    i915_reg_t iir, i915_reg_t ier);
 
-void gen2_irq_init(struct intel_uncore *uncore,
-		   u32 imr_val, u32 ier_val);
 void gen3_irq_init(struct intel_uncore *uncore,
 		   i915_reg_t imr, u32 imr_val,
 		   i915_reg_t ier, u32 ier_val,
@@ -111,9 +108,6 @@ void gen3_irq_init(struct intel_uncore *uncore,
 #define GEN3_IRQ_RESET(uncore, type) \
 	gen3_irq_reset((uncore), type##IMR, type##IIR, type##IER)
 
-#define GEN2_IRQ_RESET(uncore) \
-	gen2_irq_reset(uncore)
-
 #define GEN8_IRQ_INIT_NDX(uncore, type, which, imr_val, ier_val) \
 ({ \
 	unsigned int which_ = which; \
@@ -129,7 +123,4 @@ void gen3_irq_init(struct intel_uncore *uncore,
 		      type##IER, ier_val, \
 		      type##IIR)
 
-#define GEN2_IRQ_INIT(uncore, imr_val, ier_val) \
-	gen2_irq_init((uncore), imr_val, ier_val)
-
 #endif /* __I915_IRQ_H__ */
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 81952c60f267..46acbf390195 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -1078,7 +1078,6 @@ static const struct intel_device_info dg2_info = {
 	XE_LPD_FEATURES,
 	.__runtime.cpu_transcoder_mask = BIT(TRANSCODER_A) | BIT(TRANSCODER_B) |
 			       BIT(TRANSCODER_C) | BIT(TRANSCODER_D),
-	.require_force_probe = 1,
 };
 
 static const struct intel_device_info ats_m_info = {
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 6fb47e79294f..824a34ec0b83 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -215,6 +215,7 @@
 #include "i915_file_private.h"
 #include "i915_perf.h"
 #include "i915_perf_oa_regs.h"
+#include "i915_reg.h"
 
 /* HW requires this to be a power of two, between 128k and 16M, though driver
  * is currently generally designed assuming the largest 16M size is used such
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 358036e3fc6e..60e55245200b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -26,6 +26,7 @@
 #define _I915_REG_H_
 
 #include "i915_reg_defs.h"
+#include "display/intel_display_reg_defs.h"
 
 /**
  * DOC: The i915 register macro definition style guide
@@ -115,75 +116,6 @@
  *  #define GEN8_BAR                    _MMIO(0xb888)
  */
 
-#define DISPLAY_MMIO_BASE(dev_priv)	(INTEL_INFO(dev_priv)->display.mmio_offset)
-
-/*
- * Given the first two numbers __a and __b of arbitrarily many evenly spaced
- * numbers, pick the 0-based __index'th value.
- *
- * Always prefer this over _PICK() if the numbers are evenly spaced.
- */
-#define _PICK_EVEN(__index, __a, __b) ((__a) + (__index) * ((__b) - (__a)))
-
-/*
- * Given the arbitrary numbers in varargs, pick the 0-based __index'th number.
- *
- * Always prefer _PICK_EVEN() over this if the numbers are evenly spaced.
- */
-#define _PICK(__index, ...) (((const u32 []){ __VA_ARGS__ })[__index])
-
-/*
- * Named helper wrappers around _PICK_EVEN() and _PICK().
- */
-#define _PIPE(pipe, a, b)		_PICK_EVEN(pipe, a, b)
-#define _PLANE(plane, a, b)		_PICK_EVEN(plane, a, b)
-#define _TRANS(tran, a, b)		_PICK_EVEN(tran, a, b)
-#define _PORT(port, a, b)		_PICK_EVEN(port, a, b)
-#define _PLL(pll, a, b)			_PICK_EVEN(pll, a, b)
-#define _PHY(phy, a, b)			_PICK_EVEN(phy, a, b)
-
-#define _MMIO_PIPE(pipe, a, b)		_MMIO(_PIPE(pipe, a, b))
-#define _MMIO_PLANE(plane, a, b)	_MMIO(_PLANE(plane, a, b))
-#define _MMIO_TRANS(tran, a, b)		_MMIO(_TRANS(tran, a, b))
-#define _MMIO_PORT(port, a, b)		_MMIO(_PORT(port, a, b))
-#define _MMIO_PLL(pll, a, b)		_MMIO(_PLL(pll, a, b))
-#define _MMIO_PHY(phy, a, b)		_MMIO(_PHY(phy, a, b))
-
-#define _PHY3(phy, ...)			_PICK(phy, __VA_ARGS__)
-
-#define _MMIO_PIPE3(pipe, a, b, c)	_MMIO(_PICK(pipe, a, b, c))
-#define _MMIO_PORT3(pipe, a, b, c)	_MMIO(_PICK(pipe, a, b, c))
-#define _MMIO_PHY3(phy, a, b, c)	_MMIO(_PHY3(phy, a, b, c))
-#define _MMIO_PLL3(pll, ...)		_MMIO(_PICK(pll, __VA_ARGS__))
-
-
-/*
- * Device info offset array based helpers for groups of registers with unevenly
- * spaced base offsets.
- */
-#define _MMIO_PIPE2(pipe, reg)		_MMIO(INTEL_INFO(dev_priv)->display.pipe_offsets[(pipe)] - \
-					      INTEL_INFO(dev_priv)->display.pipe_offsets[PIPE_A] + \
-					      DISPLAY_MMIO_BASE(dev_priv) + (reg))
-#define _MMIO_TRANS2(tran, reg)		_MMIO(INTEL_INFO(dev_priv)->display.trans_offsets[(tran)] - \
-					      INTEL_INFO(dev_priv)->display.trans_offsets[TRANSCODER_A] + \
-					      DISPLAY_MMIO_BASE(dev_priv) + (reg))
-#define _MMIO_CURSOR2(pipe, reg)	_MMIO(INTEL_INFO(dev_priv)->display.cursor_offsets[(pipe)] - \
-					      INTEL_INFO(dev_priv)->display.cursor_offsets[PIPE_A] + \
-					      DISPLAY_MMIO_BASE(dev_priv) + (reg))
-
-#define __MASKED_FIELD(mask, value) ((mask) << 16 | (value))
-#define _MASKED_FIELD(mask, value) ({					   \
-	if (__builtin_constant_p(mask))					   \
-		BUILD_BUG_ON_MSG(((mask) & 0xffff0000), "Incorrect mask"); \
-	if (__builtin_constant_p(value))				   \
-		BUILD_BUG_ON_MSG((value) & 0xffff0000, "Incorrect value"); \
-	if (__builtin_constant_p(mask) && __builtin_constant_p(value))	   \
-		BUILD_BUG_ON_MSG((value) & ~(mask),			   \
-				 "Incorrect value for mask");		   \
-	__MASKED_FIELD(mask, value); })
-#define _MASKED_BIT_ENABLE(a)	({ typeof(a) _a = (a); _MASKED_FIELD(_a, _a); })
-#define _MASKED_BIT_DISABLE(a)	(_MASKED_FIELD((a), 0))
-
 #define GU_CNTL				_MMIO(0x101010)
 #define   LMEM_INIT			REG_BIT(7)
 #define   DRIVERFLR			REG_BIT(31)
@@ -1151,11 +1083,6 @@
 #define MBUS_JOIN_PIPE_SELECT(pipe)	REG_FIELD_PREP(MBUS_JOIN_PIPE_SELECT_MASK, pipe)
 #define MBUS_JOIN_PIPE_SELECT_NONE	MBUS_JOIN_PIPE_SELECT(7)
 
-#define HDPORT_STATE			_MMIO(0x45050)
-#define   HDPORT_DPLL_USED_MASK		REG_GENMASK(15, 12)
-#define   HDPORT_DDI_USED(phy)		REG_BIT(2 * (phy) + 1)
-#define   HDPORT_ENABLED		REG_BIT(0)
-
 /* Make render/texture TLB fetches lower priorty than associated data
  *   fetches. This is not turned on by default
  */
@@ -1785,9 +1712,10 @@
 #define _PALETTE_A		0xa000
 #define _PALETTE_B		0xa800
 #define _CHV_PALETTE_C		0xc000
-#define PALETTE_RED_MASK        REG_GENMASK(23, 16)
-#define PALETTE_GREEN_MASK      REG_GENMASK(15, 8)
-#define PALETTE_BLUE_MASK       REG_GENMASK(7, 0)
+/* 8bit mode / i965+ 10.6 interpolated mode ldw/udw */
+#define   PALETTE_RED_MASK		REG_GENMASK(23, 16)
+#define   PALETTE_GREEN_MASK		REG_GENMASK(15, 8)
+#define   PALETTE_BLUE_MASK		REG_GENMASK(7, 0)
 #define PALETTE(pipe, i)	_MMIO(DISPLAY_MMIO_BASE(dev_priv) + \
 				      _PICK((pipe), _PALETTE_A,		\
 					    _PALETTE_B, _CHV_PALETTE_C) + \
@@ -3762,9 +3690,10 @@
 
 /* Skylake+ pipe bottom (background) color */
 #define _SKL_BOTTOM_COLOR_A		0x70034
+#define _SKL_BOTTOM_COLOR_B		0x71034
 #define   SKL_BOTTOM_COLOR_GAMMA_ENABLE		REG_BIT(31)
 #define   SKL_BOTTOM_COLOR_CSC_ENABLE		REG_BIT(30)
-#define SKL_BOTTOM_COLOR(pipe)		_MMIO_PIPE2(pipe, _SKL_BOTTOM_COLOR_A)
+#define SKL_BOTTOM_COLOR(pipe)		_MMIO_PIPE(pipe, _SKL_BOTTOM_COLOR_A, _SKL_BOTTOM_COLOR_B)
 
 #define _ICL_PIPE_A_STATUS			0x70058
 #define ICL_PIPESTATUS(pipe)			_MMIO_PIPE2(pipe, _ICL_PIPE_A_STATUS)
@@ -5382,17 +5311,24 @@
 /* legacy palette */
 #define _LGC_PALETTE_A           0x4a000
 #define _LGC_PALETTE_B           0x4a800
-#define LGC_PALETTE_RED_MASK     REG_GENMASK(23, 16)
-#define LGC_PALETTE_GREEN_MASK   REG_GENMASK(15, 8)
-#define LGC_PALETTE_BLUE_MASK    REG_GENMASK(7, 0)
+/* see PALETTE_* for the bits */
 #define LGC_PALETTE(pipe, i) _MMIO(_PIPE(pipe, _LGC_PALETTE_A, _LGC_PALETTE_B) + (i) * 4)
 
 /* ilk/snb precision palette */
 #define _PREC_PALETTE_A           0x4b000
 #define _PREC_PALETTE_B           0x4c000
-#define   PREC_PALETTE_RED_MASK   REG_GENMASK(29, 20)
-#define   PREC_PALETTE_GREEN_MASK REG_GENMASK(19, 10)
-#define   PREC_PALETTE_BLUE_MASK  REG_GENMASK(9, 0)
+/* 10bit mode */
+#define   PREC_PALETTE_10_RED_MASK		REG_GENMASK(29, 20)
+#define   PREC_PALETTE_10_GREEN_MASK		REG_GENMASK(19, 10)
+#define   PREC_PALETTE_10_BLUE_MASK		REG_GENMASK(9, 0)
+/* 12.4 interpolated mode ldw */
+#define   PREC_PALETTE_12P4_RED_LDW_MASK	REG_GENMASK(29, 24)
+#define   PREC_PALETTE_12P4_GREEN_LDW_MASK	REG_GENMASK(19, 14)
+#define   PREC_PALETTE_12P4_BLUE_LDW_MASK	REG_GENMASK(9, 4)
+/* 12.4 interpolated mode udw */
+#define   PREC_PALETTE_12P4_RED_UDW_MASK	REG_GENMASK(29, 20)
+#define   PREC_PALETTE_12P4_GREEN_UDW_MASK	REG_GENMASK(19, 10)
+#define   PREC_PALETTE_12P4_BLUE_UDW_MASK	REG_GENMASK(9, 0)
 #define PREC_PALETTE(pipe, i) _MMIO(_PIPE(pipe, _PREC_PALETTE_A, _PREC_PALETTE_B) + (i) * 4)
 
 #define  _PREC_PIPEAGCMAX              0x4d000
@@ -7631,12 +7567,10 @@ enum skl_power_gate {
 #define _PAL_PREC_DATA_A	0x4A404
 #define _PAL_PREC_DATA_B	0x4AC04
 #define _PAL_PREC_DATA_C	0x4B404
+/* see PREC_PALETTE_* for the bits */
 #define _PAL_PREC_GC_MAX_A	0x4A410
 #define _PAL_PREC_GC_MAX_B	0x4AC10
 #define _PAL_PREC_GC_MAX_C	0x4B410
-#define   PREC_PAL_DATA_RED_MASK	REG_GENMASK(29, 20)
-#define   PREC_PAL_DATA_GREEN_MASK	REG_GENMASK(19, 10)
-#define   PREC_PAL_DATA_BLUE_MASK	REG_GENMASK(9, 0)
 #define _PAL_PREC_EXT_GC_MAX_A	0x4A420
 #define _PAL_PREC_EXT_GC_MAX_B	0x4AC20
 #define _PAL_PREC_EXT_GC_MAX_C	0x4B420
@@ -7669,12 +7603,7 @@ enum skl_power_gate {
 
 #define _PAL_PREC_MULTI_SEG_DATA_A	0x4A40C
 #define _PAL_PREC_MULTI_SEG_DATA_B	0x4AC0C
-#define  PAL_PREC_MULTI_SEG_RED_LDW_MASK   REG_GENMASK(29, 24)
-#define  PAL_PREC_MULTI_SEG_RED_UDW_MASK   REG_GENMASK(29, 20)
-#define  PAL_PREC_MULTI_SEG_GREEN_LDW_MASK REG_GENMASK(19, 14)
-#define  PAL_PREC_MULTI_SEG_GREEN_UDW_MASK REG_GENMASK(19, 10)
-#define  PAL_PREC_MULTI_SEG_BLUE_LDW_MASK  REG_GENMASK(9, 4)
-#define  PAL_PREC_MULTI_SEG_BLUE_UDW_MASK  REG_GENMASK(9, 0)
+/* see PREC_PALETTE_12P4_* for the bits */
 
 #define PREC_PAL_MULTI_SEG_INDEX(pipe)	_MMIO_PIPE(pipe, \
 					_PAL_PREC_MULTI_SEG_INDEX_A, \
@@ -7735,13 +7664,17 @@ enum skl_power_gate {
 #define _CGM_PIPE_A_CSC_COEFF67	(VLV_DISPLAY_BASE + 0x6790C)
 #define _CGM_PIPE_A_CSC_COEFF8	(VLV_DISPLAY_BASE + 0x67910)
 #define _CGM_PIPE_A_DEGAMMA	(VLV_DISPLAY_BASE + 0x66000)
-#define   CGM_PIPE_DEGAMMA_RED_MASK	REG_GENMASK(13, 0)
-#define   CGM_PIPE_DEGAMMA_GREEN_MASK	REG_GENMASK(29, 16)
-#define   CGM_PIPE_DEGAMMA_BLUE_MASK	REG_GENMASK(13, 0)
+/* cgm degamma ldw */
+#define   CGM_PIPE_DEGAMMA_GREEN_LDW_MASK	REG_GENMASK(29, 16)
+#define   CGM_PIPE_DEGAMMA_BLUE_LDW_MASK	REG_GENMASK(13, 0)
+/* cgm degamma udw */
+#define   CGM_PIPE_DEGAMMA_RED_UDW_MASK		REG_GENMASK(13, 0)
 #define _CGM_PIPE_A_GAMMA	(VLV_DISPLAY_BASE + 0x67000)
-#define   CGM_PIPE_GAMMA_RED_MASK	REG_GENMASK(9, 0)
-#define   CGM_PIPE_GAMMA_GREEN_MASK	REG_GENMASK(25, 16)
-#define   CGM_PIPE_GAMMA_BLUE_MASK	REG_GENMASK(9, 0)
+/* cgm gamma ldw */
+#define   CGM_PIPE_GAMMA_GREEN_LDW_MASK		REG_GENMASK(25, 16)
+#define   CGM_PIPE_GAMMA_BLUE_LDW_MASK		REG_GENMASK(9, 0)
+/* cgm gamma udw */
+#define   CGM_PIPE_GAMMA_RED_UDW_MASK		REG_GENMASK(9, 0)
 #define _CGM_PIPE_A_MODE	(VLV_DISPLAY_BASE + 0x67A00)
 #define   CGM_PIPE_MODE_GAMMA	(1 << 2)
 #define   CGM_PIPE_MODE_CSC	(1 << 1)
diff --git a/drivers/gpu/drm/i915/i915_reg_defs.h b/drivers/gpu/drm/i915/i915_reg_defs.h
index f1859046a9c4..be43580a6979 100644
--- a/drivers/gpu/drm/i915/i915_reg_defs.h
+++ b/drivers/gpu/drm/i915/i915_reg_defs.h
@@ -98,6 +98,34 @@
  */
 #define REG_FIELD_GET64(__mask, __val)	((u64)FIELD_GET(__mask, __val))
 
+#define __MASKED_FIELD(mask, value) ((mask) << 16 | (value))
+#define _MASKED_FIELD(mask, value) ({					   \
+	if (__builtin_constant_p(mask))					   \
+		BUILD_BUG_ON_MSG(((mask) & 0xffff0000), "Incorrect mask"); \
+	if (__builtin_constant_p(value))				   \
+		BUILD_BUG_ON_MSG((value) & 0xffff0000, "Incorrect value"); \
+	if (__builtin_constant_p(mask) && __builtin_constant_p(value))	   \
+		BUILD_BUG_ON_MSG((value) & ~(mask),			   \
+				 "Incorrect value for mask");		   \
+	__MASKED_FIELD(mask, value); })
+#define _MASKED_BIT_ENABLE(a)	({ typeof(a) _a = (a); _MASKED_FIELD(_a, _a); })
+#define _MASKED_BIT_DISABLE(a)	(_MASKED_FIELD((a), 0))
+
+/*
+ * Given the first two numbers __a and __b of arbitrarily many evenly spaced
+ * numbers, pick the 0-based __index'th value.
+ *
+ * Always prefer this over _PICK() if the numbers are evenly spaced.
+ */
+#define _PICK_EVEN(__index, __a, __b) ((__a) + (__index) * ((__b) - (__a)))
+
+/*
+ * Given the arbitrary numbers in varargs, pick the 0-based __index'th number.
+ *
+ * Always prefer _PICK_EVEN() over this if the numbers are evenly spaced.
+ */
+#define _PICK(__index, ...) (((const u32 []){ __VA_ARGS__ })[__index])
+
 typedef struct {
 	u32 reg;
 } i915_reg_t;
@@ -120,6 +148,4 @@ typedef struct {
 #define i915_mmio_reg_equal(a, b) (i915_mmio_reg_offset(a) == i915_mmio_reg_offset(b))
 #define i915_mmio_reg_valid(r) (!i915_mmio_reg_equal(r, INVALID_MMIO_REG))
 
-#define VLV_DISPLAY_BASE		0x180000
-
 #endif /* __I915_REG_DEFS__ */
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index cc2a8821d22a..8a9aad523eec 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -465,7 +465,7 @@ static void irq_i915_sw_fence_work(struct irq_work *wrk)
 	struct i915_sw_dma_fence_cb_timer *cb =
 		container_of(wrk, typeof(*cb), work);
 
-	del_timer_sync(&cb->timer);
+	timer_shutdown_sync(&cb->timer);
 	dma_fence_put(cb->dma);
 
 	kfree_rcu(cb, rcu);
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index c70a02517e02..f6f9228a1351 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -15,7 +15,6 @@
 #include "gt/intel_engine.h"
 
 #include "i915_drv.h"
-#include "i915_irq.h"
 
 /* object tracking */
 
diff --git a/drivers/gpu/drm/i915/i915_user_extensions.c b/drivers/gpu/drm/i915/i915_user_extensions.c
index c822d0aafd2d..e3f808372c47 100644
--- a/drivers/gpu/drm/i915/i915_user_extensions.c
+++ b/drivers/gpu/drm/i915/i915_user_extensions.c
@@ -51,7 +51,7 @@ int i915_user_extensions(struct i915_user_extension __user *ext,
 			return err;
 
 		if (get_user(next, &ext->next_extension) ||
-		    overflows_type(next, ext))
+		    overflows_type(next, uintptr_t))
 			return -EFAULT;
 
 		ext = u64_to_user_ptr(next);
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index b64192d9c7da..2c430c0c3bad 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -111,10 +111,6 @@ bool i915_error_injected(void);
 #define range_overflows_end_t(type, start, size, max) \
 	range_overflows_end((type)(start), (type)(size), (type)(max))
 
-/* Note we don't consider signbits :| */
-#define overflows_type(x, T) \
-	(sizeof(x) > sizeof(T) && (x) >> BITS_PER_TYPE(T))
-
 #define ptr_mask_bits(ptr, n) ({					\
 	unsigned long __v = (unsigned long)(ptr);			\
 	(typeof(ptr))(__v & -BIT(n));					\
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 1dc1fb29a776..849baf6c3b3c 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -30,9 +30,10 @@
 #include "display/intel_cdclk.h"
 #include "display/intel_de.h"
 #include "gt/intel_gt_regs.h"
-#include "intel_device_info.h"
 #include "i915_drv.h"
+#include "i915_reg.h"
 #include "i915_utils.h"
+#include "intel_device_info.h"
 
 #define PLATFORM_NAME(x) [INTEL_##x] = #x
 static const char * const platform_names[] = {
@@ -488,7 +489,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
 		if (DISPLAY_VER(dev_priv) >= 11 && (dfsm & ICL_DFSM_DMC_DISABLE))
 			runtime->has_dmc = 0;
 
-		if (DISPLAY_VER(dev_priv) >= 10 &&
+		if (IS_DISPLAY_VER(dev_priv, 10, 12) &&
 		    (dfsm & GLK_DFSM_DISPLAY_DSC_DISABLE))
 			runtime->has_dsc = 0;
 	}
diff --git a/drivers/gpu/drm/i915/intel_dram.c b/drivers/gpu/drm/i915/intel_dram.c
index 2403ccd52c74..bba8cb6e8ae4 100644
--- a/drivers/gpu/drm/i915/intel_dram.c
+++ b/drivers/gpu/drm/i915/intel_dram.c
@@ -471,8 +471,7 @@ static int xelpdp_get_dram_info(struct drm_i915_private *i915)
 	u32 val = intel_uncore_read(&i915->uncore, MTL_MEM_SS_INFO_GLOBAL);
 	struct dram_info *dram_info = &i915->dram_info;
 
-	val = REG_FIELD_GET(MTL_DDR_TYPE_MASK, val);
-	switch (val) {
+	switch (REG_FIELD_GET(MTL_DDR_TYPE_MASK, val)) {
 	case 0:
 		dram_info->type = INTEL_DRAM_DDR4;
 		break;
diff --git a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c
index 638b77d64bf4..ce6b3c3b636a 100644
--- a/drivers/gpu/drm/i915/intel_gvt_mmio_table.c
+++ b/drivers/gpu/drm/i915/intel_gvt_mmio_table.c
@@ -6,6 +6,7 @@
 #include "display/intel_audio_regs.h"
 #include "display/intel_backlight_regs.h"
 #include "display/intel_dmc_regs.h"
+#include "display/intel_dpio_phy.h"
 #include "display/vlv_dsi_pll_regs.h"
 #include "gt/intel_gt_regs.h"
 #include "gvt/gvt.h"
diff --git a/drivers/gpu/drm/i915/intel_memory_region.c b/drivers/gpu/drm/i915/intel_memory_region.c
index 9a4a7fb55582..b9a164efd6ae 100644
--- a/drivers/gpu/drm/i915/intel_memory_region.c
+++ b/drivers/gpu/drm/i915/intel_memory_region.c
@@ -38,7 +38,7 @@ static int __iopagetest(struct intel_memory_region *mem,
 			u8 value, resource_size_t offset,
 			const void *caller)
 {
-	int byte = prandom_u32_max(pagesize);
+	int byte = get_random_u32_below(pagesize);
 	u8 result[3];
 
 	memset_io(va, value, pagesize); /* or GPF! */
@@ -92,7 +92,7 @@ static int iopagetest(struct intel_memory_region *mem,
 static resource_size_t random_page(resource_size_t last)
 {
 	/* Limited to low 44b (16TiB), but should suffice for a spot check */
-	return prandom_u32_max(last >> PAGE_SHIFT) << PAGE_SHIFT;
+	return get_random_u32_below(last >> PAGE_SHIFT) << PAGE_SHIFT;
 }
 
 static int iomemtest(struct intel_memory_region *mem,
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index ee34e2785636..73c88b1c9545 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -1426,7 +1426,7 @@ static int g4x_compute_intermediate_wm(struct intel_atomic_state *state,
 	enum plane_id plane_id;
 
 	if (!new_crtc_state->hw.active ||
-	    drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) {
+	    intel_crtc_needs_modeset(new_crtc_state)) {
 		*intermediate = *optimal;
 
 		intermediate->cxsr = false;
@@ -1914,7 +1914,6 @@ static int vlv_compute_pipe_wm(struct intel_atomic_state *state,
 {
 	struct intel_crtc_state *crtc_state =
 		intel_atomic_get_new_crtc_state(state, crtc);
-	bool needs_modeset = drm_atomic_crtc_needs_modeset(&crtc_state->uapi);
 	const struct intel_plane_state *old_plane_state;
 	const struct intel_plane_state *new_plane_state;
 	struct intel_plane *plane;
@@ -1941,7 +1940,7 @@ static int vlv_compute_pipe_wm(struct intel_atomic_state *state,
 	 * FIFO setting we took over from the BIOS even if there
 	 * are no active planes on the crtc.
 	 */
-	if (needs_modeset)
+	if (intel_crtc_needs_modeset(crtc_state))
 		dirty = ~0;
 
 	if (!dirty)
@@ -1961,7 +1960,7 @@ static int vlv_compute_pipe_wm(struct intel_atomic_state *state,
 		if (ret)
 			return ret;
 
-		if (needs_modeset ||
+		if (intel_crtc_needs_modeset(crtc_state) ||
 		    memcmp(old_fifo_state, new_fifo_state,
 			   sizeof(*new_fifo_state)) != 0)
 			crtc_state->fifo_changed = true;
@@ -2084,7 +2083,7 @@ static int vlv_compute_intermediate_wm(struct intel_atomic_state *state,
 	int level;
 
 	if (!new_crtc_state->hw.active ||
-	    drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi)) {
+	    intel_crtc_needs_modeset(new_crtc_state)) {
 		*intermediate = *optimal;
 
 		intermediate->cxsr = false;
@@ -3142,7 +3141,7 @@ static int ilk_compute_intermediate_wm(struct intel_atomic_state *state,
 	 */
 	*a = new_crtc_state->wm.ilk.optimal;
 	if (!new_crtc_state->hw.active ||
-	    drm_atomic_crtc_needs_modeset(&new_crtc_state->uapi) ||
+	    intel_crtc_needs_modeset(new_crtc_state) ||
 	    state->skip_intermediate_wm)
 		return 0;
 
diff --git a/drivers/gpu/drm/i915/intel_step.c b/drivers/gpu/drm/i915/intel_step.c
index 75d7a86c60c0..84a6fe736a3b 100644
--- a/drivers/gpu/drm/i915/intel_step.c
+++ b/drivers/gpu/drm/i915/intel_step.c
@@ -131,6 +131,10 @@ static const struct intel_step_info adls_rpls_revids[] = {
 	[0xC] = { COMMON_GT_MEDIA_STEP(D0), .display_step = STEP_C0 },
 };
 
+static const struct intel_step_info adlp_rplp_revids[] = {
+	[0x4] = { COMMON_GT_MEDIA_STEP(C0), .display_step = STEP_E0 },
+};
+
 static const struct intel_step_info adlp_n_revids[] = {
 	[0x0] = { COMMON_GT_MEDIA_STEP(A0), .display_step = STEP_D0 },
 };
@@ -187,6 +191,9 @@ void intel_step_init(struct drm_i915_private *i915)
 	} else if (IS_ADLP_N(i915)) {
 		revids = adlp_n_revids;
 		size = ARRAY_SIZE(adlp_n_revids);
+	} else if (IS_ADLP_RPLP(i915)) {
+		revids = adlp_rplp_revids;
+		size = ARRAY_SIZE(adlp_rplp_revids);
 	} else if (IS_ALDERLAKE_P(i915)) {
 		revids = adlp_revids;
 		size = ARRAY_SIZE(adlp_revids);
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 25eac164c4ba..8dee9e62a73e 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -29,6 +29,7 @@
 
 #include "i915_drv.h"
 #include "i915_iosf_mbi.h"
+#include "i915_reg.h"
 #include "i915_trace.h"
 #include "i915_vgpu.h"
 #include "intel_pm.h"
@@ -823,9 +824,9 @@ void intel_uncore_forcewake_flush(struct intel_uncore *uncore,
 }
 
 /**
- * intel_uncore_forcewake_put__locked - grab forcewake domain references
+ * intel_uncore_forcewake_put__locked - release forcewake domain references
  * @uncore: the intel_uncore structure
- * @fw_domains: forcewake domains to get reference on
+ * @fw_domains: forcewake domains to put references
  *
  * See intel_uncore_forcewake_put(). This variant places the onus
  * on the caller to explicitly handle the dev_priv->uncore.lock spinlock.
diff --git a/drivers/gpu/drm/i915/vlv_sideband.c b/drivers/gpu/drm/i915/vlv_sideband.c
index c26001300ebd..6eea6e1a99c0 100644
--- a/drivers/gpu/drm/i915/vlv_sideband.c
+++ b/drivers/gpu/drm/i915/vlv_sideband.c
@@ -8,6 +8,8 @@
 #include "i915_reg.h"
 #include "vlv_sideband.h"
 
+#include "display/intel_dpio_phy.h"
+
 /*
  * IOSF sideband, see VLV2_SidebandMsg_HAS.docx and
  * VLV_VLV2_PUNIT_HAS_0.8.docx
diff --git a/drivers/gpu/drm/imx/Kconfig b/drivers/gpu/drm/imx/Kconfig
index 975de4ff7313..fd5b2471fdf0 100644
--- a/drivers/gpu/drm/imx/Kconfig
+++ b/drivers/gpu/drm/imx/Kconfig
@@ -4,7 +4,6 @@ config DRM_IMX
 	select DRM_KMS_HELPER
 	select VIDEOMODE_HELPERS
 	select DRM_GEM_DMA_HELPER
-	select DRM_KMS_HELPER
 	depends on DRM && (ARCH_MXC || ARCH_MULTIPLATFORM || COMPILE_TEST)
 	depends on IMX_IPUV3_CORE
 	help
diff --git a/drivers/gpu/drm/imx/dcss/dcss-kms.c b/drivers/gpu/drm/imx/dcss/dcss-kms.c
index b4f82ebca532..18df3888b7f9 100644
--- a/drivers/gpu/drm/imx/dcss/dcss-kms.c
+++ b/drivers/gpu/drm/imx/dcss/dcss-kms.c
@@ -7,7 +7,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_bridge_connector.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_of.h>
@@ -21,7 +21,6 @@ DEFINE_DRM_GEM_DMA_FOPS(dcss_cma_fops);
 
 static const struct drm_mode_config_funcs dcss_drm_mode_config_funcs = {
 	.fb_create = drm_gem_fb_create,
-	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = drm_atomic_helper_check,
 	.atomic_commit = drm_atomic_helper_commit,
 };
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index 8dd8b0f912af..e060fa6cbcb9 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c
@@ -16,7 +16,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_managed.h>
diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c
index 41799011f73b..c45fc8f4744d 100644
--- a/drivers/gpu/drm/imx/imx-ldb.c
+++ b/drivers/gpu/drm/imx/imx-ldb.c
@@ -7,6 +7,7 @@
 
 #include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/i2c.h>
 #include <linux/media-bus-format.h>
 #include <linux/mfd/syscon.h>
 #include <linux/mfd/syscon/imx6q-iomuxc-gpr.h>
@@ -23,7 +24,6 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_bridge.h>
 #include <drm/drm_edid.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_managed.h>
 #include <drm/drm_of.h>
 #include <drm/drm_panel.h>
diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c
index 6b34fac3f73a..d6832f506322 100644
--- a/drivers/gpu/drm/imx/imx-tve.c
+++ b/drivers/gpu/drm/imx/imx-tve.c
@@ -19,7 +19,6 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_edid.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_managed.h>
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_simple_kms_helper.h>
@@ -218,8 +217,9 @@ static int imx_tve_connector_get_modes(struct drm_connector *connector)
 	return ret;
 }
 
-static int imx_tve_connector_mode_valid(struct drm_connector *connector,
-					struct drm_display_mode *mode)
+static enum drm_mode_status
+imx_tve_connector_mode_valid(struct drm_connector *connector,
+			     struct drm_display_mode *mode)
 {
 	struct imx_tve *tve = con_to_tve(connector);
 	unsigned long rate;
diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c
index 06723b2e9b84..0fa0b590830b 100644
--- a/drivers/gpu/drm/imx/parallel-display.c
+++ b/drivers/gpu/drm/imx/parallel-display.c
@@ -8,6 +8,7 @@
 #include <linux/component.h>
 #include <linux/media-bus-format.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/videodev2.h>
 
@@ -16,7 +17,6 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_bridge.h>
 #include <drm/drm_edid.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_managed.h>
 #include <drm/drm_of.h>
 #include <drm/drm_panel.h>
diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
index ab0515d2c420..3d5af44bf92d 100644
--- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
+++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
@@ -32,7 +32,7 @@
 #include <drm/drm_encoder.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_fb_dma_helper.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_atomic_helper.h>
@@ -1018,7 +1018,6 @@ static const struct drm_bridge_funcs ingenic_drm_bridge_funcs = {
 
 static const struct drm_mode_config_funcs ingenic_drm_mode_config_funcs = {
 	.fb_create		= ingenic_drm_gem_fb_create,
-	.output_poll_changed	= drm_fb_helper_output_poll_changed,
 	.atomic_check		= drm_atomic_helper_check,
 	.atomic_commit		= drm_atomic_helper_commit,
 };
@@ -1629,7 +1628,11 @@ static int ingenic_drm_init(void)
 			return err;
 	}
 
-	return platform_driver_register(&ingenic_drm_driver);
+	err = platform_driver_register(&ingenic_drm_driver);
+	if (IS_ENABLED(CONFIG_DRM_INGENIC_IPU) && err)
+		platform_driver_unregister(ingenic_ipu_driver_ptr);
+
+	return err;
 }
 module_init(ingenic_drm_init);
 
diff --git a/drivers/gpu/drm/kmb/kmb_drv.c b/drivers/gpu/drm/kmb/kmb_drv.c
index 2382ccb3ee99..d29c678f6c91 100644
--- a/drivers/gpu/drm/kmb/kmb_drv.c
+++ b/drivers/gpu/drm/kmb/kmb_drv.c
@@ -15,7 +15,7 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_module.h>
diff --git a/drivers/gpu/drm/kmb/kmb_plane.c b/drivers/gpu/drm/kmb/kmb_plane.c
index a42f63f6f957..d172a302f902 100644
--- a/drivers/gpu/drm/kmb/kmb_plane.c
+++ b/drivers/gpu/drm/kmb/kmb_plane.c
@@ -9,7 +9,6 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_dma_helper.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_dma_helper.h>
diff --git a/drivers/gpu/drm/lima/lima_devfreq.c b/drivers/gpu/drm/lima/lima_devfreq.c
index 011be7ff51e1..bc8fb4e38d0a 100644
--- a/drivers/gpu/drm/lima/lima_devfreq.c
+++ b/drivers/gpu/drm/lima/lima_devfreq.c
@@ -112,11 +112,6 @@ int lima_devfreq_init(struct lima_device *ldev)
 	unsigned long cur_freq;
 	int ret;
 	const char *regulator_names[] = { "mali", NULL };
-	const char *clk_names[] = { "core", NULL };
-	struct dev_pm_opp_config config = {
-		.regulator_names = regulator_names,
-		.clk_names = clk_names,
-	};
 
 	if (!device_property_present(dev, "operating-points-v2"))
 		/* Optional, continue without devfreq */
@@ -124,7 +119,15 @@ int lima_devfreq_init(struct lima_device *ldev)
 
 	spin_lock_init(&ldevfreq->lock);
 
-	ret = devm_pm_opp_set_config(dev, &config);
+	/*
+	 * clkname is set separately so it is not affected by the optional
+	 * regulator setting which may return error.
+	 */
+	ret = devm_pm_opp_set_clkname(dev, "core");
+	if (ret)
+		return ret;
+
+	ret = devm_pm_opp_set_regulators(dev, regulator_names);
 	if (ret) {
 		/* Continue if the optional regulator is missing */
 		if (ret != -ENODEV)
diff --git a/drivers/gpu/drm/logicvc/logicvc_drm.c b/drivers/gpu/drm/logicvc/logicvc_drm.c
index cc9a4e965f77..9de24d9f0c96 100644
--- a/drivers/gpu/drm/logicvc/logicvc_drm.c
+++ b/drivers/gpu/drm/logicvc/logicvc_drm.c
@@ -17,7 +17,7 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_print.h>
 
diff --git a/drivers/gpu/drm/logicvc/logicvc_mode.c b/drivers/gpu/drm/logicvc/logicvc_mode.c
index d8207ffda1af..9971950ebd4e 100644
--- a/drivers/gpu/drm/logicvc/logicvc_mode.c
+++ b/drivers/gpu/drm/logicvc/logicvc_mode.c
@@ -10,7 +10,6 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_mode_config.h>
@@ -26,7 +25,6 @@
 
 static const struct drm_mode_config_funcs logicvc_mode_config_funcs = {
 	.fb_create		= drm_gem_fb_create,
-	.output_poll_changed	= drm_fb_helper_output_poll_changed,
 	.atomic_check		= drm_atomic_helper_check,
 	.atomic_commit		= drm_atomic_helper_commit,
 };
diff --git a/drivers/gpu/drm/mcde/mcde_drv.c b/drivers/gpu/drm/mcde/mcde_drv.c
index 1c4482ad507d..4aedb050d2a5 100644
--- a/drivers/gpu/drm/mcde/mcde_drv.c
+++ b/drivers/gpu/drm/mcde/mcde_drv.c
@@ -69,7 +69,7 @@
 #include <drm/drm_bridge.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_fb_dma_helper.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
@@ -203,7 +203,6 @@ DEFINE_DRM_GEM_DMA_FOPS(drm_fops);
 static const struct drm_driver mcde_drm_driver = {
 	.driver_features =
 		DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC,
-	.lastclose = drm_fb_helper_lastclose,
 	.ioctls = NULL,
 	.fops = &drm_fops,
 	.name = "mcde",
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
index 002b0f6cae1a..84daeaffab6a 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
@@ -29,17 +29,22 @@
 #define DISP_REG_OVL_DATAPATH_CON		0x0024
 #define OVL_LAYER_SMI_ID_EN				BIT(0)
 #define OVL_BGCLR_SEL_IN				BIT(2)
+#define OVL_LAYER_AFBC_EN(n)				BIT(4+n)
 #define DISP_REG_OVL_ROI_BGCLR			0x0028
 #define DISP_REG_OVL_SRC_CON			0x002c
 #define DISP_REG_OVL_CON(n)			(0x0030 + 0x20 * (n))
 #define DISP_REG_OVL_SRC_SIZE(n)		(0x0038 + 0x20 * (n))
 #define DISP_REG_OVL_OFFSET(n)			(0x003c + 0x20 * (n))
+#define DISP_REG_OVL_PITCH_MSB(n)		(0x0040 + 0x20 * (n))
+#define OVL_PITCH_MSB_2ND_SUBBUF			BIT(16)
 #define DISP_REG_OVL_PITCH(n)			(0x0044 + 0x20 * (n))
 #define DISP_REG_OVL_RDMA_CTRL(n)		(0x00c0 + 0x20 * (n))
 #define DISP_REG_OVL_RDMA_GMC(n)		(0x00c8 + 0x20 * (n))
 #define DISP_REG_OVL_ADDR_MT2701		0x0040
 #define DISP_REG_OVL_ADDR_MT8173		0x0f40
 #define DISP_REG_OVL_ADDR(ovl, n)		((ovl)->data->addr + 0x20 * (n))
+#define DISP_REG_OVL_HDR_ADDR(ovl, n)		((ovl)->data->addr + 0x20 * (n) + 0x04)
+#define DISP_REG_OVL_HDR_PITCH(ovl, n)		((ovl)->data->addr + 0x20 * (n) + 0x08)
 
 #define GMC_THRESHOLD_BITS	16
 #define GMC_THRESHOLD_HIGH	((1 << GMC_THRESHOLD_BITS) / 4)
@@ -67,6 +72,7 @@ struct mtk_disp_ovl_data {
 	unsigned int layer_nr;
 	bool fmt_rgb565_is_0;
 	bool smi_id_en;
+	bool supports_afbc;
 };
 
 /*
@@ -172,7 +178,14 @@ void mtk_ovl_stop(struct device *dev)
 		reg = reg & ~OVL_LAYER_SMI_ID_EN;
 		writel_relaxed(reg, ovl->regs + DISP_REG_OVL_DATAPATH_CON);
 	}
+}
 
+static void mtk_ovl_set_afbc(struct mtk_disp_ovl *ovl, struct cmdq_pkt *cmdq_pkt,
+			     int idx, bool enabled)
+{
+	mtk_ddp_write_mask(cmdq_pkt, enabled ? OVL_LAYER_AFBC_EN(idx) : 0,
+			   &ovl->cmdq_reg, ovl->regs,
+			   DISP_REG_OVL_DATAPATH_CON, OVL_LAYER_AFBC_EN(idx));
 }
 
 void mtk_ovl_config(struct device *dev, unsigned int w,
@@ -310,11 +323,23 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx,
 	struct mtk_disp_ovl *ovl = dev_get_drvdata(dev);
 	struct mtk_plane_pending_state *pending = &state->pending;
 	unsigned int addr = pending->addr;
-	unsigned int pitch = pending->pitch & 0xffff;
+	unsigned int hdr_addr = pending->hdr_addr;
+	unsigned int pitch = pending->pitch;
+	unsigned int hdr_pitch = pending->hdr_pitch;
 	unsigned int fmt = pending->format;
 	unsigned int offset = (pending->y << 16) | pending->x;
 	unsigned int src_size = (pending->height << 16) | pending->width;
 	unsigned int con;
+	bool is_afbc = pending->modifier != DRM_FORMAT_MOD_LINEAR;
+	union overlay_pitch {
+		struct split_pitch {
+			u16 lsb;
+			u16 msb;
+		} split_pitch;
+		u32 pitch;
+	} overlay_pitch;
+
+	overlay_pitch.pitch = pitch;
 
 	if (!pending->enable) {
 		mtk_ovl_layer_off(dev, idx, cmdq_pkt);
@@ -335,9 +360,12 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx,
 		addr += pending->pitch - 1;
 	}
 
+	if (ovl->data->supports_afbc)
+		mtk_ovl_set_afbc(ovl, cmdq_pkt, idx, is_afbc);
+
 	mtk_ddp_write_relaxed(cmdq_pkt, con, &ovl->cmdq_reg, ovl->regs,
 			      DISP_REG_OVL_CON(idx));
-	mtk_ddp_write_relaxed(cmdq_pkt, pitch, &ovl->cmdq_reg, ovl->regs,
+	mtk_ddp_write_relaxed(cmdq_pkt, overlay_pitch.split_pitch.lsb, &ovl->cmdq_reg, ovl->regs,
 			      DISP_REG_OVL_PITCH(idx));
 	mtk_ddp_write_relaxed(cmdq_pkt, src_size, &ovl->cmdq_reg, ovl->regs,
 			      DISP_REG_OVL_SRC_SIZE(idx));
@@ -346,6 +374,20 @@ void mtk_ovl_layer_config(struct device *dev, unsigned int idx,
 	mtk_ddp_write_relaxed(cmdq_pkt, addr, &ovl->cmdq_reg, ovl->regs,
 			      DISP_REG_OVL_ADDR(ovl, idx));
 
+	if (is_afbc) {
+		mtk_ddp_write_relaxed(cmdq_pkt, hdr_addr, &ovl->cmdq_reg, ovl->regs,
+				      DISP_REG_OVL_HDR_ADDR(ovl, idx));
+		mtk_ddp_write_relaxed(cmdq_pkt,
+				      OVL_PITCH_MSB_2ND_SUBBUF | overlay_pitch.split_pitch.msb,
+				      &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH_MSB(idx));
+		mtk_ddp_write_relaxed(cmdq_pkt, hdr_pitch, &ovl->cmdq_reg, ovl->regs,
+				      DISP_REG_OVL_HDR_PITCH(ovl, idx));
+	} else {
+		mtk_ddp_write_relaxed(cmdq_pkt,
+				      overlay_pitch.split_pitch.msb,
+				      &ovl->cmdq_reg, ovl->regs, DISP_REG_OVL_PITCH_MSB(idx));
+	}
+
 	mtk_ovl_layer_on(dev, idx, cmdq_pkt);
 }
 
@@ -492,6 +534,15 @@ static const struct mtk_disp_ovl_data mt8192_ovl_2l_driver_data = {
 	.smi_id_en = true,
 };
 
+static const struct mtk_disp_ovl_data mt8195_ovl_driver_data = {
+	.addr = DISP_REG_OVL_ADDR_MT8173,
+	.gmc_bits = 10,
+	.layer_nr = 4,
+	.fmt_rgb565_is_0 = true,
+	.smi_id_en = true,
+	.supports_afbc = true,
+};
+
 static const struct of_device_id mtk_disp_ovl_driver_dt_match[] = {
 	{ .compatible = "mediatek,mt2701-disp-ovl",
 	  .data = &mt2701_ovl_driver_data},
@@ -505,6 +556,8 @@ static const struct of_device_id mtk_disp_ovl_driver_dt_match[] = {
 	  .data = &mt8192_ovl_driver_data},
 	{ .compatible = "mediatek,mt8192-disp-ovl-2l",
 	  .data = &mt8192_ovl_2l_driver_data},
+	{ .compatible = "mediatek,mt8195-disp-ovl",
+	  .data = &mt8195_ovl_driver_data},
 	{},
 };
 MODULE_DEVICE_TABLE(of, mtk_disp_ovl_driver_dt_match);
diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c
index 508a6d994e83..4317595a15d1 100644
--- a/drivers/gpu/drm/mediatek/mtk_dpi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dpi.c
@@ -461,9 +461,6 @@ static void mtk_dpi_power_off(struct mtk_dpi *dpi)
 	if (--dpi->refcount != 0)
 		return;
 
-	if (dpi->pinctrl && dpi->pins_gpio)
-		pinctrl_select_state(dpi->pinctrl, dpi->pins_gpio);
-
 	mtk_dpi_disable(dpi);
 	clk_disable_unprepare(dpi->pixel_clk);
 	clk_disable_unprepare(dpi->engine_clk);
@@ -488,9 +485,6 @@ static int mtk_dpi_power_on(struct mtk_dpi *dpi)
 		goto err_pixel;
 	}
 
-	if (dpi->pinctrl && dpi->pins_dpi)
-		pinctrl_select_state(dpi->pinctrl, dpi->pins_dpi);
-
 	return 0;
 
 err_pixel:
@@ -721,12 +715,18 @@ static void mtk_dpi_bridge_disable(struct drm_bridge *bridge)
 	struct mtk_dpi *dpi = bridge_to_dpi(bridge);
 
 	mtk_dpi_power_off(dpi);
+
+	if (dpi->pinctrl && dpi->pins_gpio)
+		pinctrl_select_state(dpi->pinctrl, dpi->pins_gpio);
 }
 
 static void mtk_dpi_bridge_enable(struct drm_bridge *bridge)
 {
 	struct mtk_dpi *dpi = bridge_to_dpi(bridge);
 
+	if (dpi->pinctrl && dpi->pins_dpi)
+		pinctrl_select_state(dpi->pinctrl, dpi->pins_dpi);
+
 	mtk_dpi_power_on(dpi);
 	mtk_dpi_set_display_mode(dpi, &dpi->mode);
 	mtk_dpi_enable(dpi);
@@ -929,6 +929,20 @@ static const struct mtk_dpi_conf mt8183_conf = {
 	.csc_enable_bit = CSC_ENABLE,
 };
 
+static const struct mtk_dpi_conf mt8188_dpintf_conf = {
+	.cal_factor = mt8195_dpintf_calculate_factor,
+	.max_clock_khz = 600000,
+	.output_fmts = mt8195_output_fmts,
+	.num_output_fmts = ARRAY_SIZE(mt8195_output_fmts),
+	.pixels_per_iter = 4,
+	.input_2pixel = false,
+	.dimension_mask = DPINTF_HPW_MASK,
+	.hvsize_mask = DPINTF_HSIZE_MASK,
+	.channel_swap_shift = DPINTF_CH_SWAP,
+	.yuv422_en_bit = DPINTF_YUV422_EN,
+	.csc_enable_bit = DPINTF_CSC_ENABLE,
+};
+
 static const struct mtk_dpi_conf mt8192_conf = {
 	.cal_factor = mt8183_calculate_factor,
 	.reg_h_fre_con = 0xe0,
@@ -1079,6 +1093,9 @@ static const struct of_device_id mtk_dpi_of_ids[] = {
 	{ .compatible = "mediatek,mt8183-dpi",
 	  .data = &mt8183_conf,
 	},
+	{ .compatible = "mediatek,mt8188-dp-intf",
+	  .data = &mt8188_dpintf_conf,
+	},
 	{ .compatible = "mediatek,mt8192-dpi",
 	  .data = &mt8192_conf,
 	},
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index 91f58db5915f..cd5b18ef7951 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -17,7 +17,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_gem_dma_helper.h>
@@ -387,6 +387,12 @@ static int mtk_drm_kms_init(struct drm_device *drm)
 		goto put_mutex_dev;
 
 	/*
+	 * Ensure internal panels are at the top of the connector list before
+	 * crtc creation.
+	 */
+	drm_helper_move_panel_connectors_to_head(drm);
+
+	/*
 	 * We currently support two fixed data streams, each optional,
 	 * and each statically assigned to a crtc:
 	 * OVL0 -> COLOR0 -> AAL -> OD -> RDMA0 -> UFOE -> DSI0 ...
@@ -631,6 +637,8 @@ static const struct of_device_id mtk_ddp_comp_dt_ids[] = {
 	  .data = (void *)MTK_DPI },
 	{ .compatible = "mediatek,mt8183-dpi",
 	  .data = (void *)MTK_DPI },
+	{ .compatible = "mediatek,mt8188-dp-intf",
+	  .data = (void *)MTK_DP_INTF },
 	{ .compatible = "mediatek,mt8192-dpi",
 	  .data = (void *)MTK_DPI },
 	{ .compatible = "mediatek,mt8195-dp-intf",
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.c b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
index 2f5e007dd380..d54fbf34b000 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_plane.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.c
@@ -11,6 +11,7 @@
 #include <drm/drm_fourcc.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_atomic_helper.h>
+#include <linux/align.h>
 
 #include "mtk_drm_crtc.h"
 #include "mtk_drm_ddp_comp.h"
@@ -32,6 +33,14 @@ static const u32 formats[] = {
 	DRM_FORMAT_YUYV,
 };
 
+static const u64 modifiers[] = {
+	DRM_FORMAT_MOD_LINEAR,
+	DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
+				AFBC_FORMAT_MOD_SPLIT |
+				AFBC_FORMAT_MOD_SPARSE),
+	DRM_FORMAT_MOD_INVALID,
+};
+
 static void mtk_plane_reset(struct drm_plane *plane)
 {
 	struct mtk_plane_state *state;
@@ -51,6 +60,7 @@ static void mtk_plane_reset(struct drm_plane *plane)
 
 	state->base.plane = plane;
 	state->pending.format = DRM_FORMAT_RGB565;
+	state->pending.modifier = DRM_FORMAT_MOD_LINEAR;
 }
 
 static struct drm_plane_state *mtk_plane_duplicate_state(struct drm_plane *plane)
@@ -71,6 +81,32 @@ static struct drm_plane_state *mtk_plane_duplicate_state(struct drm_plane *plane
 	return &state->base;
 }
 
+static bool mtk_plane_format_mod_supported(struct drm_plane *plane,
+					   uint32_t format,
+					   uint64_t modifier)
+{
+	if (modifier == DRM_FORMAT_MOD_LINEAR)
+		return true;
+
+	if (modifier != DRM_FORMAT_MOD_ARM_AFBC(
+				AFBC_FORMAT_MOD_BLOCK_SIZE_32x8 |
+				AFBC_FORMAT_MOD_SPLIT |
+				AFBC_FORMAT_MOD_SPARSE))
+		return false;
+
+	if (format != DRM_FORMAT_XRGB8888 &&
+	    format != DRM_FORMAT_ARGB8888 &&
+	    format != DRM_FORMAT_BGRX8888 &&
+	    format != DRM_FORMAT_BGRA8888 &&
+	    format != DRM_FORMAT_ABGR8888 &&
+	    format != DRM_FORMAT_XBGR8888 &&
+	    format != DRM_FORMAT_RGB888 &&
+	    format != DRM_FORMAT_BGR888)
+		return false;
+
+	return true;
+}
+
 static void mtk_drm_plane_destroy_state(struct drm_plane *plane,
 					struct drm_plane_state *state)
 {
@@ -119,21 +155,52 @@ static void mtk_plane_update_new_state(struct drm_plane_state *new_state,
 	struct drm_gem_object *gem;
 	struct mtk_drm_gem_obj *mtk_gem;
 	unsigned int pitch, format;
+	u64 modifier;
 	dma_addr_t addr;
+	dma_addr_t hdr_addr = 0;
+	unsigned int hdr_pitch = 0;
 
 	gem = fb->obj[0];
 	mtk_gem = to_mtk_gem_obj(gem);
 	addr = mtk_gem->dma_addr;
 	pitch = fb->pitches[0];
 	format = fb->format->format;
+	modifier = fb->modifier;
 
-	addr += (new_state->src.x1 >> 16) * fb->format->cpp[0];
-	addr += (new_state->src.y1 >> 16) * pitch;
+	if (modifier == DRM_FORMAT_MOD_LINEAR) {
+		addr += (new_state->src.x1 >> 16) * fb->format->cpp[0];
+		addr += (new_state->src.y1 >> 16) * pitch;
+	} else {
+		int width_in_blocks = ALIGN(fb->width, AFBC_DATA_BLOCK_WIDTH)
+				      / AFBC_DATA_BLOCK_WIDTH;
+		int height_in_blocks = ALIGN(fb->height, AFBC_DATA_BLOCK_HEIGHT)
+				       / AFBC_DATA_BLOCK_HEIGHT;
+		int x_offset_in_blocks = (new_state->src.x1 >> 16) / AFBC_DATA_BLOCK_WIDTH;
+		int y_offset_in_blocks = (new_state->src.y1 >> 16) / AFBC_DATA_BLOCK_HEIGHT;
+		int hdr_size;
+
+		hdr_pitch = width_in_blocks * AFBC_HEADER_BLOCK_SIZE;
+		pitch = width_in_blocks * AFBC_DATA_BLOCK_WIDTH *
+			AFBC_DATA_BLOCK_HEIGHT * fb->format->cpp[0];
+
+		hdr_size = ALIGN(hdr_pitch * height_in_blocks, AFBC_HEADER_ALIGNMENT);
+
+		hdr_addr = addr + hdr_pitch * y_offset_in_blocks +
+			   AFBC_HEADER_BLOCK_SIZE * x_offset_in_blocks;
+		/* The data plane is offset by 1 additional block. */
+		addr = addr + hdr_size +
+		       pitch * y_offset_in_blocks +
+		       AFBC_DATA_BLOCK_WIDTH * AFBC_DATA_BLOCK_HEIGHT *
+		       fb->format->cpp[0] * (x_offset_in_blocks + 1);
+	}
 
 	mtk_plane_state->pending.enable = true;
 	mtk_plane_state->pending.pitch = pitch;
+	mtk_plane_state->pending.hdr_pitch = hdr_pitch;
 	mtk_plane_state->pending.format = format;
+	mtk_plane_state->pending.modifier = modifier;
 	mtk_plane_state->pending.addr = addr;
+	mtk_plane_state->pending.hdr_addr = hdr_addr;
 	mtk_plane_state->pending.x = new_state->dst.x1;
 	mtk_plane_state->pending.y = new_state->dst.y1;
 	mtk_plane_state->pending.width = drm_rect_width(&new_state->dst);
@@ -172,6 +239,7 @@ static const struct drm_plane_funcs mtk_plane_funcs = {
 	.reset = mtk_plane_reset,
 	.atomic_duplicate_state = mtk_plane_duplicate_state,
 	.atomic_destroy_state = mtk_drm_plane_destroy_state,
+	.format_mod_supported = mtk_plane_format_mod_supported,
 };
 
 static int mtk_plane_atomic_check(struct drm_plane *plane,
@@ -253,7 +321,7 @@ int mtk_plane_init(struct drm_device *dev, struct drm_plane *plane,
 
 	err = drm_universal_plane_init(dev, plane, possible_crtcs,
 				       &mtk_plane_funcs, formats,
-				       ARRAY_SIZE(formats), NULL, type, NULL);
+				       ARRAY_SIZE(formats), modifiers, type, NULL);
 	if (err) {
 		DRM_ERROR("failed to initialize plane\n");
 		return err;
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_plane.h b/drivers/gpu/drm/mediatek/mtk_drm_plane.h
index 2d5ec66e3df1..8f39011cdbfc 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_plane.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_plane.h
@@ -10,12 +10,20 @@
 #include <drm/drm_crtc.h>
 #include <linux/types.h>
 
+#define AFBC_DATA_BLOCK_WIDTH 32
+#define AFBC_DATA_BLOCK_HEIGHT 8
+#define AFBC_HEADER_BLOCK_SIZE 16
+#define AFBC_HEADER_ALIGNMENT 1024
+
 struct mtk_plane_pending_state {
 	bool				config;
 	bool				enable;
 	dma_addr_t			addr;
+	dma_addr_t			hdr_addr;
 	unsigned int			pitch;
+	unsigned int			hdr_pitch;
 	unsigned int			format;
+	unsigned long long		modifier;
 	unsigned int			x;
 	unsigned int			y;
 	unsigned int			width;
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c
index 4c80b6896dc3..0a8e0a13f516 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c
@@ -1202,9 +1202,10 @@ static enum drm_connector_status mtk_hdmi_detect(struct mtk_hdmi *hdmi)
 	return mtk_hdmi_update_plugged_status(hdmi);
 }
 
-static int mtk_hdmi_bridge_mode_valid(struct drm_bridge *bridge,
-				      const struct drm_display_info *info,
-				      const struct drm_display_mode *mode)
+static enum drm_mode_status
+mtk_hdmi_bridge_mode_valid(struct drm_bridge *bridge,
+			   const struct drm_display_info *info,
+			   const struct drm_display_mode *mode)
 {
 	struct mtk_hdmi *hdmi = hdmi_ctx_from_bridge(bridge);
 	struct drm_bridge *next_bridge;
@@ -1217,7 +1218,7 @@ static int mtk_hdmi_bridge_mode_valid(struct drm_bridge *bridge,
 	if (next_bridge) {
 		struct drm_display_mode adjusted_mode;
 
-		drm_mode_copy(&adjusted_mode, mode);
+		drm_mode_init(&adjusted_mode, mode);
 		if (!drm_bridge_chain_mode_fixup(next_bridge, mode,
 						 &adjusted_mode))
 			return MODE_BAD;
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index 3b24a924b7b9..79bfe3938d3c 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -18,7 +18,7 @@
 #include <drm/drm_aperture.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_modeset_helper_vtables.h>
diff --git a/drivers/gpu/drm/meson/meson_encoder_cvbs.c b/drivers/gpu/drm/meson/meson_encoder_cvbs.c
index 5675bc2a92cf..3f73b211fa8e 100644
--- a/drivers/gpu/drm/meson/meson_encoder_cvbs.c
+++ b/drivers/gpu/drm/meson/meson_encoder_cvbs.c
@@ -116,9 +116,10 @@ static int meson_encoder_cvbs_get_modes(struct drm_bridge *bridge,
 	return i;
 }
 
-static int meson_encoder_cvbs_mode_valid(struct drm_bridge *bridge,
-					const struct drm_display_info *display_info,
-					const struct drm_display_mode *mode)
+static enum drm_mode_status
+meson_encoder_cvbs_mode_valid(struct drm_bridge *bridge,
+			      const struct drm_display_info *display_info,
+			      const struct drm_display_mode *mode)
 {
 	if (meson_cvbs_get_mode(mode))
 		return MODE_OK;
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.c b/drivers/gpu/drm/mgag200/mgag200_drv.c
index ece6cd102dbb..976f0ab2006b 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.c
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.c
@@ -11,6 +11,7 @@
 
 #include <drm/drm_aperture.h>
 #include <drm/drm_drv.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_file.h>
 #include <drm/drm_ioctl.h>
 #include <drm/drm_managed.h>
diff --git a/drivers/gpu/drm/mgag200/mgag200_drv.h b/drivers/gpu/drm/mgag200/mgag200_drv.h
index f0c2349404b4..9e604dbb8e44 100644
--- a/drivers/gpu/drm/mgag200/mgag200_drv.h
+++ b/drivers/gpu/drm/mgag200/mgag200_drv.h
@@ -18,7 +18,6 @@
 #include <drm/drm_connector.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_encoder.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_gem_shmem_helper.h>
 #include <drm/drm_plane.h>
diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 4e0cbd682725..3c9dfdb0b328 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -155,7 +155,7 @@ config DRM_MSM_HDMI
 	  Compile in support for the HDMI output MSM DRM driver. It can
 	  be a primary or a secondary display on device. Note that this is used
 	  only for the direct HDMI output. If the device outputs HDMI data
-	  throught some kind of DSI-to-HDMI bridge, this option can be disabled.
+	  through some kind of DSI-to-HDMI bridge, this option can be disabled.
 
 config DRM_MSM_HDMI_HDCP
 	bool "Enable HDMI HDCP support in MSM DRM driver"
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index 2c8b9899625b..948785ed07bb 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -500,7 +500,7 @@ static const struct adreno_gpu_funcs funcs = {
 #endif
 		.gpu_state_get = a3xx_gpu_state_get,
 		.gpu_state_put = adreno_gpu_state_put,
-		.create_address_space = adreno_iommu_create_address_space,
+		.create_address_space = adreno_create_address_space,
 		.get_rptr = a3xx_get_rptr,
 	},
 };
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index 7cb8d9849c07..3e09d3a7a0ac 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -606,8 +606,7 @@ static int a4xx_pm_suspend(struct msm_gpu *gpu) {
 
 static int a4xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 {
-	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO,
-		REG_A4XX_RBBM_PERFCTR_CP_0_HI);
+	*value = gpu_read64(gpu, REG_A4XX_RBBM_PERFCTR_CP_0_LO);
 
 	return 0;
 }
@@ -635,7 +634,7 @@ static const struct adreno_gpu_funcs funcs = {
 #endif
 		.gpu_state_get = a4xx_gpu_state_get,
 		.gpu_state_put = adreno_gpu_state_put,
-		.create_address_space = adreno_iommu_create_address_space,
+		.create_address_space = adreno_create_address_space,
 		.get_rptr = a4xx_get_rptr,
 	},
 	.get_timestamp = a4xx_get_timestamp,
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 3dcec7acb384..660ba0db8900 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -605,11 +605,9 @@ static int a5xx_ucode_init(struct msm_gpu *gpu)
 		a5xx_ucode_check_version(a5xx_gpu, a5xx_gpu->pfp_bo);
 	}
 
-	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO,
-		REG_A5XX_CP_ME_INSTR_BASE_HI, a5xx_gpu->pm4_iova);
+	gpu_write64(gpu, REG_A5XX_CP_ME_INSTR_BASE_LO, a5xx_gpu->pm4_iova);
 
-	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO,
-		REG_A5XX_CP_PFP_INSTR_BASE_HI, a5xx_gpu->pfp_iova);
+	gpu_write64(gpu, REG_A5XX_CP_PFP_INSTR_BASE_LO, a5xx_gpu->pfp_iova);
 
 	return 0;
 }
@@ -868,8 +866,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 	 * memory rendering at this point in time and we don't want to block off
 	 * part of the virtual memory space.
 	 */
-	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
-		REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
+	gpu_write64(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
 	gpu_write(gpu, REG_A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
 
 	/* Put the GPU into 64 bit by default */
@@ -908,8 +905,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 		return ret;
 
 	/* Set the ringbuffer address */
-	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, REG_A5XX_CP_RB_BASE_HI,
-		gpu->rb[0]->iova);
+	gpu_write64(gpu, REG_A5XX_CP_RB_BASE, gpu->rb[0]->iova);
 
 	/*
 	 * If the microcode supports the WHERE_AM_I opcode then we can use that
@@ -936,7 +932,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 		}
 
 		gpu_write64(gpu, REG_A5XX_CP_RB_RPTR_ADDR,
-			REG_A5XX_CP_RB_RPTR_ADDR_HI, shadowptr(a5xx_gpu, gpu->rb[0]));
+			    shadowptr(a5xx_gpu, gpu->rb[0]));
 	} else if (gpu->nr_rings > 1) {
 		/* Disable preemption if WHERE_AM_I isn't available */
 		a5xx_preempt_fini(gpu);
@@ -1239,9 +1235,9 @@ static void a5xx_fault_detect_irq(struct msm_gpu *gpu)
 		gpu_read(gpu, REG_A5XX_RBBM_STATUS),
 		gpu_read(gpu, REG_A5XX_CP_RB_RPTR),
 		gpu_read(gpu, REG_A5XX_CP_RB_WPTR),
-		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE, REG_A5XX_CP_IB1_BASE_HI),
+		gpu_read64(gpu, REG_A5XX_CP_IB1_BASE),
 		gpu_read(gpu, REG_A5XX_CP_IB1_BUFSZ),
-		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE, REG_A5XX_CP_IB2_BASE_HI),
+		gpu_read64(gpu, REG_A5XX_CP_IB2_BASE),
 		gpu_read(gpu, REG_A5XX_CP_IB2_BUFSZ));
 
 	/* Turn off the hangcheck timer to keep it from bothering us */
@@ -1427,8 +1423,7 @@ static int a5xx_pm_suspend(struct msm_gpu *gpu)
 
 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 {
-	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
-		REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
+	*value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO);
 
 	return 0;
 }
@@ -1465,8 +1460,7 @@ static int a5xx_crashdumper_run(struct msm_gpu *gpu,
 	if (IS_ERR_OR_NULL(dumper->ptr))
 		return -EINVAL;
 
-	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO,
-		REG_A5XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
+	gpu_write64(gpu, REG_A5XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);
 
 	gpu_write(gpu, REG_A5XX_CP_CRASH_DUMP_CNTL, 1);
 
@@ -1666,8 +1660,7 @@ static u64 a5xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
 {
 	u64 busy_cycles;
 
-	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO,
-			REG_A5XX_RBBM_PERFCTR_RBBM_0_HI);
+	busy_cycles = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_RBBM_0_LO);
 	*out_sample_rate = clk_get_rate(gpu->core_clk);
 
 	return busy_cycles;
@@ -1705,7 +1698,7 @@ static const struct adreno_gpu_funcs funcs = {
 		.gpu_busy = a5xx_gpu_busy,
 		.gpu_state_get = a5xx_gpu_state_get,
 		.gpu_state_put = a5xx_gpu_state_put,
-		.create_address_space = adreno_iommu_create_address_space,
+		.create_address_space = adreno_create_address_space,
 		.get_rptr = a5xx_get_rptr,
 	},
 	.get_timestamp = a5xx_get_timestamp,
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
index 8abc9a2b114a..7658e89844b4 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_preempt.c
@@ -137,7 +137,6 @@ void a5xx_preempt_trigger(struct msm_gpu *gpu)
 
 	/* Set the address of the incoming preemption record */
 	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
-		REG_A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
 		a5xx_gpu->preempt_iova[ring->id]);
 
 	a5xx_gpu->next_ring = ring;
@@ -211,8 +210,7 @@ void a5xx_preempt_hw_init(struct msm_gpu *gpu)
 	}
 
 	/* Write a 0 to signal that we aren't switching pagetables */
-	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
-		REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI, 0);
+	gpu_write64(gpu, REG_A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO, 0);
 
 	/* Reset the preemption state */
 	set_preempt_state(a5xx_gpu, PREEMPT_NONE);
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index e033d6a67a20..6484b97c5344 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -1213,19 +1213,17 @@ static int a6xx_gmu_memory_alloc(struct a6xx_gmu *gmu, struct a6xx_gmu_bo *bo,
 
 static int a6xx_gmu_memory_probe(struct a6xx_gmu *gmu)
 {
-	struct iommu_domain *domain;
 	struct msm_mmu *mmu;
 
-	domain = iommu_domain_alloc(&platform_bus_type);
-	if (!domain)
+	mmu = msm_iommu_new(gmu->dev, 0);
+	if (!mmu)
 		return -ENODEV;
+	if (IS_ERR(mmu))
+		return PTR_ERR(mmu);
 
-	mmu = msm_iommu_new(gmu->dev, domain);
 	gmu->aspace = msm_gem_address_space_create(mmu, "gmu", 0x0, 0x80000000);
-	if (IS_ERR(gmu->aspace)) {
-		iommu_domain_free(domain);
+	if (IS_ERR(gmu->aspace))
 		return PTR_ERR(gmu->aspace);
-	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
index fdc578016e0b..36c8fb699b56 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
@@ -247,8 +247,7 @@ static void a6xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 	OUT_RING(ring, submit->seqno);
 
 	trace_msm_gpu_submit_flush(submit,
-		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
-			REG_A6XX_CP_ALWAYS_ON_COUNTER_HI));
+		gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO));
 
 	a6xx_flush(gpu, ring);
 }
@@ -947,8 +946,7 @@ static int a6xx_ucode_init(struct msm_gpu *gpu)
 		}
 	}
 
-	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE,
-		REG_A6XX_CP_SQE_INSTR_BASE+1, a6xx_gpu->sqe_iova);
+	gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);
 
 	return 0;
 }
@@ -999,8 +997,7 @@ static int hw_init(struct msm_gpu *gpu)
 	 * memory rendering at this point in time and we don't want to block off
 	 * part of the virtual memory space.
 	 */
-	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO,
-		REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI, 0x00000000);
+	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 0x00000000);
 	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);
 
 	/* Turn on 64 bit addressing for all blocks */
@@ -1049,11 +1046,9 @@ static int hw_init(struct msm_gpu *gpu)
 
 	if (!adreno_is_a650_family(adreno_gpu)) {
 		/* Set the GMEM VA range [0x100000:0x100000 + gpu->gmem - 1] */
-		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO,
-			REG_A6XX_UCHE_GMEM_RANGE_MIN_HI, 0x00100000);
+		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MIN_LO, 0x00100000);
 
 		gpu_write64(gpu, REG_A6XX_UCHE_GMEM_RANGE_MAX_LO,
-			REG_A6XX_UCHE_GMEM_RANGE_MAX_HI,
 			0x00100000 + adreno_gpu->gmem - 1);
 	}
 
@@ -1145,8 +1140,7 @@ static int hw_init(struct msm_gpu *gpu)
 		goto out;
 
 	/* Set the ringbuffer address */
-	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, REG_A6XX_CP_RB_BASE_HI,
-		gpu->rb[0]->iova);
+	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);
 
 	/* Targets that support extended APRIV can use the RPTR shadow from
 	 * hardware but all the other ones need to disable the feature. Targets
@@ -1178,7 +1172,6 @@ static int hw_init(struct msm_gpu *gpu)
 		}
 
 		gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR_LO,
-			REG_A6XX_CP_RB_RPTR_ADDR_HI,
 			shadowptr(a6xx_gpu, gpu->rb[0]));
 	}
 
@@ -1499,9 +1492,9 @@ static void a6xx_fault_detect_irq(struct msm_gpu *gpu)
 		gpu_read(gpu, REG_A6XX_RBBM_STATUS),
 		gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
 		gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
-		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE, REG_A6XX_CP_IB1_BASE_HI),
+		gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
 		gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
-		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE, REG_A6XX_CP_IB2_BASE_HI),
+		gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
 		gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE));
 
 	/* Turn off the hangcheck timer to keep it from bothering us */
@@ -1712,8 +1705,7 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 	/* Force the GPU power on so we can read this register */
 	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
 
-	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
-			    REG_A6XX_CP_ALWAYS_ON_COUNTER_HI);
+	*value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO);
 
 	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
 
@@ -1786,43 +1778,16 @@ a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
 {
 	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
-	struct iommu_domain *iommu;
-	struct msm_mmu *mmu;
-	struct msm_gem_address_space *aspace;
-	u64 start, size;
-
-	iommu = iommu_domain_alloc(&platform_bus_type);
-	if (!iommu)
-		return NULL;
+	unsigned long quirks = 0;
 
 	/*
 	 * This allows GPU to set the bus attributes required to use system
 	 * cache on behalf of the iommu page table walker.
 	 */
 	if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
-		adreno_set_llc_attributes(iommu);
-
-	mmu = msm_iommu_new(&pdev->dev, iommu);
-	if (IS_ERR(mmu)) {
-		iommu_domain_free(iommu);
-		return ERR_CAST(mmu);
-	}
-
-	/*
-	 * Use the aperture start or SZ_16M, whichever is greater. This will
-	 * ensure that we align with the allocated pagetable range while still
-	 * allowing room in the lower 32 bits for GMEM and whatnot
-	 */
-	start = max_t(u64, SZ_16M, iommu->geometry.aperture_start);
-	size = iommu->geometry.aperture_end - start + 1;
-
-	aspace = msm_gem_address_space_create(mmu, "gpu",
-		start & GENMASK_ULL(48, 0), size);
+		quirks |= IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
 
-	if (IS_ERR(aspace) && !IS_ERR(mmu))
-		mmu->funcs->destroy(mmu);
-
-	return aspace;
+	return adreno_iommu_create_address_space(gpu, pdev, quirks);
 }
 
 static struct msm_gem_address_space *
@@ -1851,6 +1816,39 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
 	return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
 }
 
+static bool a6xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+	struct msm_cp_state cp_state = {
+		.ib1_base = gpu_read64(gpu, REG_A6XX_CP_IB1_BASE),
+		.ib2_base = gpu_read64(gpu, REG_A6XX_CP_IB2_BASE),
+		.ib1_rem  = gpu_read(gpu, REG_A6XX_CP_IB1_REM_SIZE),
+		.ib2_rem  = gpu_read(gpu, REG_A6XX_CP_IB2_REM_SIZE),
+	};
+	bool progress;
+
+	/*
+	 * Adjust the remaining data to account for what has already been
+	 * fetched from memory, but not yet consumed by the SQE.
+	 *
+	 * This is not *technically* correct, the amount buffered could
+	 * exceed the IB size due to hw prefetching ahead, but:
+	 *
+	 * (1) We aren't trying to find the exact position, just whether
+	 *     progress has been made
+	 * (2) The CP_REG_TO_MEM at the end of a submit should be enough
+	 *     to prevent prefetching into an unrelated submit.  (And
+	 *     either way, at some point the ROQ will be full.)
+	 */
+	cp_state.ib1_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB1_STAT) >> 16;
+	cp_state.ib2_rem += gpu_read(gpu, REG_A6XX_CP_CSQ_IB2_STAT) >> 16;
+
+	progress = !!memcmp(&cp_state, &ring->last_cp_state, sizeof(cp_state));
+
+	ring->last_cp_state = cp_state;
+
+	return progress;
+}
+
 static u32 a618_get_speed_bin(u32 fuse)
 {
 	if (fuse == 0)
@@ -1906,7 +1904,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse)
 
 	if (val == UINT_MAX) {
 		DRM_DEV_ERROR(dev,
-			"missing support for speed-bin: %u. Some OPPs may not be supported by hardware",
+			"missing support for speed-bin: %u. Some OPPs may not be supported by hardware\n",
 			fuse);
 		return UINT_MAX;
 	}
@@ -1916,7 +1914,7 @@ static u32 fuse_to_supp_hw(struct device *dev, struct adreno_rev rev, u32 fuse)
 
 static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
 {
-	u32 supp_hw = UINT_MAX;
+	u32 supp_hw;
 	u32 speedbin;
 	int ret;
 
@@ -1928,15 +1926,13 @@ static int a6xx_set_supported_hw(struct device *dev, struct adreno_rev rev)
 	if (ret == -ENOENT) {
 		return 0;
 	} else if (ret) {
-		DRM_DEV_ERROR(dev,
-			      "failed to read speed-bin (%d). Some OPPs may not be supported by hardware",
-			      ret);
-		goto done;
+		dev_err_probe(dev, ret,
+			      "failed to read speed-bin. Some OPPs may not be supported by hardware\n");
+		return ret;
 	}
 
 	supp_hw = fuse_to_supp_hw(dev, rev, speedbin);
 
-done:
 	ret = devm_pm_opp_set_supported_hw(dev, &supp_hw, 1);
 	if (ret)
 		return ret;
@@ -1969,6 +1965,7 @@ static const struct adreno_gpu_funcs funcs = {
 		.create_address_space = a6xx_create_address_space,
 		.create_private_address_space = a6xx_create_private_address_space,
 		.get_rptr = a6xx_get_rptr,
+		.progress = a6xx_progress,
 	},
 	.get_timestamp = a6xx_get_timestamp,
 };
@@ -2005,13 +2002,6 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
 			adreno_cmp_rev(ADRENO_REV(6, 3, 5, ANY_ID), info->rev)))
 		adreno_gpu->base.hw_apriv = true;
 
-	/*
-	 * For now only clamp to idle freq for devices where this is known not
-	 * to cause power supply issues:
-	 */
-	if (info && (info->revn == 618))
-		gpu->clamp_to_idle = true;
-
 	a6xx_llc_slices_init(pdev, a6xx_gpu);
 
 	ret = a6xx_set_supported_hw(&pdev->dev, config->rev);
@@ -2026,6 +2016,13 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
 		return ERR_PTR(ret);
 	}
 
+	/*
+	 * For now only clamp to idle freq for devices where this is known not
+	 * to cause power supply issues:
+	 */
+	if (adreno_is_a618(adreno_gpu) || adreno_is_7c3(adreno_gpu))
+		gpu->clamp_to_idle = true;
+
 	/* Check if there is a GMU phandle and set it up */
 	node = of_parse_phandle(pdev->dev.of_node, "qcom,gmu", 0);
 
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
index 55f443328d8e..a023d5f962dc 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu_state.c
@@ -91,7 +91,7 @@ struct a6xx_state_memobj {
 static void *state_kcalloc(struct a6xx_gpu_state *a6xx_state, int nr, size_t objsize)
 {
 	struct a6xx_state_memobj *obj =
-		kzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
+		kvzalloc((nr * objsize) + sizeof(*obj), GFP_KERNEL);
 
 	if (!obj)
 		return NULL;
@@ -147,8 +147,7 @@ static int a6xx_crashdumper_run(struct msm_gpu *gpu,
 	/* Make sure all pending memory writes are posted */
 	wmb();
 
-	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO,
-		REG_A6XX_CP_CRASH_SCRIPT_BASE_HI, dumper->iova);
+	gpu_write64(gpu, REG_A6XX_CP_CRASH_SCRIPT_BASE_LO, dumper->iova);
 
 	gpu_write(gpu, REG_A6XX_CP_CRASH_DUMP_CNTL, 1);
 
@@ -813,6 +812,9 @@ static struct msm_gpu_state_bo *a6xx_snapshot_gmu_bo(
 {
 	struct msm_gpu_state_bo *snapshot;
 
+	if (!bo->size)
+		return NULL;
+
 	snapshot = state_kcalloc(a6xx_state, 1, sizeof(*snapshot));
 	if (!snapshot)
 		return NULL;
@@ -1040,8 +1042,13 @@ static void a6xx_gpu_state_destroy(struct kref *kref)
 	if (a6xx_state->gmu_hfi)
 		kvfree(a6xx_state->gmu_hfi->data);
 
-	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node)
-		kfree(obj);
+	if (a6xx_state->gmu_debug)
+		kvfree(a6xx_state->gmu_debug->data);
+
+	list_for_each_entry_safe(obj, tmp, &a6xx_state->objs, node) {
+		list_del(&obj->node);
+		kvfree(obj);
+	}
 
 	adreno_gpu_state_destroy(state);
 	kfree(a6xx_state);
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 24b489b6129a..628806423f7d 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -679,6 +679,9 @@ static int adreno_system_suspend(struct device *dev)
 	struct msm_gpu *gpu = dev_to_gpu(dev);
 	int remaining, ret;
 
+	if (!gpu)
+		return 0;
+
 	suspend_scheduler(gpu);
 
 	remaining = wait_event_timeout(gpu->retire_event,
@@ -700,7 +703,12 @@ out:
 
 static int adreno_system_resume(struct device *dev)
 {
-	resume_scheduler(dev_to_gpu(dev));
+	struct msm_gpu *gpu = dev_to_gpu(dev);
+
+	if (!gpu)
+		return 0;
+
+	resume_scheduler(gpu);
 	return pm_runtime_force_resume(dev);
 }
 
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 382fb7f9e497..57586c794b84 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -191,37 +191,38 @@ int adreno_zap_shader_load(struct msm_gpu *gpu, u32 pasid)
 	return zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw, pasid);
 }
 
-void adreno_set_llc_attributes(struct iommu_domain *iommu)
+struct msm_gem_address_space *
+adreno_create_address_space(struct msm_gpu *gpu,
+			    struct platform_device *pdev)
 {
-	iommu_set_pgtable_quirks(iommu, IO_PGTABLE_QUIRK_ARM_OUTER_WBWA);
+	return adreno_iommu_create_address_space(gpu, pdev, 0);
 }
 
 struct msm_gem_address_space *
 adreno_iommu_create_address_space(struct msm_gpu *gpu,
-		struct platform_device *pdev)
+				  struct platform_device *pdev,
+				  unsigned long quirks)
 {
-	struct iommu_domain *iommu;
+	struct iommu_domain_geometry *geometry;
 	struct msm_mmu *mmu;
 	struct msm_gem_address_space *aspace;
 	u64 start, size;
 
-	iommu = iommu_domain_alloc(&platform_bus_type);
-	if (!iommu)
-		return NULL;
-
-	mmu = msm_iommu_new(&pdev->dev, iommu);
-	if (IS_ERR(mmu)) {
-		iommu_domain_free(iommu);
+	mmu = msm_iommu_new(&pdev->dev, quirks);
+	if (IS_ERR_OR_NULL(mmu))
 		return ERR_CAST(mmu);
-	}
+
+	geometry = msm_iommu_get_geometry(mmu);
+	if (IS_ERR(geometry))
+		return ERR_CAST(geometry);
 
 	/*
 	 * Use the aperture start or SZ_16M, whichever is greater. This will
 	 * ensure that we align with the allocated pagetable range while still
 	 * allowing room in the lower 32 bits for GMEM and whatnot
 	 */
-	start = max_t(u64, SZ_16M, iommu->geometry.aperture_start);
-	size = iommu->geometry.aperture_end - start + 1;
+	start = max_t(u64, SZ_16M, geometry->aperture_start);
+	size = geometry->aperture_end - start + 1;
 
 	aspace = msm_gem_address_space_create(mmu, "gpu",
 		start & GENMASK_ULL(48, 0), size);
@@ -729,7 +730,12 @@ static char *adreno_gpu_ascii85_encode(u32 *src, size_t len)
 	return buf;
 }
 
-/* len is expected to be in bytes */
+/* len is expected to be in bytes
+ *
+ * WARNING: *ptr should be allocated with kvmalloc or friends.  It can be free'd
+ * with kvfree() and replaced with a newly kvmalloc'd buffer on the first call
+ * when the unencoded raw data is encoded
+ */
 void adreno_show_object(struct drm_printer *p, void **ptr, int len,
 		bool *encoded)
 {
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index e7adc5c632d0..5d4b1c95033f 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -335,10 +335,13 @@ void adreno_show_object(struct drm_printer *p, void **ptr, int len,
  * attached targets
  */
 struct msm_gem_address_space *
-adreno_iommu_create_address_space(struct msm_gpu *gpu,
-		struct platform_device *pdev);
+adreno_create_address_space(struct msm_gpu *gpu,
+			    struct platform_device *pdev);
 
-void adreno_set_llc_attributes(struct iommu_domain *iommu);
+struct msm_gem_address_space *
+adreno_iommu_create_address_space(struct msm_gpu *gpu,
+				  struct platform_device *pdev,
+				  unsigned long quirks);
 
 int adreno_read_speedbin(struct device *dev, u32 *speedbin);
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
index 2c14646661b7..0f71e8fe7be7 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_vid.c
@@ -237,12 +237,13 @@ static void dpu_encoder_phys_vid_setup_timing_engine(
 	unsigned long lock_flags;
 	struct dpu_hw_intf_cfg intf_cfg = { 0 };
 
+	drm_mode_init(&mode, &phys_enc->cached_mode);
+
 	if (!phys_enc->hw_ctl->ops.setup_intf_cfg) {
 		DPU_ERROR("invalid encoder %d\n", phys_enc != NULL);
 		return;
 	}
 
-	mode = phys_enc->cached_mode;
 	if (!phys_enc->hw_intf->ops.setup_timing_gen) {
 		DPU_ERROR("timing engine setup is not supported\n");
 		return;
@@ -634,7 +635,9 @@ static int dpu_encoder_phys_vid_get_frame_count(
 {
 	struct intf_status s = {0};
 	u32 fetch_start = 0;
-	struct drm_display_mode mode = phys_enc->cached_mode;
+	struct drm_display_mode mode;
+
+	drm_mode_init(&mode, &phys_enc->cached_mode);
 
 	if (!dpu_encoder_phys_vid_is_master(phys_enc))
 		return -EINVAL;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
index f436a1f3419d..d95540309d4d 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
@@ -434,6 +434,12 @@ static const struct dpu_format dpu_format_map[] = {
 		DPU_CHROMA_H2V1, DPU_FORMAT_FLAG_YUV,
 		DPU_FETCH_LINEAR, 2),
 
+	PSEUDO_YUV_FMT_LOOSE(P010,
+		0, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT,
+		C1_B_Cb, C2_R_Cr,
+		DPU_CHROMA_420, DPU_FORMAT_FLAG_DX | DPU_FORMAT_FLAG_YUV,
+		DPU_FETCH_LINEAR, 2),
+
 	INTERLEAVED_YUV_FMT(VYUY,
 		0, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT,
 		C2_R_Cr, C0_G_Y, C1_B_Cb, C0_G_Y,
@@ -524,12 +530,26 @@ static const struct dpu_format dpu_format_map_ubwc[] = {
 		true, 4, DPU_FORMAT_FLAG_DX | DPU_FORMAT_FLAG_COMPRESSED,
 		DPU_FETCH_UBWC, 2, DPU_TILE_HEIGHT_UBWC),
 
+	INTERLEAVED_RGB_FMT_TILED(XRGB2101010,
+		COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT,
+		C2_R_Cr, C0_G_Y, C1_B_Cb, C3_ALPHA, 4,
+		true, 4, DPU_FORMAT_FLAG_DX | DPU_FORMAT_FLAG_COMPRESSED,
+		DPU_FETCH_UBWC, 2, DPU_TILE_HEIGHT_UBWC),
+
 	PSEUDO_YUV_FMT_TILED(NV12,
 		0, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT,
 		C1_B_Cb, C2_R_Cr,
 		DPU_CHROMA_420, DPU_FORMAT_FLAG_YUV |
 				DPU_FORMAT_FLAG_COMPRESSED,
 		DPU_FETCH_UBWC, 4, DPU_TILE_HEIGHT_NV12),
+
+	PSEUDO_YUV_FMT_TILED(P010,
+		0, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT,
+		C1_B_Cb, C2_R_Cr,
+		DPU_CHROMA_420, DPU_FORMAT_FLAG_DX |
+				DPU_FORMAT_FLAG_YUV |
+				DPU_FORMAT_FLAG_COMPRESSED,
+		DPU_FETCH_UBWC, 4, DPU_TILE_HEIGHT_UBWC),
 };
 
 /* _dpu_get_v_h_subsample_rate - Get subsample rates for all formats we support
@@ -571,13 +591,15 @@ static int _dpu_format_get_media_color_ubwc(const struct dpu_format *fmt)
 		{DRM_FORMAT_XBGR8888, COLOR_FMT_RGBA8888_UBWC},
 		{DRM_FORMAT_XRGB8888, COLOR_FMT_RGBA8888_UBWC},
 		{DRM_FORMAT_ABGR2101010, COLOR_FMT_RGBA1010102_UBWC},
+		{DRM_FORMAT_XRGB2101010, COLOR_FMT_RGBA1010102_UBWC},
 		{DRM_FORMAT_XBGR2101010, COLOR_FMT_RGBA1010102_UBWC},
 		{DRM_FORMAT_BGR565, COLOR_FMT_RGB565_UBWC},
 	};
 	int color_fmt = -1;
 	int i;
 
-	if (fmt->base.pixel_format == DRM_FORMAT_NV12) {
+	if (fmt->base.pixel_format == DRM_FORMAT_NV12 ||
+	    fmt->base.pixel_format == DRM_FORMAT_P010) {
 		if (DPU_FORMAT_IS_DX(fmt)) {
 			if (fmt->unpack_tight)
 				color_fmt = COLOR_FMT_NV12_BPP10_UBWC;
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
index 27f029fdc682..2196e205efa5 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
@@ -156,6 +156,7 @@ static const uint32_t plane_formats[] = {
 	DRM_FORMAT_RGBX8888,
 	DRM_FORMAT_BGRX8888,
 	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_XRGB2101010,
 	DRM_FORMAT_RGB888,
 	DRM_FORMAT_BGR888,
 	DRM_FORMAT_RGB565,
@@ -184,6 +185,7 @@ static const uint32_t plane_formats_yuv[] = {
 	DRM_FORMAT_RGBA8888,
 	DRM_FORMAT_BGRX8888,
 	DRM_FORMAT_BGRA8888,
+	DRM_FORMAT_XRGB2101010,
 	DRM_FORMAT_XRGB8888,
 	DRM_FORMAT_XBGR8888,
 	DRM_FORMAT_RGBX8888,
@@ -208,6 +210,7 @@ static const uint32_t plane_formats_yuv[] = {
 	DRM_FORMAT_RGBX4444,
 	DRM_FORMAT_BGRX4444,
 
+	DRM_FORMAT_P010,
 	DRM_FORMAT_NV12,
 	DRM_FORMAT_NV21,
 	DRM_FORMAT_NV16,
@@ -318,6 +321,18 @@ static const struct dpu_caps sc7180_dpu_caps = {
 	.pixel_ram_size = DEFAULT_PIXEL_RAM_SIZE,
 };
 
+static const struct dpu_caps sm6115_dpu_caps = {
+	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
+	.max_mixer_blendstages = 0x4,
+	.qseed_type = DPU_SSPP_SCALER_QSEED3LITE,
+	.smart_dma_rev = DPU_SSPP_SMART_DMA_V2, /* TODO: v2.5 */
+	.ubwc_version = DPU_HW_UBWC_VER_20,
+	.has_dim_layer = true,
+	.has_idle_pc = true,
+	.max_linewidth = 2160,
+	.pixel_ram_size = DEFAULT_PIXEL_RAM_SIZE,
+};
+
 static const struct dpu_caps sm8150_dpu_caps = {
 	.max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
 	.max_mixer_blendstages = 0xb,
@@ -472,6 +487,19 @@ static const struct dpu_mdp_cfg sc8180x_mdp[] = {
 	},
 };
 
+static const struct dpu_mdp_cfg sm6115_mdp[] = {
+	{
+	.name = "top_0", .id = MDP_TOP,
+	.base = 0x0, .len = 0x494,
+	.features = 0,
+	.highest_bank_bit = 0x1,
+	.clk_ctrls[DPU_CLK_CTRL_VIG0] = {
+		.reg_off = 0x2ac, .bit_off = 0},
+	.clk_ctrls[DPU_CLK_CTRL_DMA0] = {
+		.reg_off = 0x2ac, .bit_off = 8},
+	},
+};
+
 static const struct dpu_mdp_cfg sm8250_mdp[] = {
 	{
 	.name = "top_0", .id = MDP_TOP,
@@ -849,6 +877,16 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = {
 		sdm845_dma_sblk_2, 9, SSPP_TYPE_DMA, DPU_CLK_CTRL_CURSOR1),
 };
 
+static const struct dpu_sspp_sub_blks sm6115_vig_sblk_0 =
+				_VIG_SBLK("0", 2, DPU_SSPP_SCALER_QSEED3LITE);
+
+static const struct dpu_sspp_cfg sm6115_sspp[] = {
+	SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, VIG_SM8250_MASK,
+		sm6115_vig_sblk_0, 0, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG0),
+	SSPP_BLK("sspp_8", SSPP_DMA0, 0x24000,  DMA_SDM845_MASK,
+		sdm845_dma_sblk_0, 1, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA0),
+};
+
 static const struct dpu_sspp_sub_blks sm8250_vig_sblk_0 =
 				_VIG_SBLK("0", 5, DPU_SSPP_SCALER_QSEED3LITE);
 static const struct dpu_sspp_sub_blks sm8250_vig_sblk_1 =
@@ -1175,6 +1213,13 @@ static const struct dpu_pingpong_cfg sm8150_pp[] = {
 			-1),
 };
 
+static const struct dpu_pingpong_cfg sc7280_pp[] = {
+	PP_BLK("pingpong_0", PINGPONG_0, 0x59000, 0, sc7280_pp_sblk, -1, -1),
+	PP_BLK("pingpong_1", PINGPONG_1, 0x6a000, 0, sc7280_pp_sblk, -1, -1),
+	PP_BLK("pingpong_2", PINGPONG_2, 0x6b000, 0, sc7280_pp_sblk, -1, -1),
+	PP_BLK("pingpong_3", PINGPONG_3, 0x6c000, 0, sc7280_pp_sblk, -1, -1),
+};
+
 static struct dpu_pingpong_cfg qcm2290_pp[] = {
 	PP_BLK("pingpong_0", PINGPONG_0, 0x70000, 0, sdm845_pp_sblk,
 		DPU_IRQ_IDX(MDP_SSPP_TOP0_INTR, 8),
@@ -1198,13 +1243,6 @@ static const struct dpu_merge_3d_cfg sm8150_merge_3d[] = {
 	MERGE_3D_BLK("merge_3d_2", MERGE_3D_2, 0x83200),
 };
 
-static const struct dpu_pingpong_cfg sc7280_pp[] = {
-	PP_BLK("pingpong_0", PINGPONG_0, 0x59000, 0, sc7280_pp_sblk, -1, -1),
-	PP_BLK("pingpong_1", PINGPONG_1, 0x6a000, 0, sc7280_pp_sblk, -1, -1),
-	PP_BLK("pingpong_2", PINGPONG_2, 0x6b000, 0, sc7280_pp_sblk, -1, -1),
-	PP_BLK("pingpong_3", PINGPONG_3, 0x6c000, 0, sc7280_pp_sblk, -1, -1),
-};
-
 /*************************************************************
  * DSC sub blocks config
  *************************************************************/
@@ -1587,6 +1625,35 @@ static const struct dpu_perf_cfg sc7180_perf_data = {
 	.bw_inefficiency_factor = 120,
 };
 
+static const struct dpu_perf_cfg sm6115_perf_data = {
+	.max_bw_low = 3100000,
+	.max_bw_high = 4000000,
+	.min_core_ib = 2400000,
+	.min_llcc_ib = 800000,
+	.min_dram_ib = 800000,
+	.min_prefill_lines = 24,
+	.danger_lut_tbl = {0xff, 0xffff, 0x0},
+	.safe_lut_tbl = {0xfff0, 0xff00, 0xffff},
+	.qos_lut_tbl = {
+		{.nentry = ARRAY_SIZE(sc7180_qos_linear),
+		.entries = sc7180_qos_linear
+		},
+		{.nentry = ARRAY_SIZE(sc7180_qos_macrotile),
+		.entries = sc7180_qos_macrotile
+		},
+		{.nentry = ARRAY_SIZE(sc7180_qos_nrt),
+		.entries = sc7180_qos_nrt
+		},
+		/* TODO: macrotile-qseed is different from macrotile */
+	},
+	.cdp_cfg = {
+		{.rd_enable = 1, .wr_enable = 1},
+		{.rd_enable = 1, .wr_enable = 0}
+	},
+	.clk_inefficiency_factor = 105,
+	.bw_inefficiency_factor = 120,
+};
+
 static const struct dpu_perf_cfg sm8150_perf_data = {
 	.max_bw_low = 12800000,
 	.max_bw_high = 12800000,
@@ -1798,6 +1865,28 @@ static const struct dpu_mdss_cfg sc7180_dpu_cfg = {
 	.mdss_irqs = IRQ_SC7180_MASK,
 };
 
+static const struct dpu_mdss_cfg sm6115_dpu_cfg = {
+	.caps = &sm6115_dpu_caps,
+	.mdp_count = ARRAY_SIZE(sm6115_mdp),
+	.mdp = sm6115_mdp,
+	.ctl_count = ARRAY_SIZE(qcm2290_ctl),
+	.ctl = qcm2290_ctl,
+	.sspp_count = ARRAY_SIZE(sm6115_sspp),
+	.sspp = sm6115_sspp,
+	.mixer_count = ARRAY_SIZE(qcm2290_lm),
+	.mixer = qcm2290_lm,
+	.dspp_count = ARRAY_SIZE(qcm2290_dspp),
+	.dspp = qcm2290_dspp,
+	.pingpong_count = ARRAY_SIZE(qcm2290_pp),
+	.pingpong = qcm2290_pp,
+	.intf_count = ARRAY_SIZE(qcm2290_intf),
+	.intf = qcm2290_intf,
+	.vbif_count = ARRAY_SIZE(sdm845_vbif),
+	.vbif = sdm845_vbif,
+	.perf = &sm6115_perf_data,
+	.mdss_irqs = IRQ_SC7180_MASK,
+};
+
 static const struct dpu_mdss_cfg sm8150_dpu_cfg = {
 	.caps = &sm8150_dpu_caps,
 	.mdp_count = ARRAY_SIZE(sdm845_mdp),
@@ -1932,6 +2021,7 @@ static const struct dpu_mdss_hw_cfg_handler cfg_handler[] = {
 	{ .hw_rev = DPU_HW_VER_510, .dpu_cfg = &sc8180x_dpu_cfg},
 	{ .hw_rev = DPU_HW_VER_600, .dpu_cfg = &sm8250_dpu_cfg},
 	{ .hw_rev = DPU_HW_VER_620, .dpu_cfg = &sc7180_dpu_cfg},
+	{ .hw_rev = DPU_HW_VER_630, .dpu_cfg = &sm6115_dpu_cfg},
 	{ .hw_rev = DPU_HW_VER_650, .dpu_cfg = &qcm2290_dpu_cfg},
 	{ .hw_rev = DPU_HW_VER_720, .dpu_cfg = &sc7280_dpu_cfg},
 };
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
index 38aa38ab1568..3b645d5aa9aa 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
@@ -44,6 +44,7 @@
 #define DPU_HW_VER_510	DPU_HW_VER(5, 1, 1) /* sc8180 */
 #define DPU_HW_VER_600	DPU_HW_VER(6, 0, 0) /* sm8250 */
 #define DPU_HW_VER_620	DPU_HW_VER(6, 2, 0) /* sc7180 v1.0 */
+#define DPU_HW_VER_630	DPU_HW_VER(6, 3, 0) /* sm6115|sm4250 */
 #define DPU_HW_VER_650	DPU_HW_VER(6, 5, 0) /* qcm2290|sm4125 */
 #define DPU_HW_VER_720	DPU_HW_VER(7, 2, 0) /* sc7280 */
 
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c
index f2ddcfb6f7ee..3662df698dae 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_hw_dsc.c
@@ -42,7 +42,7 @@ static void dpu_hw_dsc_config(struct dpu_hw_dsc *hw_dsc,
 			      u32 initial_lines)
 {
 	struct dpu_hw_blk_reg_map *c = &hw_dsc->hw;
-	u32 data, lsb, bpp;
+	u32 data;
 	u32 slice_last_group_size;
 	u32 det_thresh_flatness;
 	bool is_cmd_mode = !(mode & DSC_MODE_VIDEO);
@@ -56,14 +56,7 @@ static void dpu_hw_dsc_config(struct dpu_hw_dsc *hw_dsc,
 	data = (initial_lines << 20);
 	data |= ((slice_last_group_size - 1) << 18);
 	/* bpp is 6.4 format, 4 LSBs bits are for fractional part */
-	data |= dsc->bits_per_pixel << 12;
-	lsb = dsc->bits_per_pixel % 4;
-	bpp = dsc->bits_per_pixel / 4;
-	bpp *= 4;
-	bpp <<= 4;
-	bpp |= lsb;
-
-	data |= bpp << 8;
+	data |= (dsc->bits_per_pixel << 8);
 	data |= (dsc->block_pred_enable << 7);
 	data |= (dsc->line_buf_depth << 3);
 	data |= (dsc->simple_422 << 2);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
index 5e6e2626151e..b71199511a52 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
@@ -194,7 +194,7 @@ struct dpu_debugfs_regset32 {
 	struct dpu_kms *dpu_kms;
 };
 
-static int _dpu_debugfs_show_regset32(struct seq_file *s, void *data)
+static int dpu_regset32_show(struct seq_file *s, void *data)
 {
 	struct dpu_debugfs_regset32 *regset = s->private;
 	struct dpu_kms *dpu_kms = regset->dpu_kms;
@@ -227,19 +227,7 @@ static int _dpu_debugfs_show_regset32(struct seq_file *s, void *data)
 
 	return 0;
 }
-
-static int dpu_debugfs_open_regset32(struct inode *inode,
-		struct file *file)
-{
-	return single_open(file, _dpu_debugfs_show_regset32, inode->i_private);
-}
-
-static const struct file_operations dpu_fops_regset32 = {
-	.open =		dpu_debugfs_open_regset32,
-	.read =		seq_read,
-	.llseek =	seq_lseek,
-	.release =	single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(dpu_regset32);
 
 void dpu_debugfs_create_regset32(const char *name, umode_t mode,
 		void *parent,
@@ -259,7 +247,7 @@ void dpu_debugfs_create_regset32(const char *name, umode_t mode,
 	regset->blk_len = length;
 	regset->dpu_kms = dpu_kms;
 
-	debugfs_create_file(name, mode, parent, regset, &dpu_fops_regset32);
+	debugfs_create_file(name, mode, parent, regset, &dpu_regset32_fops);
 }
 
 static int dpu_kms_debugfs_init(struct msm_kms *kms, struct drm_minor *minor)
@@ -1304,6 +1292,7 @@ static const struct of_device_id dpu_dt_match[] = {
 	{ .compatible = "qcom,sc7180-dpu", },
 	{ .compatible = "qcom,sc7280-dpu", },
 	{ .compatible = "qcom,sc8180x-dpu", },
+	{ .compatible = "qcom,sm6115-dpu", },
 	{ .compatible = "qcom,sm8150-dpu", },
 	{ .compatible = "qcom,sm8250-dpu", },
 	{}
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
index 658005f609f4..86719020afe2 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
@@ -69,9 +69,11 @@ static const uint32_t qcom_compressed_supported_formats[] = {
 	DRM_FORMAT_ARGB8888,
 	DRM_FORMAT_XBGR8888,
 	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_XRGB2101010,
 	DRM_FORMAT_BGR565,
 
 	DRM_FORMAT_NV12,
+	DRM_FORMAT_P010,
 };
 
 /**
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
index 964573d26d26..9a1a0769575d 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_kms.c
@@ -387,7 +387,7 @@ static int mdp4_kms_init(struct drm_device *dev)
 	struct msm_drm_private *priv = dev->dev_private;
 	struct mdp4_kms *mdp4_kms;
 	struct msm_kms *kms = NULL;
-	struct iommu_domain *iommu;
+	struct msm_mmu *mmu;
 	struct msm_gem_address_space *aspace;
 	int irq, ret;
 	u32 major, minor;
@@ -499,10 +499,15 @@ static int mdp4_kms_init(struct drm_device *dev)
 	mdp4_disable(mdp4_kms);
 	mdelay(16);
 
-	iommu = iommu_domain_alloc(pdev->dev.bus);
-	if (iommu) {
-		struct msm_mmu *mmu = msm_iommu_new(&pdev->dev, iommu);
-
+	mmu = msm_iommu_new(&pdev->dev, 0);
+	if (IS_ERR(mmu)) {
+		ret = PTR_ERR(mmu);
+		goto fail;
+	} else if (!mmu) {
+		DRM_DEV_INFO(dev->dev, "no iommu, fallback to phys "
+				"contig buffers for scanout\n");
+		aspace = NULL;
+	} else {
 		aspace  = msm_gem_address_space_create(mmu,
 			"mdp4", 0x1000, 0x100000000 - 0x1000);
 
@@ -514,10 +519,6 @@ static int mdp4_kms_init(struct drm_device *dev)
 		}
 
 		kms->aspace = aspace;
-	} else {
-		DRM_DEV_INFO(dev->dev, "no iommu, fallback to phys "
-				"contig buffers for scanout\n");
-		aspace = NULL;
 	}
 
 	ret = modeset_init(mdp4_kms);
diff --git a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c
index 7288041dd86a..7444b75c4215 100644
--- a/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c
+++ b/drivers/gpu/drm/msm/disp/mdp4/mdp4_lvds_connector.c
@@ -56,8 +56,9 @@ static int mdp4_lvds_connector_get_modes(struct drm_connector *connector)
 	return ret;
 }
 
-static int mdp4_lvds_connector_mode_valid(struct drm_connector *connector,
-				 struct drm_display_mode *mode)
+static enum drm_mode_status
+mdp4_lvds_connector_mode_valid(struct drm_connector *connector,
+			       struct drm_display_mode *mode)
 {
 	struct mdp4_lvds_connector *mdp4_lvds_connector =
 			to_mdp4_lvds_connector(connector);
diff --git a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
index b0d21838a134..29ae5c9613f3 100644
--- a/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/disp/mdp5/mdp5_kms.c
@@ -203,7 +203,7 @@ static int mdp5_set_split_display(struct msm_kms *kms,
 							  slave_encoder);
 }
 
-static void mdp5_destroy(struct platform_device *pdev);
+static void mdp5_destroy(struct mdp5_kms *mdp5_kms);
 
 static void mdp5_kms_destroy(struct msm_kms *kms)
 {
@@ -223,7 +223,7 @@ static void mdp5_kms_destroy(struct msm_kms *kms)
 	}
 
 	mdp_kms_destroy(&mdp5_kms->base);
-	mdp5_destroy(mdp5_kms->pdev);
+	mdp5_destroy(mdp5_kms);
 }
 
 #ifdef CONFIG_DEBUG_FS
@@ -559,6 +559,8 @@ static int mdp5_kms_init(struct drm_device *dev)
 	int irq, i, ret;
 
 	ret = mdp5_init(to_platform_device(dev->dev), dev);
+	if (ret)
+		return ret;
 
 	/* priv->kms would have been populated by the MDP5 driver */
 	kms = priv->kms;
@@ -632,9 +634,8 @@ fail:
 	return ret;
 }
 
-static void mdp5_destroy(struct platform_device *pdev)
+static void mdp5_destroy(struct mdp5_kms *mdp5_kms)
 {
-	struct mdp5_kms *mdp5_kms = platform_get_drvdata(pdev);
 	int i;
 
 	if (mdp5_kms->ctlm)
@@ -648,7 +649,7 @@ static void mdp5_destroy(struct platform_device *pdev)
 		kfree(mdp5_kms->intfs[i]);
 
 	if (mdp5_kms->rpm_enabled)
-		pm_runtime_disable(&pdev->dev);
+		pm_runtime_disable(&mdp5_kms->pdev->dev);
 
 	drm_atomic_private_obj_fini(&mdp5_kms->glob_state);
 	drm_modeset_lock_fini(&mdp5_kms->glob_state_lock);
@@ -797,8 +798,6 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev)
 		goto fail;
 	}
 
-	platform_set_drvdata(pdev, mdp5_kms);
-
 	spin_lock_init(&mdp5_kms->resource_lock);
 
 	mdp5_kms->dev = dev;
@@ -839,6 +838,9 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev)
 	 */
 	clk_set_rate(mdp5_kms->core_clk, 200000000);
 
+	/* set uninit-ed kms */
+	priv->kms = &mdp5_kms->base.base;
+
 	pm_runtime_enable(&pdev->dev);
 	mdp5_kms->rpm_enabled = true;
 
@@ -890,13 +892,10 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev)
 	if (ret)
 		goto fail;
 
-	/* set uninit-ed kms */
-	priv->kms = &mdp5_kms->base.base;
-
 	return 0;
 fail:
 	if (mdp5_kms)
-		mdp5_destroy(pdev);
+		mdp5_destroy(mdp5_kms);
 	return ret;
 }
 
@@ -953,7 +952,8 @@ static int mdp5_dev_remove(struct platform_device *pdev)
 static __maybe_unused int mdp5_runtime_suspend(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
-	struct mdp5_kms *mdp5_kms = platform_get_drvdata(pdev);
+	struct msm_drm_private *priv = platform_get_drvdata(pdev);
+	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(priv->kms));
 
 	DBG("");
 
@@ -963,7 +963,8 @@ static __maybe_unused int mdp5_runtime_suspend(struct device *dev)
 static __maybe_unused int mdp5_runtime_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
-	struct mdp5_kms *mdp5_kms = platform_get_drvdata(pdev);
+	struct msm_drm_private *priv = platform_get_drvdata(pdev);
+	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(priv->kms));
 
 	DBG("");
 
diff --git a/drivers/gpu/drm/msm/dp/dp_ctrl.c b/drivers/gpu/drm/msm/dp/dp_ctrl.c
index 3854c9f1f7e9..dd26ca651a05 100644
--- a/drivers/gpu/drm/msm/dp/dp_ctrl.c
+++ b/drivers/gpu/drm/msm/dp/dp_ctrl.c
@@ -1243,8 +1243,7 @@ static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl,
 {
 	int ret = 0;
 	const u8 *dpcd = ctrl->panel->dpcd;
-	u8 encoding = DP_SET_ANSI_8B10B;
-	u8 ssc;
+	u8 encoding[] = { 0, DP_SET_ANSI_8B10B };
 	u8 assr;
 	struct dp_link_info link_info = {0};
 
@@ -1256,13 +1255,11 @@ static int dp_ctrl_link_train(struct dp_ctrl_private *ctrl,
 
 	dp_aux_link_configure(ctrl->aux, &link_info);
 
-	if (drm_dp_max_downspread(dpcd)) {
-		ssc = DP_SPREAD_AMP_0_5;
-		drm_dp_dpcd_write(ctrl->aux, DP_DOWNSPREAD_CTRL, &ssc, 1);
-	}
+	if (drm_dp_max_downspread(dpcd))
+		encoding[0] |= DP_SPREAD_AMP_0_5;
 
-	drm_dp_dpcd_write(ctrl->aux, DP_MAIN_LINK_CHANNEL_CODING_SET,
-				&encoding, 1);
+	/* config DOWNSPREAD_CTRL and MAIN_LINK_CHANNEL_CODING_SET */
+	drm_dp_dpcd_write(ctrl->aux, DP_DOWNSPREAD_CTRL, encoding, 2);
 
 	if (drm_dp_alternate_scrambler_reset_cap(dpcd)) {
 		assr = DP_ALTERNATE_SCRAMBLER_RESET_ENABLE;
diff --git a/drivers/gpu/drm/msm/dp/dp_display.c b/drivers/gpu/drm/msm/dp/dp_display.c
index bfd0aeff3f0d..7ff60e5ff325 100644
--- a/drivers/gpu/drm/msm/dp/dp_display.c
+++ b/drivers/gpu/drm/msm/dp/dp_display.c
@@ -857,7 +857,7 @@ static int dp_display_set_mode(struct msm_dp *dp_display,
 
 	dp = container_of(dp_display, struct dp_display_private, dp_display);
 
-	dp->panel->dp_mode.drm_mode = mode->drm_mode;
+	drm_mode_copy(&dp->panel->dp_mode.drm_mode, &mode->drm_mode);
 	dp->panel->dp_mode.bpp = mode->bpp;
 	dp->panel->dp_mode.capabilities = mode->capabilities;
 	dp_panel_init_panel_info(dp->panel);
@@ -1249,7 +1249,7 @@ int dp_display_request_irq(struct msm_dp *dp_display)
 		return -EINVAL;
 	}
 
-	rc = devm_request_irq(&dp->pdev->dev, dp->irq,
+	rc = devm_request_irq(dp_display->drm_dev->dev, dp->irq,
 			dp_display_irq_handler,
 			IRQF_TRIGGER_HIGH, "dp_display_isr", dp);
 	if (rc < 0) {
@@ -1528,6 +1528,11 @@ void msm_dp_debugfs_init(struct msm_dp *dp_display, struct drm_minor *minor)
 	}
 }
 
+static void of_dp_aux_depopulate_bus_void(void *data)
+{
+	of_dp_aux_depopulate_bus(data);
+}
+
 static int dp_display_get_next_bridge(struct msm_dp *dp)
 {
 	int rc;
@@ -1552,10 +1557,16 @@ static int dp_display_get_next_bridge(struct msm_dp *dp)
 		 * panel driver is probed asynchronously but is the best we
 		 * can do without a bigger driver reorganization.
 		 */
-		rc = devm_of_dp_aux_populate_ep_devices(dp_priv->aux);
+		rc = of_dp_aux_populate_bus(dp_priv->aux, NULL);
 		of_node_put(aux_bus);
 		if (rc)
 			goto error;
+
+		rc = devm_add_action_or_reset(dp->drm_dev->dev,
+						of_dp_aux_depopulate_bus_void,
+						dp_priv->aux);
+		if (rc)
+			goto error;
 	} else if (dp->is_edp) {
 		DRM_ERROR("eDP aux_bus not found\n");
 		return -ENODEV;
@@ -1568,7 +1579,7 @@ static int dp_display_get_next_bridge(struct msm_dp *dp)
 	 * For DisplayPort interfaces external bridges are optional, so
 	 * silently ignore an error if one is not present (-ENODEV).
 	 */
-	rc = dp_parser_find_next_bridge(dp_priv->parser);
+	rc = devm_dp_parser_find_next_bridge(dp->drm_dev->dev, dp_priv->parser);
 	if (!dp->is_edp && rc == -ENODEV)
 		return 0;
 
@@ -1589,14 +1600,10 @@ error:
 int msm_dp_modeset_init(struct msm_dp *dp_display, struct drm_device *dev,
 			struct drm_encoder *encoder)
 {
-	struct msm_drm_private *priv;
+	struct msm_drm_private *priv = dev->dev_private;
 	struct dp_display_private *dp_priv;
 	int ret;
 
-	if (WARN_ON(!encoder) || WARN_ON(!dp_display) || WARN_ON(!dev))
-		return -EINVAL;
-
-	priv = dev->dev_private;
 	dp_display->drm_dev = dev;
 
 	dp_priv = container_of(dp_display, struct dp_display_private, dp_display);
diff --git a/drivers/gpu/drm/msm/dp/dp_drm.c b/drivers/gpu/drm/msm/dp/dp_drm.c
index 6df25f7662e7..6db82f9b03af 100644
--- a/drivers/gpu/drm/msm/dp/dp_drm.c
+++ b/drivers/gpu/drm/msm/dp/dp_drm.c
@@ -31,6 +31,36 @@ static enum drm_connector_status dp_bridge_detect(struct drm_bridge *bridge)
 					connector_status_disconnected;
 }
 
+static int dp_bridge_atomic_check(struct drm_bridge *bridge,
+			    struct drm_bridge_state *bridge_state,
+			    struct drm_crtc_state *crtc_state,
+			    struct drm_connector_state *conn_state)
+{
+	struct msm_dp *dp;
+
+	dp = to_dp_bridge(bridge)->dp_display;
+
+	drm_dbg_dp(dp->drm_dev, "is_connected = %s\n",
+		(dp->is_connected) ? "true" : "false");
+
+	/*
+	 * There is no protection in the DRM framework to check if the display
+	 * pipeline has been already disabled before trying to disable it again.
+	 * Hence if the sink is unplugged, the pipeline gets disabled, but the
+	 * crtc->active is still true. Any attempt to set the mode or manually
+	 * disable this encoder will result in the crash.
+	 *
+	 * TODO: add support for telling the DRM subsystem that the pipeline is
+	 * disabled by the hardware and thus all access to it should be forbidden.
+	 * After that this piece of code can be removed.
+	 */
+	if (bridge->ops & DRM_BRIDGE_OP_HPD)
+		return (dp->is_connected) ? 0 : -ENOTCONN;
+
+	return 0;
+}
+
+
 /**
  * dp_bridge_get_modes - callback to add drm modes via drm_mode_probed_add()
  * @bridge: Poiner to drm bridge
@@ -61,6 +91,9 @@ static int dp_bridge_get_modes(struct drm_bridge *bridge, struct drm_connector *
 }
 
 static const struct drm_bridge_funcs dp_bridge_ops = {
+	.atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
+	.atomic_destroy_state   = drm_atomic_helper_bridge_destroy_state,
+	.atomic_reset           = drm_atomic_helper_bridge_reset,
 	.enable       = dp_bridge_enable,
 	.disable      = dp_bridge_disable,
 	.post_disable = dp_bridge_post_disable,
@@ -68,6 +101,7 @@ static const struct drm_bridge_funcs dp_bridge_ops = {
 	.mode_valid   = dp_bridge_mode_valid,
 	.get_modes    = dp_bridge_get_modes,
 	.detect       = dp_bridge_detect,
+	.atomic_check = dp_bridge_atomic_check,
 };
 
 struct drm_bridge *dp_bridge_init(struct msm_dp *dp_display, struct drm_device *dev,
diff --git a/drivers/gpu/drm/msm/dp/dp_link.c b/drivers/gpu/drm/msm/dp/dp_link.c
index 36bb6191d2f0..f1f1d646539d 100644
--- a/drivers/gpu/drm/msm/dp/dp_link.c
+++ b/drivers/gpu/drm/msm/dp/dp_link.c
@@ -49,23 +49,26 @@ static int dp_aux_link_power_up(struct drm_dp_aux *aux,
 					struct dp_link_info *link)
 {
 	u8 value;
-	int err;
+	ssize_t len;
+	int i;
 
 	if (link->revision < 0x11)
 		return 0;
 
-	err = drm_dp_dpcd_readb(aux, DP_SET_POWER, &value);
-	if (err < 0)
-		return err;
+	len = drm_dp_dpcd_readb(aux, DP_SET_POWER, &value);
+	if (len < 0)
+		return len;
 
 	value &= ~DP_SET_POWER_MASK;
 	value |= DP_SET_POWER_D0;
 
-	err = drm_dp_dpcd_writeb(aux, DP_SET_POWER, value);
-	if (err < 0)
-		return err;
-
-	usleep_range(1000, 2000);
+	/* retry for 1ms to give the sink time to wake up */
+	for (i = 0; i < 3; i++) {
+		len = drm_dp_dpcd_writeb(aux, DP_SET_POWER, value);
+		usleep_range(1000, 2000);
+		if (len == 1)
+			break;
+	}
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/msm/dp/dp_parser.c b/drivers/gpu/drm/msm/dp/dp_parser.c
index dd732215d55b..dcbe893d66d7 100644
--- a/drivers/gpu/drm/msm/dp/dp_parser.c
+++ b/drivers/gpu/drm/msm/dp/dp_parser.c
@@ -240,12 +240,12 @@ static int dp_parser_clock(struct dp_parser *parser)
 	return 0;
 }
 
-int dp_parser_find_next_bridge(struct dp_parser *parser)
+int devm_dp_parser_find_next_bridge(struct device *dev, struct dp_parser *parser)
 {
-	struct device *dev = &parser->pdev->dev;
+	struct platform_device *pdev = parser->pdev;
 	struct drm_bridge *bridge;
 
-	bridge = devm_drm_of_get_bridge(dev, dev->of_node, 1, 0);
+	bridge = devm_drm_of_get_bridge(dev, pdev->dev.of_node, 1, 0);
 	if (IS_ERR(bridge))
 		return PTR_ERR(bridge);
 
diff --git a/drivers/gpu/drm/msm/dp/dp_parser.h b/drivers/gpu/drm/msm/dp/dp_parser.h
index 866c1a82bf1a..d30ab773db46 100644
--- a/drivers/gpu/drm/msm/dp/dp_parser.h
+++ b/drivers/gpu/drm/msm/dp/dp_parser.h
@@ -138,8 +138,9 @@ struct dp_parser {
 struct dp_parser *dp_parser_get(struct platform_device *pdev);
 
 /**
- * dp_parser_find_next_bridge() - find an additional bridge to DP
+ * devm_dp_parser_find_next_bridge() - find an additional bridge to DP
  *
+ * @dev: device to tie bridge lifetime to
  * @parser: dp_parser data from client
  *
  * This function is used to find any additional bridge attached to
@@ -147,6 +148,6 @@ struct dp_parser *dp_parser_get(struct platform_device *pdev);
  *
  * Return: 0 if able to get the bridge, otherwise negative errno for failure.
  */
-int dp_parser_find_next_bridge(struct dp_parser *parser);
+int devm_dp_parser_find_next_bridge(struct device *dev, struct dp_parser *parser);
 
 #endif
diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c
index 39bbabb5daf6..31fdee2052be 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.c
+++ b/drivers/gpu/drm/msm/dsi/dsi.c
@@ -211,13 +211,14 @@ void __exit msm_dsi_unregister(void)
 int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev,
 			 struct drm_encoder *encoder)
 {
-	struct msm_drm_private *priv;
+	struct msm_drm_private *priv = dev->dev_private;
 	int ret;
 
-	if (WARN_ON(!encoder) || WARN_ON(!msm_dsi) || WARN_ON(!dev))
-		return -EINVAL;
+	if (priv->num_bridges == ARRAY_SIZE(priv->bridges)) {
+		DRM_DEV_ERROR(dev->dev, "too many bridges\n");
+		return -ENOSPC;
+	}
 
-	priv = dev->dev_private;
 	msm_dsi->dev = dev;
 
 	ret = msm_dsi_host_modeset_init(msm_dsi->host, dev);
diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
index 7fbf391c024f..89aadd3b3202 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -21,6 +21,7 @@
 
 #include <video/mipi_display.h>
 
+#include <drm/display/drm_dsc_helper.h>
 #include <drm/drm_of.h>
 
 #include "dsi.h"
@@ -33,7 +34,7 @@
 
 #define DSI_RESET_TOGGLE_DELAY_MS 20
 
-static int dsi_populate_dsc_params(struct drm_dsc_config *dsc);
+static int dsi_populate_dsc_params(struct msm_dsi_host *msm_host, struct drm_dsc_config *dsc);
 
 static int dsi_get_version(const void __iomem *base, u32 *major, u32 *minor)
 {
@@ -842,17 +843,15 @@ static void dsi_ctrl_config(struct msm_dsi_host *msm_host, bool enable,
 static void dsi_update_dsc_timing(struct msm_dsi_host *msm_host, bool is_cmd_mode, u32 hdisplay)
 {
 	struct drm_dsc_config *dsc = msm_host->dsc;
-	u32 reg, intf_width, reg_ctrl, reg_ctrl2;
+	u32 reg, reg_ctrl, reg_ctrl2;
 	u32 slice_per_intf, total_bytes_per_intf;
 	u32 pkt_per_line;
-	u32 bytes_in_slice;
 	u32 eol_byte_num;
 
 	/* first calculate dsc parameters and then program
 	 * compress mode registers
 	 */
-	intf_width = hdisplay;
-	slice_per_intf = DIV_ROUND_UP(intf_width, dsc->slice_width);
+	slice_per_intf = DIV_ROUND_UP(hdisplay, dsc->slice_width);
 
 	/* If slice_per_pkt is greater than slice_per_intf
 	 * then default to 1. This can happen during partial
@@ -861,12 +860,7 @@ static void dsi_update_dsc_timing(struct msm_dsi_host *msm_host, bool is_cmd_mod
 	if (slice_per_intf > dsc->slice_count)
 		dsc->slice_count = 1;
 
-	slice_per_intf = DIV_ROUND_UP(hdisplay, dsc->slice_width);
-	bytes_in_slice = DIV_ROUND_UP(dsc->slice_width * dsc->bits_per_pixel, 8);
-
-	dsc->slice_chunk_size = bytes_in_slice;
-
-	total_bytes_per_intf = bytes_in_slice * slice_per_intf;
+	total_bytes_per_intf = dsc->slice_chunk_size * slice_per_intf;
 
 	eol_byte_num = total_bytes_per_intf % 3;
 	pkt_per_line = slice_per_intf / dsc->slice_count;
@@ -892,7 +886,7 @@ static void dsi_update_dsc_timing(struct msm_dsi_host *msm_host, bool is_cmd_mod
 		reg_ctrl |= reg;
 
 		reg_ctrl2 &= ~DSI_COMMAND_COMPRESSION_MODE_CTRL2_STREAM0_SLICE_WIDTH__MASK;
-		reg_ctrl2 |= DSI_COMMAND_COMPRESSION_MODE_CTRL2_STREAM0_SLICE_WIDTH(bytes_in_slice);
+		reg_ctrl2 |= DSI_COMMAND_COMPRESSION_MODE_CTRL2_STREAM0_SLICE_WIDTH(dsc->slice_chunk_size);
 
 		dsi_write(msm_host, REG_DSI_COMMAND_COMPRESSION_MODE_CTRL, reg_ctrl);
 		dsi_write(msm_host, REG_DSI_COMMAND_COMPRESSION_MODE_CTRL2, reg_ctrl2);
@@ -915,6 +909,7 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
 	u32 va_end = va_start + mode->vdisplay;
 	u32 hdisplay = mode->hdisplay;
 	u32 wc;
+	int ret;
 
 	DBG("");
 
@@ -950,7 +945,9 @@ static void dsi_timing_setup(struct msm_dsi_host *msm_host, bool is_bonded_dsi)
 		/* we do the calculations for dsc parameters here so that
 		 * panel can use these parameters
 		 */
-		dsi_populate_dsc_params(dsc);
+		ret = dsi_populate_dsc_params(msm_host, dsc);
+		if (ret)
+			return;
 
 		/* Divide the display by 3 but keep back/font porch and
 		 * pulse width same
@@ -1754,18 +1751,20 @@ static char bpg_offset[DSC_NUM_BUF_RANGES] = {
 	2, 0, 0, -2, -4, -6, -8, -8, -8, -10, -10, -12, -12, -12, -12
 };
 
-static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
-{
-	int mux_words_size;
-	int groups_per_line, groups_total;
-	int min_rate_buffer_size;
-	int hrd_delay;
-	int pre_num_extra_mux_bits, num_extra_mux_bits;
-	int slice_bits;
-	int target_bpp_x16;
-	int data;
-	int final_value, final_scale;
+static int dsi_populate_dsc_params(struct msm_dsi_host *msm_host, struct drm_dsc_config *dsc)
+{
 	int i;
+	u16 bpp = dsc->bits_per_pixel >> 4;
+
+	if (dsc->bits_per_pixel & 0xf) {
+		DRM_DEV_ERROR(&msm_host->pdev->dev, "DSI does not support fractional bits_per_pixel\n");
+		return -EINVAL;
+	}
+
+	if (dsc->bits_per_component != 8) {
+		DRM_DEV_ERROR(&msm_host->pdev->dev, "DSI does not support bits_per_component != 8 yet\n");
+		return -EOPNOTSUPP;
+	}
 
 	dsc->rc_model_size = 8192;
 	dsc->first_line_bpg_offset = 12;
@@ -1783,16 +1782,21 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
 	for (i = 0; i < DSC_NUM_BUF_RANGES; i++) {
 		dsc->rc_range_params[i].range_min_qp = min_qp[i];
 		dsc->rc_range_params[i].range_max_qp = max_qp[i];
-		dsc->rc_range_params[i].range_bpg_offset = bpg_offset[i];
+		/*
+		 * Range BPG Offset contains two's-complement signed values that fill
+		 * 8 bits, yet the registers and DCS PPS field are only 6 bits wide.
+		 */
+		dsc->rc_range_params[i].range_bpg_offset = bpg_offset[i] & DSC_RANGE_BPG_OFFSET_MASK;
 	}
 
-	dsc->initial_offset = 6144; /* Not bpp 12 */
-	if (dsc->bits_per_pixel != 8)
+	dsc->initial_offset = 6144;		/* Not bpp 12 */
+	if (bpp != 8)
 		dsc->initial_offset = 2048;	/* bpp = 12 */
 
-	mux_words_size = 48;		/* bpc == 8/10 */
-	if (dsc->bits_per_component == 12)
-		mux_words_size = 64;
+	if (dsc->bits_per_component <= 10)
+		dsc->mux_word_size = DSC_MUX_WORD_SIZE_8_10_BPC;
+	else
+		dsc->mux_word_size = DSC_MUX_WORD_SIZE_12_BPC;
 
 	dsc->initial_xmit_delay = 512;
 	dsc->initial_scale_value = 32;
@@ -1804,63 +1808,8 @@ static int dsi_populate_dsc_params(struct drm_dsc_config *dsc)
 	dsc->flatness_max_qp = 12;
 	dsc->rc_quant_incr_limit0 = 11;
 	dsc->rc_quant_incr_limit1 = 11;
-	dsc->mux_word_size = DSC_MUX_WORD_SIZE_8_10_BPC;
-
-	/* FIXME: need to call drm_dsc_compute_rc_parameters() so that rest of
-	 * params are calculated
-	 */
-	groups_per_line = DIV_ROUND_UP(dsc->slice_width, 3);
-	dsc->slice_chunk_size = dsc->slice_width * dsc->bits_per_pixel / 8;
-	if ((dsc->slice_width * dsc->bits_per_pixel) % 8)
-		dsc->slice_chunk_size++;
 
-	/* rbs-min */
-	min_rate_buffer_size =  dsc->rc_model_size - dsc->initial_offset +
-				dsc->initial_xmit_delay * dsc->bits_per_pixel +
-				groups_per_line * dsc->first_line_bpg_offset;
-
-	hrd_delay = DIV_ROUND_UP(min_rate_buffer_size, dsc->bits_per_pixel);
-
-	dsc->initial_dec_delay = hrd_delay - dsc->initial_xmit_delay;
-
-	dsc->initial_scale_value = 8 * dsc->rc_model_size /
-				       (dsc->rc_model_size - dsc->initial_offset);
-
-	slice_bits = 8 * dsc->slice_chunk_size * dsc->slice_height;
-
-	groups_total = groups_per_line * dsc->slice_height;
-
-	data = dsc->first_line_bpg_offset * 2048;
-
-	dsc->nfl_bpg_offset = DIV_ROUND_UP(data, (dsc->slice_height - 1));
-
-	pre_num_extra_mux_bits = 3 * (mux_words_size + (4 * dsc->bits_per_component + 4) - 2);
-
-	num_extra_mux_bits = pre_num_extra_mux_bits - (mux_words_size -
-			     ((slice_bits - pre_num_extra_mux_bits) % mux_words_size));
-
-	data = 2048 * (dsc->rc_model_size - dsc->initial_offset + num_extra_mux_bits);
-	dsc->slice_bpg_offset = DIV_ROUND_UP(data, groups_total);
-
-	/* bpp * 16 + 0.5 */
-	data = dsc->bits_per_pixel * 16;
-	data *= 2;
-	data++;
-	data /= 2;
-	target_bpp_x16 = data;
-
-	data = (dsc->initial_xmit_delay * target_bpp_x16) / 16;
-	final_value =  dsc->rc_model_size - data + num_extra_mux_bits;
-	dsc->final_offset = final_value;
-
-	final_scale = 8 * dsc->rc_model_size / (dsc->rc_model_size - final_value);
-
-	data = (final_scale - 9) * (dsc->nfl_bpg_offset + dsc->slice_bpg_offset);
-	dsc->scale_increment_interval = (2048 * dsc->final_offset) / data;
-
-	dsc->scale_decrement_interval = groups_per_line / (dsc->initial_scale_value - 8);
-
-	return 0;
+	return drm_dsc_compute_rc_parameters(dsc);
 }
 
 static int dsi_host_parse_dt(struct msm_dsi_host *msm_host)
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index 7fc0975cb869..ee6051367679 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -549,6 +549,8 @@ static const struct of_device_id dsi_phy_dt_match[] = {
 #ifdef CONFIG_DRM_MSM_DSI_14NM_PHY
 	{ .compatible = "qcom,dsi-phy-14nm",
 	  .data = &dsi_phy_14nm_cfgs },
+	{ .compatible = "qcom,dsi-phy-14nm-2290",
+	  .data = &dsi_phy_14nm_2290_cfgs },
 	{ .compatible = "qcom,dsi-phy-14nm-660",
 	  .data = &dsi_phy_14nm_660_cfgs },
 	{ .compatible = "qcom,dsi-phy-14nm-8953",
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
index 60a99c6525b2..1096afedd616 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
@@ -50,6 +50,7 @@ extern const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_14nm_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_14nm_660_cfgs;
+extern const struct msm_dsi_phy_cfg dsi_phy_14nm_2290_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_14nm_8953_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_10nm_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_10nm_8998_cfgs;
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
index 0f8f4ca46429..9f488adea7f5 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
@@ -1081,3 +1081,20 @@ const struct msm_dsi_phy_cfg dsi_phy_14nm_8953_cfgs = {
 	.io_start = { 0x1a94400, 0x1a96400 },
 	.num_dsi_phy = 2,
 };
+
+const struct msm_dsi_phy_cfg dsi_phy_14nm_2290_cfgs = {
+	.has_phy_lane = true,
+	.regulator_data = dsi_phy_14nm_17mA_regulators,
+	.num_regulators = ARRAY_SIZE(dsi_phy_14nm_17mA_regulators),
+	.ops = {
+		.enable = dsi_14nm_phy_enable,
+		.disable = dsi_14nm_phy_disable,
+		.pll_init = dsi_pll_14nm_init,
+		.save_pll_state = dsi_14nm_pll_save_state,
+		.restore_pll_state = dsi_14nm_pll_restore_state,
+	},
+	.min_pll_rate = VCO_MIN_RATE,
+	.max_pll_rate = VCO_MAX_RATE,
+	.io_start = { 0x5e94400 },
+	.num_dsi_phy = 1,
+};
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
index 93fe61b86967..4d3fdc806bef 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
@@ -68,16 +68,17 @@ static void msm_hdmi_destroy(struct hdmi *hdmi)
 		destroy_workqueue(hdmi->workq);
 	msm_hdmi_hdcp_destroy(hdmi);
 
+	if (hdmi->i2c)
+		msm_hdmi_i2c_destroy(hdmi->i2c);
+}
+
+static void msm_hdmi_put_phy(struct hdmi *hdmi)
+{
 	if (hdmi->phy_dev) {
 		put_device(hdmi->phy_dev);
 		hdmi->phy = NULL;
 		hdmi->phy_dev = NULL;
 	}
-
-	if (hdmi->i2c)
-		msm_hdmi_i2c_destroy(hdmi->i2c);
-
-	platform_set_drvdata(hdmi->pdev, NULL);
 }
 
 static int msm_hdmi_get_phy(struct hdmi *hdmi)
@@ -93,22 +94,18 @@ static int msm_hdmi_get_phy(struct hdmi *hdmi)
 	}
 
 	phy_pdev = of_find_device_by_node(phy_node);
-	if (phy_pdev)
-		hdmi->phy = platform_get_drvdata(phy_pdev);
-
 	of_node_put(phy_node);
 
-	if (!phy_pdev) {
-		DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n");
-		return -EPROBE_DEFER;
-	}
+	if (!phy_pdev)
+		return dev_err_probe(&pdev->dev, -EPROBE_DEFER, "phy driver is not ready\n");
+
+	hdmi->phy = platform_get_drvdata(phy_pdev);
 	if (!hdmi->phy) {
-		DRM_DEV_ERROR(&pdev->dev, "phy driver is not ready\n");
 		put_device(&phy_pdev->dev);
-		return -EPROBE_DEFER;
+		return dev_err_probe(&pdev->dev, -EPROBE_DEFER, "phy driver is not ready\n");
 	}
 
-	hdmi->phy_dev = get_device(&phy_pdev->dev);
+	hdmi->phy_dev = &phy_pdev->dev;
 
 	return 0;
 }
@@ -117,142 +114,10 @@ static int msm_hdmi_get_phy(struct hdmi *hdmi)
  * we are to EPROBE_DEFER we want to do it here, rather than later
  * at modeset_init() time
  */
-static struct hdmi *msm_hdmi_init(struct platform_device *pdev)
+static int msm_hdmi_init(struct hdmi *hdmi)
 {
-	struct hdmi_platform_config *config = pdev->dev.platform_data;
-	struct hdmi *hdmi = NULL;
-	struct resource *res;
-	int i, ret;
-
-	hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL);
-	if (!hdmi) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-
-	hdmi->pdev = pdev;
-	hdmi->config = config;
-	spin_lock_init(&hdmi->reg_lock);
-
-	ret = drm_of_find_panel_or_bridge(pdev->dev.of_node, 1, 0, NULL, &hdmi->next_bridge);
-	if (ret && ret != -ENODEV)
-		goto fail;
-
-	hdmi->mmio = msm_ioremap(pdev, config->mmio_name);
-	if (IS_ERR(hdmi->mmio)) {
-		ret = PTR_ERR(hdmi->mmio);
-		goto fail;
-	}
-
-	/* HDCP needs physical address of hdmi register */
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-		config->mmio_name);
-	if (!res) {
-		ret = -EINVAL;
-		goto fail;
-	}
-	hdmi->mmio_phy_addr = res->start;
-
-	hdmi->qfprom_mmio = msm_ioremap(pdev, config->qfprom_mmio_name);
-	if (IS_ERR(hdmi->qfprom_mmio)) {
-		DRM_DEV_INFO(&pdev->dev, "can't find qfprom resource\n");
-		hdmi->qfprom_mmio = NULL;
-	}
-
-	hdmi->hpd_regs = devm_kcalloc(&pdev->dev,
-				      config->hpd_reg_cnt,
-				      sizeof(hdmi->hpd_regs[0]),
-				      GFP_KERNEL);
-	if (!hdmi->hpd_regs) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-	for (i = 0; i < config->hpd_reg_cnt; i++)
-		hdmi->hpd_regs[i].supply = config->hpd_reg_names[i];
-
-	ret = devm_regulator_bulk_get(&pdev->dev, config->hpd_reg_cnt, hdmi->hpd_regs);
-	if (ret) {
-		DRM_DEV_ERROR(&pdev->dev, "failed to get hpd regulator: %d\n", ret);
-		goto fail;
-	}
-
-	hdmi->pwr_regs = devm_kcalloc(&pdev->dev,
-				      config->pwr_reg_cnt,
-				      sizeof(hdmi->pwr_regs[0]),
-				      GFP_KERNEL);
-	if (!hdmi->pwr_regs) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-
-	for (i = 0; i < config->pwr_reg_cnt; i++)
-		hdmi->pwr_regs[i].supply = config->pwr_reg_names[i];
-
-	ret = devm_regulator_bulk_get(&pdev->dev, config->pwr_reg_cnt, hdmi->pwr_regs);
-	if (ret) {
-		DRM_DEV_ERROR(&pdev->dev, "failed to get pwr regulator: %d\n", ret);
-		goto fail;
-	}
-
-	hdmi->hpd_clks = devm_kcalloc(&pdev->dev,
-				      config->hpd_clk_cnt,
-				      sizeof(hdmi->hpd_clks[0]),
-				      GFP_KERNEL);
-	if (!hdmi->hpd_clks) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-	for (i = 0; i < config->hpd_clk_cnt; i++) {
-		struct clk *clk;
-
-		clk = msm_clk_get(pdev, config->hpd_clk_names[i]);
-		if (IS_ERR(clk)) {
-			ret = PTR_ERR(clk);
-			DRM_DEV_ERROR(&pdev->dev, "failed to get hpd clk: %s (%d)\n",
-					config->hpd_clk_names[i], ret);
-			goto fail;
-		}
-
-		hdmi->hpd_clks[i] = clk;
-	}
-
-	hdmi->pwr_clks = devm_kcalloc(&pdev->dev,
-				      config->pwr_clk_cnt,
-				      sizeof(hdmi->pwr_clks[0]),
-				      GFP_KERNEL);
-	if (!hdmi->pwr_clks) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-	for (i = 0; i < config->pwr_clk_cnt; i++) {
-		struct clk *clk;
-
-		clk = msm_clk_get(pdev, config->pwr_clk_names[i]);
-		if (IS_ERR(clk)) {
-			ret = PTR_ERR(clk);
-			DRM_DEV_ERROR(&pdev->dev, "failed to get pwr clk: %s (%d)\n",
-					config->pwr_clk_names[i], ret);
-			goto fail;
-		}
-
-		hdmi->pwr_clks[i] = clk;
-	}
-
-	hdmi->hpd_gpiod = devm_gpiod_get_optional(&pdev->dev, "hpd", GPIOD_IN);
-	/* This will catch e.g. -EPROBE_DEFER */
-	if (IS_ERR(hdmi->hpd_gpiod)) {
-		ret = PTR_ERR(hdmi->hpd_gpiod);
-		DRM_DEV_ERROR(&pdev->dev, "failed to get hpd gpio: (%d)\n", ret);
-		goto fail;
-	}
-
-	if (!hdmi->hpd_gpiod)
-		DBG("failed to get HPD gpio");
-
-	if (hdmi->hpd_gpiod)
-		gpiod_set_consumer_name(hdmi->hpd_gpiod, "HDMI_HPD");
-
-	pm_runtime_enable(&pdev->dev);
+	struct platform_device *pdev = hdmi->pdev;
+	int ret;
 
 	hdmi->workq = alloc_ordered_workqueue("msm_hdmi", 0);
 
@@ -264,25 +129,18 @@ static struct hdmi *msm_hdmi_init(struct platform_device *pdev)
 		goto fail;
 	}
 
-	ret = msm_hdmi_get_phy(hdmi);
-	if (ret) {
-		DRM_DEV_ERROR(&pdev->dev, "failed to get phy\n");
-		goto fail;
-	}
-
 	hdmi->hdcp_ctrl = msm_hdmi_hdcp_init(hdmi);
 	if (IS_ERR(hdmi->hdcp_ctrl)) {
 		dev_warn(&pdev->dev, "failed to init hdcp: disabled\n");
 		hdmi->hdcp_ctrl = NULL;
 	}
 
-	return hdmi;
+	return 0;
 
 fail:
-	if (hdmi)
-		msm_hdmi_destroy(hdmi);
+	msm_hdmi_destroy(hdmi);
 
-	return ERR_PTR(ret);
+	return ret;
 }
 
 /* Second part of initialization, the drm/kms level modeset_init,
@@ -297,9 +155,13 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi,
 		struct drm_device *dev, struct drm_encoder *encoder)
 {
 	struct msm_drm_private *priv = dev->dev_private;
-	struct platform_device *pdev = hdmi->pdev;
 	int ret;
 
+	if (priv->num_bridges == ARRAY_SIZE(priv->bridges)) {
+		DRM_DEV_ERROR(dev->dev, "too many bridges\n");
+		return -ENOSPC;
+	}
+
 	hdmi->dev = dev;
 	hdmi->encoder = encoder;
 
@@ -332,14 +194,7 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi,
 
 	drm_connector_attach_encoder(hdmi->connector, hdmi->encoder);
 
-	hdmi->irq = irq_of_parse_and_map(pdev->dev.of_node, 0);
-	if (!hdmi->irq) {
-		ret = -EINVAL;
-		DRM_DEV_ERROR(dev->dev, "failed to get irq\n");
-		goto fail;
-	}
-
-	ret = devm_request_irq(&pdev->dev, hdmi->irq,
+	ret = devm_request_irq(dev->dev, hdmi->irq,
 			msm_hdmi_irq, IRQF_TRIGGER_HIGH,
 			"hdmi_isr", hdmi);
 	if (ret < 0) {
@@ -358,8 +213,6 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi,
 
 	priv->bridges[priv->num_bridges++]       = hdmi->bridge;
 
-	platform_set_drvdata(pdev, hdmi);
-
 	return 0;
 
 fail:
@@ -387,7 +240,7 @@ fail:
 static const char *hpd_reg_names_8960[] = {"core-vdda"};
 static const char *hpd_clk_names_8960[] = {"core", "master_iface", "slave_iface"};
 
-static struct hdmi_platform_config hdmi_tx_8960_config = {
+static const struct hdmi_platform_config hdmi_tx_8960_config = {
 		HDMI_CFG(hpd_reg, 8960),
 		HDMI_CFG(hpd_clk, 8960),
 };
@@ -397,7 +250,7 @@ static const char *pwr_clk_names_8x74[] = {"extp", "alt_iface"};
 static const char *hpd_clk_names_8x74[] = {"iface", "core", "mdp_core"};
 static unsigned long hpd_clk_freq_8x74[] = {0, 19200000, 0};
 
-static struct hdmi_platform_config hdmi_tx_8974_config = {
+static const struct hdmi_platform_config hdmi_tx_8974_config = {
 		HDMI_CFG(pwr_reg, 8x74),
 		HDMI_CFG(pwr_clk, 8x74),
 		HDMI_CFG(hpd_clk, 8x74),
@@ -512,26 +365,12 @@ static int msm_hdmi_register_audio_driver(struct hdmi *hdmi, struct device *dev)
 static int msm_hdmi_bind(struct device *dev, struct device *master, void *data)
 {
 	struct msm_drm_private *priv = dev_get_drvdata(master);
-	struct hdmi_platform_config *hdmi_cfg;
-	struct hdmi *hdmi;
-	struct device_node *of_node = dev->of_node;
+	struct hdmi *hdmi = dev_get_drvdata(dev);
 	int err;
 
-	hdmi_cfg = (struct hdmi_platform_config *)
-			of_device_get_match_data(dev);
-	if (!hdmi_cfg) {
-		DRM_DEV_ERROR(dev, "unknown hdmi_cfg: %pOFn\n", of_node);
-		return -ENXIO;
-	}
-
-	hdmi_cfg->mmio_name     = "core_physical";
-	hdmi_cfg->qfprom_mmio_name = "qfprom_physical";
-
-	dev->platform_data = hdmi_cfg;
-
-	hdmi = msm_hdmi_init(to_platform_device(dev));
-	if (IS_ERR(hdmi))
-		return PTR_ERR(hdmi);
+	err = msm_hdmi_init(hdmi);
+	if (err)
+		return err;
 	priv->hdmi = hdmi;
 
 	err = msm_hdmi_register_audio_driver(hdmi, dev);
@@ -564,12 +403,150 @@ static const struct component_ops msm_hdmi_ops = {
 
 static int msm_hdmi_dev_probe(struct platform_device *pdev)
 {
+	const struct hdmi_platform_config *config;
+	struct device *dev = &pdev->dev;
+	struct hdmi *hdmi;
+	struct resource *res;
+	int i, ret;
+
+	config = of_device_get_match_data(dev);
+	if (!config)
+		return -EINVAL;
+
+	hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL);
+	if (!hdmi)
+		return -ENOMEM;
+
+	hdmi->pdev = pdev;
+	hdmi->config = config;
+	spin_lock_init(&hdmi->reg_lock);
+
+	ret = drm_of_find_panel_or_bridge(pdev->dev.of_node, 1, 0, NULL, &hdmi->next_bridge);
+	if (ret && ret != -ENODEV)
+		return ret;
+
+	hdmi->mmio = msm_ioremap(pdev, "core_physical");
+	if (IS_ERR(hdmi->mmio))
+		return PTR_ERR(hdmi->mmio);
+
+	/* HDCP needs physical address of hdmi register */
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+		"core_physical");
+	if (!res)
+		return -EINVAL;
+	hdmi->mmio_phy_addr = res->start;
+
+	hdmi->qfprom_mmio = msm_ioremap(pdev, "qfprom_physical");
+	if (IS_ERR(hdmi->qfprom_mmio)) {
+		DRM_DEV_INFO(&pdev->dev, "can't find qfprom resource\n");
+		hdmi->qfprom_mmio = NULL;
+	}
+
+	hdmi->irq = platform_get_irq(pdev, 0);
+	if (hdmi->irq < 0)
+		return hdmi->irq;
+
+	hdmi->hpd_regs = devm_kcalloc(&pdev->dev,
+				      config->hpd_reg_cnt,
+				      sizeof(hdmi->hpd_regs[0]),
+				      GFP_KERNEL);
+	if (!hdmi->hpd_regs)
+		return -ENOMEM;
+
+	for (i = 0; i < config->hpd_reg_cnt; i++)
+		hdmi->hpd_regs[i].supply = config->hpd_reg_names[i];
+
+	ret = devm_regulator_bulk_get(&pdev->dev, config->hpd_reg_cnt, hdmi->hpd_regs);
+	if (ret)
+		return dev_err_probe(dev, ret, "failed to get hpd regulators\n");
+
+	hdmi->pwr_regs = devm_kcalloc(&pdev->dev,
+				      config->pwr_reg_cnt,
+				      sizeof(hdmi->pwr_regs[0]),
+				      GFP_KERNEL);
+	if (!hdmi->pwr_regs)
+		return -ENOMEM;
+
+	for (i = 0; i < config->pwr_reg_cnt; i++)
+		hdmi->pwr_regs[i].supply = config->pwr_reg_names[i];
+
+	ret = devm_regulator_bulk_get(&pdev->dev, config->pwr_reg_cnt, hdmi->pwr_regs);
+	if (ret)
+		return dev_err_probe(dev, ret, "failed to get pwr regulators\n");
+
+	hdmi->hpd_clks = devm_kcalloc(&pdev->dev,
+				      config->hpd_clk_cnt,
+				      sizeof(hdmi->hpd_clks[0]),
+				      GFP_KERNEL);
+	if (!hdmi->hpd_clks)
+		return -ENOMEM;
+
+	for (i = 0; i < config->hpd_clk_cnt; i++) {
+		struct clk *clk;
+
+		clk = msm_clk_get(pdev, config->hpd_clk_names[i]);
+		if (IS_ERR(clk))
+			return dev_err_probe(dev, PTR_ERR(clk),
+					     "failed to get hpd clk: %s\n",
+					     config->hpd_clk_names[i]);
+
+		hdmi->hpd_clks[i] = clk;
+	}
+
+	hdmi->pwr_clks = devm_kcalloc(&pdev->dev,
+				      config->pwr_clk_cnt,
+				      sizeof(hdmi->pwr_clks[0]),
+				      GFP_KERNEL);
+	if (!hdmi->pwr_clks)
+		return -ENOMEM;
+
+	for (i = 0; i < config->pwr_clk_cnt; i++) {
+		struct clk *clk;
+
+		clk = msm_clk_get(pdev, config->pwr_clk_names[i]);
+		if (IS_ERR(clk))
+			return dev_err_probe(dev, PTR_ERR(clk),
+					     "failed to get pwr clk: %s\n",
+					     config->pwr_clk_names[i]);
+
+		hdmi->pwr_clks[i] = clk;
+	}
+
+	hdmi->hpd_gpiod = devm_gpiod_get_optional(&pdev->dev, "hpd", GPIOD_IN);
+	/* This will catch e.g. -EPROBE_DEFER */
+	if (IS_ERR(hdmi->hpd_gpiod))
+		return dev_err_probe(dev, PTR_ERR(hdmi->hpd_gpiod),
+				     "failed to get hpd gpio\n");
+
+	if (!hdmi->hpd_gpiod)
+		DBG("failed to get HPD gpio");
+
+	if (hdmi->hpd_gpiod)
+		gpiod_set_consumer_name(hdmi->hpd_gpiod, "HDMI_HPD");
+
+	ret = msm_hdmi_get_phy(hdmi);
+	if (ret) {
+		DRM_DEV_ERROR(&pdev->dev, "failed to get phy\n");
+		return ret;
+	}
+
+	ret = devm_pm_runtime_enable(&pdev->dev);
+	if (ret)
+		return ret;
+
+	platform_set_drvdata(pdev, hdmi);
+
 	return component_add(&pdev->dev, &msm_hdmi_ops);
 }
 
 static int msm_hdmi_dev_remove(struct platform_device *pdev)
 {
+	struct hdmi *hdmi = dev_get_drvdata(&pdev->dev);
+
 	component_del(&pdev->dev, &msm_hdmi_ops);
+
+	msm_hdmi_put_phy(hdmi);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h
index 04a74381aaf7..e8dbee50637f 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.h
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.h
@@ -86,9 +86,6 @@ struct hdmi {
 
 /* platform config data (ie. from DT, or pdata) */
 struct hdmi_platform_config {
-	const char *mmio_name;
-	const char *qfprom_mmio_name;
-
 	/* regulators that need to be on for hpd: */
 	const char **hpd_reg_names;
 	int hpd_reg_cnt;
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 28034c21f6bc..8b0b0ac74a6f 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -247,6 +247,7 @@ static int msm_drm_uninit(struct device *dev)
 
 	for (i = 0; i < priv->num_bridges; i++)
 		drm_bridge_remove(priv->bridges[i]);
+	priv->num_bridges = 0;
 
 	pm_runtime_get_sync(dev);
 	msm_irq_uninstall(ddev);
@@ -276,7 +277,6 @@ static int msm_drm_uninit(struct device *dev)
 
 struct msm_gem_address_space *msm_kms_init_aspace(struct drm_device *dev)
 {
-	struct iommu_domain *domain;
 	struct msm_gem_address_space *aspace;
 	struct msm_mmu *mmu;
 	struct device *mdp_dev = dev->dev;
@@ -292,22 +292,21 @@ struct msm_gem_address_space *msm_kms_init_aspace(struct drm_device *dev)
 	else
 		iommu_dev = mdss_dev;
 
-	domain = iommu_domain_alloc(iommu_dev->bus);
-	if (!domain) {
+	mmu = msm_iommu_new(iommu_dev, 0);
+	if (IS_ERR(mmu))
+		return ERR_CAST(mmu);
+
+	if (!mmu) {
 		drm_info(dev, "no IOMMU, fallback to phys contig buffers for scanout\n");
 		return NULL;
 	}
 
-	mmu = msm_iommu_new(iommu_dev, domain);
-	if (IS_ERR(mmu)) {
-		iommu_domain_free(domain);
-		return ERR_CAST(mmu);
-	}
-
 	aspace = msm_gem_address_space_create(mmu, "mdp_kms",
 		0x1000, 0x100000000 - 0x1000);
-	if (IS_ERR(aspace))
+	if (IS_ERR(aspace)) {
+		dev_err(mdp_dev, "aspace create, error %pe\n", aspace);
 		mmu->funcs->destroy(mmu);
+	}
 
 	return aspace;
 }
@@ -419,7 +418,6 @@ static int msm_drm_init(struct device *dev, const struct drm_driver *drv)
 	priv->dev = ddev;
 
 	priv->wq = alloc_ordered_workqueue("msm", 0);
-	priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD;
 
 	INIT_LIST_HEAD(&priv->objects);
 	mutex_init(&priv->obj_lock);
@@ -818,6 +816,7 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
 	case MSM_INFO_GET_OFFSET:
 	case MSM_INFO_GET_IOVA:
 	case MSM_INFO_SET_IOVA:
+	case MSM_INFO_GET_FLAGS:
 		/* value returned as immediate, not pointer, so len==0: */
 		if (args->len)
 			return -EINVAL;
@@ -845,6 +844,15 @@ static int msm_ioctl_gem_info(struct drm_device *dev, void *data,
 	case MSM_INFO_SET_IOVA:
 		ret = msm_ioctl_gem_info_set_iova(dev, file, obj, args->value);
 		break;
+	case MSM_INFO_GET_FLAGS:
+		if (obj->import_attach) {
+			ret = -EINVAL;
+			break;
+		}
+		/* Hide internal kernel-only flags: */
+		args->value = to_msm_bo(obj)->flags & MSM_BO_FLAGS;
+		ret = 0;
+		break;
 	case MSM_INFO_SET_NAME:
 		/* length check should leave room for terminating null: */
 		if (args->len >= sizeof(msm_obj->name)) {
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index b2ea262296a4..d4e0ef608950 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -224,7 +224,13 @@ struct msm_drm_private {
 
 	struct drm_atomic_state *pm_state;
 
-	/* For hang detection, in ms */
+	/**
+	 * hangcheck_period: For hang detection, in ms
+	 *
+	 * Note that in practice, a submit/job will get at least two hangcheck
+	 * periods, due to checking for progress being implemented as simply
+	 * "have the CP position registers changed since last time?"
+	 */
 	unsigned int hangcheck_period;
 
 	/**
diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c
index b373e3000320..31e1e30cb52a 100644
--- a/drivers/gpu/drm/msm/msm_fbdev.c
+++ b/drivers/gpu/drm/msm/msm_fbdev.c
@@ -93,7 +93,7 @@ static int msm_fbdev_create(struct drm_fb_helper *helper,
 		goto fail;
 	}
 
-	fbi = drm_fb_helper_alloc_fbi(helper);
+	fbi = drm_fb_helper_alloc_info(helper);
 	if (IS_ERR(fbi)) {
 		DRM_DEV_ERROR(dev->dev, "failed to allocate fb info\n");
 		ret = PTR_ERR(fbi);
@@ -182,7 +182,7 @@ void msm_fbdev_free(struct drm_device *dev)
 
 	DBG();
 
-	drm_fb_helper_unregister_fbi(helper);
+	drm_fb_helper_unregister_info(helper);
 
 	drm_fb_helper_fini(helper);
 
diff --git a/drivers/gpu/drm/msm/msm_gem_shrinker.c b/drivers/gpu/drm/msm/msm_gem_shrinker.c
index 1de14e67f96b..051bdbc093cf 100644
--- a/drivers/gpu/drm/msm/msm_gem_shrinker.c
+++ b/drivers/gpu/drm/msm/msm_gem_shrinker.c
@@ -15,7 +15,7 @@
 /* Default disabled for now until it has some more testing on the different
  * iommu combinations that can be paired with the driver:
  */
-static bool enable_eviction = false;
+static bool enable_eviction = true;
 MODULE_PARM_DESC(enable_eviction, "Enable swappable GEM buffers");
 module_param(enable_eviction, bool, 0600);
 
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 5599d93ec0d2..73a2ca122c57 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -334,8 +334,7 @@ static int submit_fence_sync(struct msm_gem_submit *submit, bool no_implicit)
 		if (ret)
 			return ret;
 
-		/* exclusive fences must be ordered */
-		if (no_implicit && !write)
+		if (no_implicit)
 			continue;
 
 		ret = drm_sched_job_add_implicit_dependencies(&submit->base,
@@ -501,11 +500,11 @@ out:
  */
 static void submit_cleanup(struct msm_gem_submit *submit, bool error)
 {
-	unsigned cleanup_flags = BO_LOCKED | BO_OBJ_PINNED;
+	unsigned cleanup_flags = BO_LOCKED;
 	unsigned i;
 
 	if (error)
-		cleanup_flags |= BO_VMA_PINNED;
+		cleanup_flags |= BO_VMA_PINNED | BO_OBJ_PINNED;
 
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct msm_gem_object *msm_obj = submit->bos[i].obj;
@@ -706,7 +705,7 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	struct msm_drm_private *priv = dev->dev_private;
 	struct drm_msm_gem_submit *args = data;
 	struct msm_file_private *ctx = file->driver_priv;
-	struct msm_gem_submit *submit = NULL;
+	struct msm_gem_submit *submit;
 	struct msm_gpu *gpu = priv->gpu;
 	struct msm_gpu_submitqueue *queue;
 	struct msm_ringbuffer *ring;
@@ -946,8 +945,7 @@ out_unlock:
 		put_unused_fd(out_fence_fd);
 	mutex_unlock(&queue->lock);
 out_post_unlock:
-	if (submit)
-		msm_gem_submit_put(submit);
+	msm_gem_submit_put(submit);
 	if (!IS_ERR_OR_NULL(post_deps)) {
 		for (i = 0; i < args->nr_out_syncobjs; ++i) {
 			kfree(post_deps[i].chain);
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 0098ee8438aa..30ed45af76ad 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -492,6 +492,21 @@ static void hangcheck_timer_reset(struct msm_gpu *gpu)
 			round_jiffies_up(jiffies + msecs_to_jiffies(priv->hangcheck_period)));
 }
 
+static bool made_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
+{
+	if (ring->hangcheck_progress_retries >= DRM_MSM_HANGCHECK_PROGRESS_RETRIES)
+		return false;
+
+	if (!gpu->funcs->progress)
+		return false;
+
+	if (!gpu->funcs->progress(gpu, ring))
+		return false;
+
+	ring->hangcheck_progress_retries++;
+	return true;
+}
+
 static void hangcheck_handler(struct timer_list *t)
 {
 	struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
@@ -502,9 +517,12 @@ static void hangcheck_handler(struct timer_list *t)
 	if (fence != ring->hangcheck_fence) {
 		/* some progress has been made.. ya! */
 		ring->hangcheck_fence = fence;
-	} else if (fence_before(fence, ring->fctx->last_fence)) {
+		ring->hangcheck_progress_retries = 0;
+	} else if (fence_before(fence, ring->fctx->last_fence) &&
+			!made_progress(gpu, ring)) {
 		/* no progress and not done.. hung! */
 		ring->hangcheck_fence = fence;
+		ring->hangcheck_progress_retries = 0;
 		DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
 				gpu->name, ring->id);
 		DRM_DEV_ERROR(dev->dev, "%s:     completed fence: %u\n",
@@ -830,6 +848,7 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
 		const char *name, struct msm_gpu_config *config)
 {
+	struct msm_drm_private *priv = drm->dev_private;
 	int i, ret, nr_rings = config->nr_rings;
 	void *memptrs;
 	uint64_t memptrs_iova;
@@ -857,6 +876,16 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	kthread_init_work(&gpu->recover_work, recover_worker);
 	kthread_init_work(&gpu->fault_work, fault_worker);
 
+	priv->hangcheck_period = DRM_MSM_HANGCHECK_DEFAULT_PERIOD;
+
+	/*
+	 * If progress detection is supported, halve the hangcheck timer
+	 * duration, as it takes two iterations of the hangcheck handler
+	 * to detect a hang.
+	 */
+	if (funcs->progress)
+		priv->hangcheck_period /= 2;
+
 	timer_setup(&gpu->hangcheck_timer, hangcheck_handler, 0);
 
 	spin_lock_init(&gpu->perf_lock);
@@ -997,4 +1026,6 @@ void msm_gpu_cleanup(struct msm_gpu *gpu)
 	}
 
 	msm_devfreq_cleanup(gpu);
+
+	platform_set_drvdata(gpu->pdev, NULL);
 }
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index ff911e7305ce..651786bc55e5 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -78,6 +78,15 @@ struct msm_gpu_funcs {
 	struct msm_gem_address_space *(*create_private_address_space)
 		(struct msm_gpu *gpu);
 	uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
+
+	/**
+	 * progress: Has the GPU made progress?
+	 *
+	 * Return true if GPU position in cmdstream has advanced (or changed)
+	 * since the last call.  To avoid false negatives, this should account
+	 * for cmdstream that is buffered in this FIFO upstream of the CP fw.
+	 */
+	bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
 };
 
 /* Additional state for iommu faults: */
@@ -237,6 +246,7 @@ struct msm_gpu {
 #define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */
 
 #define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
+#define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3
 	struct timer_list hangcheck_timer;
 
 	/* Fault info for most recent iova fault: */
@@ -280,6 +290,10 @@ struct msm_gpu {
 static inline struct msm_gpu *dev_to_gpu(struct device *dev)
 {
 	struct adreno_smmu_priv *adreno_smmu = dev_get_drvdata(dev);
+
+	if (!adreno_smmu)
+		return NULL;
+
 	return container_of(adreno_smmu, struct msm_gpu, adreno_smmu);
 }
 
@@ -536,7 +550,7 @@ static inline void gpu_rmw(struct msm_gpu *gpu, u32 reg, u32 mask, u32 or)
 	msm_rmw(gpu->mmio + (reg << 2), mask, or);
 }
 
-static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
+static inline u64 gpu_read64(struct msm_gpu *gpu, u32 reg)
 {
 	u64 val;
 
@@ -554,17 +568,17 @@ static inline u64 gpu_read64(struct msm_gpu *gpu, u32 lo, u32 hi)
 	 * when the lo is read, so make sure to read the lo first to trigger
 	 * that
 	 */
-	val = (u64) msm_readl(gpu->mmio + (lo << 2));
-	val |= ((u64) msm_readl(gpu->mmio + (hi << 2)) << 32);
+	val = (u64) msm_readl(gpu->mmio + (reg << 2));
+	val |= ((u64) msm_readl(gpu->mmio + ((reg + 1) << 2)) << 32);
 
 	return val;
 }
 
-static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
+static inline void gpu_write64(struct msm_gpu *gpu, u32 reg, u64 val)
 {
 	/* Why not a writeq here? Read the screed above */
-	msm_writel(lower_32_bits(val), gpu->mmio + (lo << 2));
-	msm_writel(upper_32_bits(val), gpu->mmio + (hi << 2));
+	msm_writel(lower_32_bits(val), gpu->mmio + (reg << 2));
+	msm_writel(upper_32_bits(val), gpu->mmio + ((reg + 1) << 2));
 }
 
 int msm_gpu_pm_suspend(struct msm_gpu *gpu);
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 5577cea7c009..c2507582ecf3 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -186,6 +186,13 @@ int msm_iommu_pagetable_params(struct msm_mmu *mmu,
 	return 0;
 }
 
+struct iommu_domain_geometry *msm_iommu_get_geometry(struct msm_mmu *mmu)
+{
+	struct msm_iommu *iommu = to_msm_iommu(mmu);
+
+	return &iommu->domain->geometry;
+}
+
 static const struct msm_mmu_funcs pagetable_funcs = {
 		.map = msm_iommu_pagetable_map,
 		.unmap = msm_iommu_pagetable_unmap,
@@ -367,17 +374,23 @@ static const struct msm_mmu_funcs funcs = {
 		.resume_translation = msm_iommu_resume_translation,
 };
 
-struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain)
+struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks)
 {
+	struct iommu_domain *domain;
 	struct msm_iommu *iommu;
 	int ret;
 
+	domain = iommu_domain_alloc(dev->bus);
 	if (!domain)
-		return ERR_PTR(-ENODEV);
+		return NULL;
+
+	iommu_set_pgtable_quirks(domain, quirks);
 
 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
-	if (!iommu)
+	if (!iommu) {
+		iommu_domain_free(domain);
 		return ERR_PTR(-ENOMEM);
+	}
 
 	iommu->domain = domain;
 	msm_mmu_init(&iommu->base, dev, &funcs, MSM_MMU_IOMMU);
@@ -386,6 +399,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain)
 
 	ret = iommu_attach_device(iommu->domain, dev);
 	if (ret) {
+		iommu_domain_free(domain);
 		kfree(iommu);
 		return ERR_PTR(ret);
 	}
diff --git a/drivers/gpu/drm/msm/msm_mdss.c b/drivers/gpu/drm/msm/msm_mdss.c
index e13c5c12b775..86b28add1fff 100644
--- a/drivers/gpu/drm/msm/msm_mdss.c
+++ b/drivers/gpu/drm/msm/msm_mdss.c
@@ -22,6 +22,7 @@
 #define HW_REV				0x0
 #define HW_INTR_STATUS			0x0010
 
+#define UBWC_DEC_HW_VERSION		0x58
 #define UBWC_STATIC			0x144
 #define UBWC_CTRL_2			0x150
 #define UBWC_PREDICTION_MODE		0x154
@@ -174,9 +175,63 @@ static int _msm_mdss_irq_domain_add(struct msm_mdss *msm_mdss)
 	return 0;
 }
 
+#define UBWC_1_0 0x10000000
+#define UBWC_2_0 0x20000000
+#define UBWC_3_0 0x30000000
+#define UBWC_4_0 0x40000000
+
+static void msm_mdss_setup_ubwc_dec_20(struct msm_mdss *msm_mdss,
+				       u32 ubwc_static)
+{
+	writel_relaxed(ubwc_static, msm_mdss->mmio + UBWC_STATIC);
+}
+
+static void msm_mdss_setup_ubwc_dec_30(struct msm_mdss *msm_mdss,
+				       unsigned int ubwc_version,
+				       u32 ubwc_swizzle,
+				       u32 highest_bank_bit,
+				       u32 macrotile_mode)
+{
+	u32 value = (ubwc_swizzle & 0x1) |
+		    (highest_bank_bit & 0x3) << 4 |
+		    (macrotile_mode & 0x1) << 12;
+
+	if (ubwc_version == UBWC_3_0)
+		value |= BIT(10);
+
+	if (ubwc_version == UBWC_1_0)
+		value |= BIT(8);
+
+	writel_relaxed(value, msm_mdss->mmio + UBWC_STATIC);
+}
+
+static void msm_mdss_setup_ubwc_dec_40(struct msm_mdss *msm_mdss,
+				       unsigned int ubwc_version,
+				       u32 ubwc_swizzle,
+				       u32 ubwc_static,
+				       u32 highest_bank_bit,
+				       u32 macrotile_mode)
+{
+	u32 value = (ubwc_swizzle & 0x7) |
+		    (ubwc_static & 0x1) << 3 |
+		    (highest_bank_bit & 0x7) << 4 |
+		    (macrotile_mode & 0x1) << 12;
+
+	writel_relaxed(value, msm_mdss->mmio + UBWC_STATIC);
+
+	if (ubwc_version == UBWC_3_0) {
+		writel_relaxed(1, msm_mdss->mmio + UBWC_CTRL_2);
+		writel_relaxed(0, msm_mdss->mmio + UBWC_PREDICTION_MODE);
+	} else {
+		writel_relaxed(2, msm_mdss->mmio + UBWC_CTRL_2);
+		writel_relaxed(1, msm_mdss->mmio + UBWC_PREDICTION_MODE);
+	}
+}
+
 static int msm_mdss_enable(struct msm_mdss *msm_mdss)
 {
 	int ret;
+	u32 hw_rev;
 
 	/*
 	 * Several components have AXI clocks that can only be turned on if
@@ -198,26 +253,39 @@ static int msm_mdss_enable(struct msm_mdss *msm_mdss)
 	if (msm_mdss->is_mdp5)
 		return 0;
 
+	hw_rev = readl_relaxed(msm_mdss->mmio + HW_REV);
+	dev_dbg(msm_mdss->dev, "HW_REV: 0x%x\n", hw_rev);
+	dev_dbg(msm_mdss->dev, "UBWC_DEC_HW_VERSION: 0x%x\n",
+		readl_relaxed(msm_mdss->mmio + UBWC_DEC_HW_VERSION));
+
 	/*
 	 * ubwc config is part of the "mdss" region which is not accessible
 	 * from the rest of the driver. hardcode known configurations here
+	 *
+	 * Decoder version can be read from the UBWC_DEC_HW_VERSION reg,
+	 * UBWC_n and the rest of params comes from hw_catalog.
+	 * Unforunately this driver can not access hw catalog, so we have to
+	 * hardcode them here.
 	 */
-	switch (readl_relaxed(msm_mdss->mmio + HW_REV)) {
+	switch (hw_rev) {
 	case DPU_HW_VER_500:
 	case DPU_HW_VER_501:
-		writel_relaxed(0x420, msm_mdss->mmio + UBWC_STATIC);
+		msm_mdss_setup_ubwc_dec_30(msm_mdss, UBWC_3_0, 0, 2, 0);
 		break;
 	case DPU_HW_VER_600:
-		/* TODO: 0x102e for LP_DDR4 */
-		writel_relaxed(0x103e, msm_mdss->mmio + UBWC_STATIC);
-		writel_relaxed(2, msm_mdss->mmio + UBWC_CTRL_2);
-		writel_relaxed(1, msm_mdss->mmio + UBWC_PREDICTION_MODE);
+		/* TODO: highest_bank_bit = 2 for LP_DDR4 */
+		msm_mdss_setup_ubwc_dec_40(msm_mdss, UBWC_4_0, 6, 1, 3, 1);
 		break;
 	case DPU_HW_VER_620:
-		writel_relaxed(0x1e, msm_mdss->mmio + UBWC_STATIC);
+		/* UBWC_2_0 */
+		msm_mdss_setup_ubwc_dec_20(msm_mdss, 0x1e);
+		break;
+	case DPU_HW_VER_630:
+		/* UBWC_2_0 */
+		msm_mdss_setup_ubwc_dec_20(msm_mdss, 0x11f);
 		break;
 	case DPU_HW_VER_720:
-		writel_relaxed(0x101e, msm_mdss->mmio + UBWC_STATIC);
+		msm_mdss_setup_ubwc_dec_40(msm_mdss, UBWC_3_0, 6, 1, 1, 1);
 		break;
 	}
 
@@ -445,6 +513,7 @@ static const struct of_device_id mdss_dt_match[] = {
 	{ .compatible = "qcom,sc7180-mdss" },
 	{ .compatible = "qcom,sc7280-mdss" },
 	{ .compatible = "qcom,sc8180x-mdss" },
+	{ .compatible = "qcom,sm6115-mdss" },
 	{ .compatible = "qcom,sm8150-mdss" },
 	{ .compatible = "qcom,sm8250-mdss" },
 	{}
diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h
index de158e1bf765..74cd81e701ff 100644
--- a/drivers/gpu/drm/msm/msm_mmu.h
+++ b/drivers/gpu/drm/msm/msm_mmu.h
@@ -40,7 +40,7 @@ static inline void msm_mmu_init(struct msm_mmu *mmu, struct device *dev,
 	mmu->type = type;
 }
 
-struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain);
+struct msm_mmu *msm_iommu_new(struct device *dev, unsigned long quirks);
 struct msm_mmu *msm_gpummu_new(struct device *dev, struct msm_gpu *gpu);
 
 static inline void msm_mmu_set_fault_handler(struct msm_mmu *mmu, void *arg,
@@ -58,5 +58,6 @@ void msm_gpummu_params(struct msm_mmu *mmu, dma_addr_t *pt_base,
 
 int msm_iommu_pagetable_params(struct msm_mmu *mmu, phys_addr_t *ttbr,
 		int *asid);
+struct iommu_domain_geometry *msm_iommu_get_geometry(struct msm_mmu *mmu);
 
 #endif /* __MSM_MMU_H__ */
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.c b/drivers/gpu/drm/msm/msm_ringbuffer.c
index cad4c3525f0b..57a8e9564540 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.c
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.c
@@ -25,7 +25,8 @@ static struct dma_fence *msm_job_run(struct drm_sched_job *job)
 
 		msm_gem_lock(obj);
 		msm_gem_unpin_vma_fenced(submit->bos[i].vma, fctx);
-		submit->bos[i].flags &= ~BO_VMA_PINNED;
+		msm_gem_unpin_locked(obj);
+		submit->bos[i].flags &= ~(BO_VMA_PINNED | BO_OBJ_PINNED);
 		msm_gem_unlock(obj);
 	}
 
diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h
index 2a5045abe46e..698b333abccd 100644
--- a/drivers/gpu/drm/msm/msm_ringbuffer.h
+++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
@@ -35,6 +35,11 @@ struct msm_rbmemptrs {
 	volatile u64 ttbr0;
 };
 
+struct msm_cp_state {
+	uint64_t ib1_base, ib2_base;
+	uint32_t ib1_rem, ib2_rem;
+};
+
 struct msm_ringbuffer {
 	struct msm_gpu *gpu;
 	int id;
@@ -64,6 +69,29 @@ struct msm_ringbuffer {
 	uint64_t memptrs_iova;
 	struct msm_fence_context *fctx;
 
+	/**
+	 * hangcheck_progress_retries:
+	 *
+	 * The number of extra hangcheck duration cycles that we have given
+	 * due to it appearing that the GPU is making forward progress.
+	 *
+	 * For GPU generations which support progress detection (see.
+	 * msm_gpu_funcs::progress()), if the GPU appears to be making progress
+	 * (ie. the CP has advanced in the command stream, we'll allow up to
+	 * DRM_MSM_HANGCHECK_PROGRESS_RETRIES expirations of the hangcheck timer
+	 * before killing the job.  But to detect progress we need two sample
+	 * points, so the duration of the hangcheck timer is halved.  In other
+	 * words we'll let the submit run for up to:
+	 *
+	 * (DRM_MSM_HANGCHECK_DEFAULT_PERIOD / 2) * (DRM_MSM_HANGCHECK_PROGRESS_RETRIES + 1)
+	 */
+	int hangcheck_progress_retries;
+
+	/**
+	 * last_cp_state: The state of the CP at the last call to gpu->progress()
+	 */
+	struct msm_cp_state last_cp_state;
+
 	/*
 	 * preempt_lock protects preemption and serializes wptr updates against
 	 * preemption.  Can be aquired from irq context.
diff --git a/drivers/gpu/drm/mxsfb/lcdif_drv.c b/drivers/gpu/drm/mxsfb/lcdif_drv.c
index 075002ed6fb0..cc2ceb301b96 100644
--- a/drivers/gpu/drm/mxsfb/lcdif_drv.c
+++ b/drivers/gpu/drm/mxsfb/lcdif_drv.c
@@ -16,7 +16,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_bridge.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_mode_config.h>
diff --git a/drivers/gpu/drm/mxsfb/lcdif_kms.c b/drivers/gpu/drm/mxsfb/lcdif_kms.c
index f0ad6e2a9352..262bc43b1079 100644
--- a/drivers/gpu/drm/mxsfb/lcdif_kms.c
+++ b/drivers/gpu/drm/mxsfb/lcdif_kms.c
@@ -5,6 +5,7 @@
  * This code is based on drivers/gpu/drm/mxsfb/mxsfb*
  */
 
+#include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/io.h>
 #include <linux/iopoll.h>
@@ -332,6 +333,18 @@ static void lcdif_enable_controller(struct lcdif_drm_private *lcdif)
 {
 	u32 reg;
 
+	/* Set FIFO Panic watermarks, low 1/3, high 2/3 . */
+	writel(FIELD_PREP(PANIC0_THRES_LOW_MASK, 1 * PANIC0_THRES_MAX / 3) |
+	       FIELD_PREP(PANIC0_THRES_HIGH_MASK, 2 * PANIC0_THRES_MAX / 3),
+	       lcdif->base + LCDC_V8_PANIC0_THRES);
+
+	/*
+	 * Enable FIFO Panic, this does not generate interrupt, but
+	 * boosts NoC priority based on FIFO Panic watermarks.
+	 */
+	writel(INT_ENABLE_D1_PLANE_PANIC_EN,
+	       lcdif->base + LCDC_V8_INT_ENABLE_D1);
+
 	reg = readl(lcdif->base + LCDC_V8_DISP_PARA);
 	reg |= DISP_PARA_DISP_ON;
 	writel(reg, lcdif->base + LCDC_V8_DISP_PARA);
@@ -359,6 +372,9 @@ static void lcdif_disable_controller(struct lcdif_drm_private *lcdif)
 	reg = readl(lcdif->base + LCDC_V8_DISP_PARA);
 	reg &= ~DISP_PARA_DISP_ON;
 	writel(reg, lcdif->base + LCDC_V8_DISP_PARA);
+
+	/* Disable FIFO Panic NoC priority booster. */
+	writel(0, lcdif->base + LCDC_V8_INT_ENABLE_D1);
 }
 
 static void lcdif_reset_block(struct lcdif_drm_private *lcdif)
diff --git a/drivers/gpu/drm/mxsfb/lcdif_regs.h b/drivers/gpu/drm/mxsfb/lcdif_regs.h
index fb74eb5ccbf1..c55dfb236c1d 100644
--- a/drivers/gpu/drm/mxsfb/lcdif_regs.h
+++ b/drivers/gpu/drm/mxsfb/lcdif_regs.h
@@ -255,6 +255,7 @@
 
 #define PANIC0_THRES_LOW_MASK		GENMASK(24, 16)
 #define PANIC0_THRES_HIGH_MASK		GENMASK(8, 0)
+#define PANIC0_THRES_MAX		511
 
 #define LCDIF_MIN_XRES			120
 #define LCDIF_MIN_YRES			120
diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c
index b29b332ed381..810edea0a31e 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c
@@ -20,7 +20,7 @@
 #include <drm/drm_bridge.h>
 #include <drm/drm_connector.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild
index 60586fb8275e..5e5617006da5 100644
--- a/drivers/gpu/drm/nouveau/Kbuild
+++ b/drivers/gpu/drm/nouveau/Kbuild
@@ -54,10 +54,6 @@ nouveau-y += nouveau_bios.o
 nouveau-y += nouveau_connector.o
 nouveau-y += nouveau_display.o
 nouveau-y += nouveau_dp.o
-nouveau-y += nouveau_fbcon.o
-nouveau-y += nv04_fbcon.o
-nouveau-y += nv50_fbcon.o
-nouveau-y += nvc0_fbcon.o
 include $(src)/dispnv04/Kbuild
 include $(src)/dispnv50/Kbuild
 
diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
index ee92d576d277..0e0f117bc70b 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
@@ -23,6 +23,7 @@
  * DEALINGS IN THE SOFTWARE.
  */
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_vblank.h>
@@ -37,7 +38,6 @@
 #include "nouveau_crtc.h"
 #include "hw.h"
 #include "nvreg.h"
-#include "nouveau_fbcon.h"
 #include "disp.h"
 #include "nouveau_dma.h"
 
@@ -761,7 +761,8 @@ static void nv_crtc_destroy(struct drm_crtc *crtc)
 	nouveau_bo_unmap(nv_crtc->cursor.nvbo);
 	nouveau_bo_unpin(nv_crtc->cursor.nvbo);
 	nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
-	nvif_notify_dtor(&nv_crtc->vblank);
+	nvif_event_dtor(&nv_crtc->vblank);
+	nvif_head_dtor(&nv_crtc->head);
 	kfree(nv_crtc);
 }
 
@@ -914,14 +915,6 @@ nv04_crtc_mode_set_base_atomic(struct drm_crtc *crtc,
 			       struct drm_framebuffer *fb,
 			       int x, int y, enum mode_set_atomic state)
 {
-	struct nouveau_drm *drm = nouveau_drm(crtc->dev);
-	struct drm_device *dev = drm->dev;
-
-	if (state == ENTER_ATOMIC_MODE_SET)
-		nouveau_fbcon_accel_save_disable(dev);
-	else
-		nouveau_fbcon_accel_restore(dev);
-
 	return nv04_crtc_do_mode_set_base(crtc, fb, x, y, true);
 }
 
@@ -1080,10 +1073,10 @@ nv04_finish_page_flip(struct nouveau_channel *chan,
 }
 
 int
-nv04_flip_complete(struct nvif_notify *notify)
+nv04_flip_complete(struct nvif_event *event, void *argv, u32 argc)
 {
-	struct nouveau_cli *cli = (void *)notify->object->client;
-	struct nouveau_drm *drm = cli->drm;
+	struct nv04_display *disp = container_of(event, typeof(*disp), flip);
+	struct nouveau_drm *drm = disp->drm;
 	struct nouveau_channel *chan = drm->channel;
 	struct nv04_page_flip_state state;
 
@@ -1094,7 +1087,7 @@ nv04_flip_complete(struct nvif_notify *notify)
 				 state.bpp / 8);
 	}
 
-	return NVIF_NOTIFY_KEEP;
+	return NVIF_EVENT_KEEP;
 }
 
 static int
@@ -1279,13 +1272,13 @@ static const struct drm_plane_funcs nv04_primary_plane_funcs = {
 	DRM_PLANE_NON_ATOMIC_FUNCS,
 };
 
-static int nv04_crtc_vblank_handler(struct nvif_notify *notify)
+static int
+nv04_crtc_vblank_handler(struct nvif_event *event, void *repv, u32 repc)
 {
-	struct nouveau_crtc *nv_crtc =
-		container_of(notify, struct nouveau_crtc, vblank);
+	struct nouveau_crtc *nv_crtc = container_of(event, struct nouveau_crtc, vblank);
 
 	drm_crtc_handle_vblank(&nv_crtc->base);
-	return NVIF_NOTIFY_KEEP;
+	return NVIF_EVENT_KEEP;
 }
 
 int
@@ -1341,14 +1334,10 @@ nv04_crtc_create(struct drm_device *dev, int crtc_num)
 
 	nv04_cursor_init(nv_crtc);
 
-	ret = nvif_notify_ctor(&disp->disp.object, "kmsVbl", nv04_crtc_vblank_handler,
-			       false, NV04_DISP_NTFY_VBLANK,
-			       &(struct nvif_notify_head_req_v0) {
-				    .head = nv_crtc->index,
-			       },
-			       sizeof(struct nvif_notify_head_req_v0),
-			       sizeof(struct nvif_notify_head_rep_v0),
-			       &nv_crtc->vblank);
+	ret = nvif_head_ctor(&disp->disp, nv_crtc->base.name, nv_crtc->index, &nv_crtc->head);
+	if (ret)
+		return ret;
 
-	return ret;
+	return nvif_head_vblank_event_ctor(&nv_crtc->head, "kmsVbl", nv04_crtc_vblank_handler,
+					   false, &nv_crtc->vblank);
 }
diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c
index 99fee4d8cd31..e9ac3fb27ff7 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c
@@ -61,7 +61,7 @@ nv04_display_fini(struct drm_device *dev, bool runtime, bool suspend)
 	struct drm_crtc *crtc;
 
 	/* Disable flip completion events. */
-	nvif_notify_put(&disp->flip);
+	nvif_event_block(&disp->flip);
 
 	/* Disable vblank interrupts. */
 	NVWriteCRTC(dev, 0, NV_PCRTC_INTR_EN_0, 0);
@@ -121,7 +121,7 @@ nv04_display_init(struct drm_device *dev, bool resume, bool runtime)
 		encoder->enc_save(&encoder->base.base);
 
 	/* Enable flip completion events. */
-	nvif_notify_get(&disp->flip);
+	nvif_event_allow(&disp->flip);
 
 	if (!resume)
 		return 0;
@@ -202,7 +202,7 @@ nv04_display_destroy(struct drm_device *dev)
 
 	nouveau_hw_save_vga_fonts(dev, 0);
 
-	nvif_notify_dtor(&disp->flip);
+	nvif_event_dtor(&disp->flip);
 
 	nouveau_display(dev)->priv = NULL;
 	vfree(disp);
@@ -227,6 +227,8 @@ nv04_display_create(struct drm_device *dev)
 	if (!disp)
 		return -ENOMEM;
 
+	disp->drm = drm;
+
 	nvif_object_map(&drm->client.device.object, NULL, 0);
 
 	nouveau_display(dev)->priv = disp;
@@ -239,9 +241,10 @@ nv04_display_create(struct drm_device *dev)
 
 	/* Request page flip completion event. */
 	if (drm->channel) {
-		nvif_notify_ctor(&drm->channel->nvsw, "kmsFlip", nv04_flip_complete,
-				 false, NV04_NVSW_NTFY_UEVENT,
-				 NULL, 0, 0, &disp->flip);
+		ret = nvif_event_ctor(&drm->channel->nvsw, "kmsFlip", 0, nv04_flip_complete,
+				      true, NULL, 0, &disp->flip);
+		if (ret)
+			return ret;
 	}
 
 	nouveau_hw_save_vga_fonts(dev, 1);
diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.h b/drivers/gpu/drm/nouveau/dispnv04/disp.h
index f0a24126641a..11a6663758ec 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.h
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.h
@@ -6,6 +6,8 @@
 
 #include "nouveau_display.h"
 
+#include <nvif/event.h>
+
 struct nouveau_encoder;
 
 enum nv04_fp_display_regs {
@@ -84,7 +86,8 @@ struct nv04_display {
 	uint32_t saved_vga_font[4][16384];
 	uint32_t dac_users[4];
 	struct nouveau_bo *image[2];
-	struct nvif_notify flip;
+	struct nvif_event flip;
+	struct nouveau_drm *drm;
 };
 
 static inline struct nv04_display *
@@ -179,5 +182,5 @@ nouveau_bios_run_init_table(struct drm_device *dev, u16 table,
 	);
 }
 
-int nv04_flip_complete(struct nvif_notify *);
+int nv04_flip_complete(struct nvif_event *, void *, u32);
 #endif
diff --git a/drivers/gpu/drm/nouveau/dispnv50/crc.c b/drivers/gpu/drm/nouveau/dispnv50/crc.c
index b834e8a9ae77..9c942fbd836d 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/crc.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/crc.c
@@ -463,7 +463,7 @@ void nv50_crc_atomic_set(struct nv50_head *head,
 	if (!outp)
 		return;
 
-	func->set_src(head, outp->or, nv50_crc_source_type(outp, asyh->crc.src),
+	func->set_src(head, outp->outp.or.id, nv50_crc_source_type(outp, asyh->crc.src),
 		      &crc->ctx[crc->ctx_idx]);
 }
 
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index aa94f8e284dd..edcb2529b402 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -46,8 +46,8 @@
 
 #include <nvif/class.h>
 #include <nvif/cl0002.h>
-#include <nvif/cl5070.h>
 #include <nvif/event.h>
+#include <nvif/if0012.h>
 #include <nvif/if0014.h>
 #include <nvif/timer.h>
 
@@ -64,7 +64,6 @@
 #include "nouveau_connector.h"
 #include "nouveau_encoder.h"
 #include "nouveau_fence.h"
-#include "nouveau_fbcon.h"
 
 #include <subdev/bios/dp.h>
 
@@ -317,52 +316,6 @@ nv50_outp_dump_caps(struct nouveau_drm *drm,
 		 outp->base.base.name, outp->caps.dp_interlace);
 }
 
-static void
-nv50_outp_release(struct nouveau_encoder *nv_encoder)
-{
-	struct nv50_disp *disp = nv50_disp(nv_encoder->base.base.dev);
-	struct {
-		struct nv50_disp_mthd_v1 base;
-	} args = {
-		.base.version = 1,
-		.base.method = NV50_DISP_MTHD_V1_RELEASE,
-		.base.hasht  = nv_encoder->dcb->hasht,
-		.base.hashm  = nv_encoder->dcb->hashm,
-	};
-
-	nvif_mthd(&disp->disp->object, 0, &args, sizeof(args));
-	nv_encoder->or = -1;
-	nv_encoder->link = 0;
-}
-
-static int
-nv50_outp_acquire(struct nouveau_encoder *nv_encoder, bool hda)
-{
-	struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
-	struct nv50_disp *disp = nv50_disp(drm->dev);
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_acquire_v0 info;
-	} args = {
-		.base.version = 1,
-		.base.method = NV50_DISP_MTHD_V1_ACQUIRE,
-		.base.hasht  = nv_encoder->dcb->hasht,
-		.base.hashm  = nv_encoder->dcb->hashm,
-		.info.hda = hda,
-	};
-	int ret;
-
-	ret = nvif_mthd(&disp->disp->object, 0, &args, sizeof(args));
-	if (ret) {
-		NV_ERROR(drm, "error acquiring output path: %d\n", ret);
-		return ret;
-	}
-
-	nv_encoder->or = args.info.or;
-	nv_encoder->link = args.info.link;
-	return 0;
-}
-
 static int
 nv50_outp_atomic_check_view(struct drm_encoder *encoder,
 			    struct drm_crtc_state *crtc_state,
@@ -489,9 +442,9 @@ nv50_dac_atomic_disable(struct drm_encoder *encoder, struct drm_atomic_state *st
 	struct nv50_core *core = nv50_disp(encoder->dev)->core;
 	const u32 ctrl = NVDEF(NV507D, DAC_SET_CONTROL, OWNER, NONE);
 
-	core->func->dac->ctrl(core, nv_encoder->or, ctrl, NULL);
+	core->func->dac->ctrl(core, nv_encoder->outp.or.id, ctrl, NULL);
 	nv_encoder->crtc = NULL;
-	nv50_outp_release(nv_encoder);
+	nvif_outp_release(&nv_encoder->outp);
 }
 
 static void
@@ -516,9 +469,9 @@ nv50_dac_atomic_enable(struct drm_encoder *encoder, struct drm_atomic_state *sta
 
 	ctrl |= NVDEF(NV507D, DAC_SET_CONTROL, PROTOCOL, RGB_CRT);
 
-	nv50_outp_acquire(nv_encoder, false);
+	nvif_outp_acquire_rgb_crt(&nv_encoder->outp);
 
-	core->func->dac->ctrl(core, nv_encoder->or, ctrl, asyh);
+	core->func->dac->ctrl(core, nv_encoder->outp.or.id, ctrl, asyh);
 	asyh->or.depth = 0;
 
 	nv_encoder->crtc = &nv_crtc->base;
@@ -634,7 +587,7 @@ nv50_audio_component_get_eld(struct device *kdev, int port, int dev_id,
 		nv_connector = nouveau_connector(nv_encoder->audio.connector);
 		nv_crtc = nouveau_crtc(nv_encoder->crtc);
 
-		if (!nv_crtc || nv_encoder->or != port || nv_crtc->index != dev_id)
+		if (!nv_crtc || nv_encoder->outp.or.id != port || nv_crtc->index != dev_id)
 			continue;
 
 		*enabled = nv_encoder->audio.enabled;
@@ -718,33 +671,37 @@ nv50_audio_component_fini(struct nouveau_drm *drm)
 /******************************************************************************
  * Audio
  *****************************************************************************/
+static bool
+nv50_audio_supported(struct drm_encoder *encoder)
+{
+	struct nv50_disp *disp = nv50_disp(encoder->dev);
+
+	if (disp->disp->object.oclass <= GT200_DISP ||
+	    disp->disp->object.oclass == GT206_DISP)
+		return false;
+
+	return true;
+}
+
 static void
 nv50_audio_disable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc)
 {
 	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nv50_disp *disp = nv50_disp(encoder->dev);
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_sor_hda_eld_v0 eld;
-	} args = {
-		.base.version = 1,
-		.base.method  = NV50_DISP_MTHD_V1_SOR_HDA_ELD,
-		.base.hasht   = nv_encoder->dcb->hasht,
-		.base.hashm   = (0xf0ff & nv_encoder->dcb->hashm) |
-				(0x0100 << nv_crtc->index),
-	};
+	struct nvif_outp *outp = &nv_encoder->outp;
+
+	if (!nv50_audio_supported(encoder))
+		return;
 
 	mutex_lock(&drm->audio.lock);
 	if (nv_encoder->audio.enabled) {
 		nv_encoder->audio.enabled = false;
 		nv_encoder->audio.connector = NULL;
-		nvif_mthd(&disp->disp->object, 0, &args, sizeof(args));
+		nvif_outp_hda_eld(&nv_encoder->outp, nv_crtc->index, NULL, 0);
 	}
 	mutex_unlock(&drm->audio.lock);
 
-	nv50_audio_component_eld_notify(drm->audio.component, nv_encoder->or,
-					nv_crtc->index);
+	nv50_audio_component_eld_notify(drm->audio.component, outp->or.id, nv_crtc->index);
 }
 
 static void
@@ -754,159 +711,101 @@ nv50_audio_enable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc,
 {
 	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nv50_disp *disp = nv50_disp(encoder->dev);
-	struct __packed {
-		struct {
-			struct nv50_disp_mthd_v1 mthd;
-			struct nv50_disp_sor_hda_eld_v0 eld;
-		} base;
-		u8 data[sizeof(nv_connector->base.eld)];
-	} args = {
-		.base.mthd.version = 1,
-		.base.mthd.method  = NV50_DISP_MTHD_V1_SOR_HDA_ELD,
-		.base.mthd.hasht   = nv_encoder->dcb->hasht,
-		.base.mthd.hashm   = (0xf0ff & nv_encoder->dcb->hashm) |
-				     (0x0100 << nv_crtc->index),
-	};
-
-	if (!drm_detect_monitor_audio(nv_connector->edid))
+	struct nvif_outp *outp = &nv_encoder->outp;
+
+	if (!nv50_audio_supported(encoder) || !drm_detect_monitor_audio(nv_connector->edid))
 		return;
 
 	mutex_lock(&drm->audio.lock);
 
-	memcpy(args.data, nv_connector->base.eld, sizeof(args.data));
-
-	nvif_mthd(&disp->disp->object, 0, &args,
-		  sizeof(args.base) + drm_eld_size(args.data));
+	nvif_outp_hda_eld(&nv_encoder->outp, nv_crtc->index, nv_connector->base.eld,
+			  drm_eld_size(nv_connector->base.eld));
 	nv_encoder->audio.enabled = true;
 	nv_encoder->audio.connector = &nv_connector->base;
 
 	mutex_unlock(&drm->audio.lock);
 
-	nv50_audio_component_eld_notify(drm->audio.component, nv_encoder->or,
-					nv_crtc->index);
+	nv50_audio_component_eld_notify(drm->audio.component, outp->or.id, nv_crtc->index);
 }
 
 /******************************************************************************
  * HDMI
  *****************************************************************************/
 static void
-nv50_hdmi_disable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nv50_disp *disp = nv50_disp(encoder->dev);
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_sor_hdmi_pwr_v0 pwr;
-	} args = {
-		.base.version = 1,
-		.base.method = NV50_DISP_MTHD_V1_SOR_HDMI_PWR,
-		.base.hasht  = nv_encoder->dcb->hasht,
-		.base.hashm  = (0xf0ff & nv_encoder->dcb->hashm) |
-			       (0x0100 << nv_crtc->index),
-	};
-
-	nvif_mthd(&disp->disp->object, 0, &args, sizeof(args));
-}
-
-static void
 nv50_hdmi_enable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc,
 		 struct nouveau_connector *nv_connector, struct drm_atomic_state *state,
-		 struct drm_display_mode *mode)
+		 struct drm_display_mode *mode, bool hda)
 {
 	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nv50_disp *disp = nv50_disp(encoder->dev);
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_sor_hdmi_pwr_v0 pwr;
-		u8 infoframes[2 * 17]; /* two frames, up to 17 bytes each */
-	} args = {
-		.base.version = 1,
-		.base.method = NV50_DISP_MTHD_V1_SOR_HDMI_PWR,
-		.base.hasht  = nv_encoder->dcb->hasht,
-		.base.hashm  = (0xf0ff & nv_encoder->dcb->hashm) |
-			       (0x0100 << nv_crtc->index),
-		.pwr.state = 1,
-		.pwr.rekey = 56, /* binary driver, and tegra, constant */
-	};
-	struct drm_hdmi_info *hdmi;
+	struct drm_hdmi_info *hdmi = &nv_connector->base.display_info.hdmi;
+	union hdmi_infoframe infoframe = { 0 };
+	const u8 rekey = 56; /* binary driver, and tegra, constant */
+	u8 scdc = 0;
 	u32 max_ac_packet;
-	union hdmi_infoframe avi_frame;
-	union hdmi_infoframe vendor_frame;
-	bool high_tmds_clock_ratio = false, scrambling = false;
-	u8 config;
-	int ret;
-	int size;
-
-	if (!drm_detect_hdmi_monitor(nv_connector->edid))
-		return;
-
-	hdmi = &nv_connector->base.display_info.hdmi;
-
-	ret = drm_hdmi_avi_infoframe_from_display_mode(&avi_frame.avi,
-						       &nv_connector->base, mode);
-	if (!ret) {
-		drm_hdmi_avi_infoframe_quant_range(&avi_frame.avi,
-						   &nv_connector->base, mode,
-						   HDMI_QUANTIZATION_RANGE_FULL);
-		/* We have an AVI InfoFrame, populate it to the display */
-		args.pwr.avi_infoframe_length
-			= hdmi_infoframe_pack(&avi_frame, args.infoframes, 17);
-	}
-
-	ret = drm_hdmi_vendor_infoframe_from_display_mode(&vendor_frame.vendor.hdmi,
-							  &nv_connector->base, mode);
-	if (!ret) {
-		/* We have a Vendor InfoFrame, populate it to the display */
-		args.pwr.vendor_infoframe_length
-			= hdmi_infoframe_pack(&vendor_frame,
-					      args.infoframes
-					      + args.pwr.avi_infoframe_length,
-					      17);
-	}
+	struct {
+		struct nvif_outp_infoframe_v0 infoframe;
+		u8 data[17];
+	} args = { 0 };
+	int ret, size;
 
 	max_ac_packet  = mode->htotal - mode->hdisplay;
-	max_ac_packet -= args.pwr.rekey;
+	max_ac_packet -= rekey;
 	max_ac_packet -= 18; /* constant from tegra */
-	args.pwr.max_ac_packet = max_ac_packet / 32;
+	max_ac_packet /= 32;
 
 	if (hdmi->scdc.scrambling.supported) {
-		high_tmds_clock_ratio = mode->clock > 340000;
-		scrambling = high_tmds_clock_ratio ||
-			hdmi->scdc.scrambling.low_rates;
-	}
+		const bool high_tmds_clock_ratio = mode->clock > 340000;
 
-	args.pwr.scdc =
-		NV50_DISP_SOR_HDMI_PWR_V0_SCDC_SCRAMBLE * scrambling |
-		NV50_DISP_SOR_HDMI_PWR_V0_SCDC_DIV_BY_4 * high_tmds_clock_ratio;
+		ret = drm_scdc_readb(nv_encoder->i2c, SCDC_TMDS_CONFIG, &scdc);
+		if (ret < 0) {
+			NV_ERROR(drm, "Failure to read SCDC_TMDS_CONFIG: %d\n", ret);
+			return;
+		}
 
-	size = sizeof(args.base)
-		+ sizeof(args.pwr)
-		+ args.pwr.avi_infoframe_length
-		+ args.pwr.vendor_infoframe_length;
-	nvif_mthd(&disp->disp->object, 0, &args, size);
+		scdc &= ~(SCDC_TMDS_BIT_CLOCK_RATIO_BY_40 | SCDC_SCRAMBLING_ENABLE);
+		if (high_tmds_clock_ratio || hdmi->scdc.scrambling.low_rates)
+			scdc |= SCDC_SCRAMBLING_ENABLE;
+		if (high_tmds_clock_ratio)
+			scdc |= SCDC_TMDS_BIT_CLOCK_RATIO_BY_40;
 
-	nv50_audio_enable(encoder, nv_crtc, nv_connector, state, mode);
+		ret = drm_scdc_writeb(nv_encoder->i2c, SCDC_TMDS_CONFIG, scdc);
+		if (ret < 0)
+			NV_ERROR(drm, "Failure to write SCDC_TMDS_CONFIG = 0x%02x: %d\n",
+				 scdc, ret);
+	}
 
-	/* If SCDC is supported by the downstream monitor, update
-	 * divider / scrambling settings to what we programmed above.
-	 */
-	if (!hdmi->scdc.scrambling.supported)
+	ret = nvif_outp_acquire_tmds(&nv_encoder->outp, nv_crtc->index, true,
+				     max_ac_packet, rekey, scdc, hda);
+	if (ret)
 		return;
 
-	ret = drm_scdc_readb(nv_encoder->i2c, SCDC_TMDS_CONFIG, &config);
-	if (ret < 0) {
-		NV_ERROR(drm, "Failure to read SCDC_TMDS_CONFIG: %d\n", ret);
-		return;
+	/* AVI InfoFrame. */
+	args.infoframe.version = 0;
+	args.infoframe.head = nv_crtc->index;
+
+	if (!drm_hdmi_avi_infoframe_from_display_mode(&infoframe.avi, &nv_connector->base, mode)) {
+		drm_hdmi_avi_infoframe_quant_range(&infoframe.avi, &nv_connector->base, mode,
+						   HDMI_QUANTIZATION_RANGE_FULL);
+
+		size = hdmi_infoframe_pack(&infoframe, args.data, ARRAY_SIZE(args.data));
+	} else {
+		size = 0;
 	}
-	config &= ~(SCDC_TMDS_BIT_CLOCK_RATIO_BY_40 | SCDC_SCRAMBLING_ENABLE);
-	config |= SCDC_TMDS_BIT_CLOCK_RATIO_BY_40 * high_tmds_clock_ratio;
-	config |= SCDC_SCRAMBLING_ENABLE * scrambling;
-	ret = drm_scdc_writeb(nv_encoder->i2c, SCDC_TMDS_CONFIG, config);
-	if (ret < 0)
-		NV_ERROR(drm, "Failure to write SCDC_TMDS_CONFIG = 0x%02x: %d\n",
-			 config, ret);
+
+	nvif_outp_infoframe(&nv_encoder->outp, NVIF_OUTP_INFOFRAME_V0_AVI, &args.infoframe, size);
+
+	/* Vendor InfoFrame. */
+	memset(&args.data, 0, sizeof(args.data));
+	if (!drm_hdmi_vendor_infoframe_from_display_mode(&infoframe.vendor.hdmi,
+							 &nv_connector->base, mode))
+		size = hdmi_infoframe_pack(&infoframe, args.data, ARRAY_SIZE(args.data));
+	else
+		size = 0;
+
+	nvif_outp_infoframe(&nv_encoder->outp, NVIF_OUTP_INFOFRAME_V0_VSI, &args.infoframe, size);
+
+	nv50_audio_enable(encoder, nv_crtc, nv_connector, state, mode);
 }
 
 /******************************************************************************
@@ -979,16 +878,6 @@ nv50_msto_prepare(struct drm_atomic_state *state,
 	struct nv50_mstc *mstc = msto->mstc;
 	struct nv50_mstm *mstm = mstc->mstm;
 	struct drm_dp_mst_atomic_payload *payload;
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_sor_dp_mst_vcpi_v0 vcpi;
-	} args = {
-		.base.version = 1,
-		.base.method = NV50_DISP_MTHD_V1_SOR_DP_MST_VCPI,
-		.base.hasht  = mstm->outp->dcb->hasht,
-		.base.hashm  = (0xf0ff & mstm->outp->dcb->hashm) |
-			       (0x0100 << msto->head->base.index),
-	};
 
 	NV_ATOMIC(drm, "%s: msto prepare\n", msto->encoder.name);
 
@@ -997,22 +886,16 @@ nv50_msto_prepare(struct drm_atomic_state *state,
 	// TODO: Figure out if we want to do a better job of handling VCPI allocation failures here?
 	if (msto->disabled) {
 		drm_dp_remove_payload(mgr, mst_state, payload);
+
+		nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index, 0, 0, 0, 0);
 	} else {
 		if (msto->enabled)
 			drm_dp_add_payload_part1(mgr, mst_state, payload);
 
-		args.vcpi.start_slot = payload->vc_start_slot;
-		args.vcpi.num_slots = payload->time_slots;
-		args.vcpi.pbn = payload->pbn;
-		args.vcpi.aligned_pbn = payload->time_slots * mst_state->pbn_div;
+		nvif_outp_dp_mst_vcpi(&mstm->outp->outp, msto->head->base.index,
+				      payload->vc_start_slot, payload->time_slots,
+				      payload->pbn, payload->time_slots * mst_state->pbn_div);
 	}
-
-	NV_ATOMIC(drm, "%s: %s: %02x %02x %04x %04x\n",
-		  msto->encoder.name, msto->head->base.base.name,
-		  args.vcpi.start_slot, args.vcpi.num_slots,
-		  args.vcpi.pbn, args.vcpi.aligned_pbn);
-
-	nvif_mthd(&drm->display->disp.object, 0, &args, sizeof(args));
 }
 
 static int
@@ -1107,10 +990,12 @@ nv50_msto_atomic_enable(struct drm_encoder *encoder, struct drm_atomic_state *st
 	if (WARN_ON(!mstc))
 		return;
 
-	if (!mstm->links++)
-		nv50_outp_acquire(mstm->outp, false /*XXX: MST audio.*/);
+	if (!mstm->links++) {
+		/*XXX: MST audio. */
+		nvif_outp_acquire_dp(&mstm->outp->outp, mstm->outp->dp.dpcd, 0, 0, false, true);
+	}
 
-	if (mstm->outp->link & 1)
+	if (mstm->outp->outp.or.link & 1)
 		proto = NV917D_SOR_SET_CONTROL_PROTOCOL_DP_A;
 	else
 		proto = NV917D_SOR_SET_CONTROL_PROTOCOL_DP_B;
@@ -1405,7 +1290,7 @@ nv50_mstm_prepare(struct drm_atomic_state *state,
 
 	if (mstm->disabled) {
 		if (!mstm->links)
-			nv50_outp_release(mstm->outp);
+			nvif_outp_release(&mstm->outp->outp);
 		mstm->disabled = false;
 	}
 }
@@ -1473,26 +1358,6 @@ nv50_mstm_remove(struct nv50_mstm *mstm)
 	drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, false);
 }
 
-static int
-nv50_mstm_enable(struct nv50_mstm *mstm, int state)
-{
-	struct nouveau_encoder *outp = mstm->outp;
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_sor_dp_mst_link_v0 mst;
-	} args = {
-		.base.version = 1,
-		.base.method = NV50_DISP_MTHD_V1_SOR_DP_MST_LINK,
-		.base.hasht = outp->dcb->hasht,
-		.base.hashm = outp->dcb->hashm,
-		.mst.state = state,
-	};
-	struct nouveau_drm *drm = nouveau_drm(outp->base.base.dev);
-	struct nvif_object *disp = &drm->display->disp.object;
-
-	return nvif_mthd(disp, 0, &args, sizeof(args));
-}
-
 int
 nv50_mstm_detect(struct nouveau_encoder *outp)
 {
@@ -1513,15 +1378,9 @@ nv50_mstm_detect(struct nouveau_encoder *outp)
 		return ret;
 
 	/* And start enabling */
-	ret = nv50_mstm_enable(mstm, true);
-	if (ret)
-		return ret;
-
 	ret = drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, true);
-	if (ret) {
-		nv50_mstm_enable(mstm, false);
+	if (ret)
 		return ret;
-	}
 
 	mstm->is_mst = true;
 	return 1;
@@ -1623,7 +1482,7 @@ nv50_sor_update(struct nouveau_encoder *nv_encoder, u8 head,
 		asyh->or.depth = depth;
 	}
 
-	core->func->sor->ctrl(core, nv_encoder->or, nv_encoder->ctrl, asyh);
+	core->func->sor->ctrl(core, nv_encoder->outp.or.id, nv_encoder->ctrl, asyh);
 }
 
 /* TODO: Should we extend this to PWM-only backlights?
@@ -1666,8 +1525,7 @@ nv50_sor_atomic_disable(struct drm_encoder *encoder, struct drm_atomic_state *st
 
 	nv_encoder->update(nv_encoder, nv_crtc->index, NULL, 0, 0);
 	nv50_audio_disable(encoder, nv_crtc);
-	nv50_hdmi_disable(&nv_encoder->base.base, nv_crtc);
-	nv50_outp_release(nv_encoder);
+	nvif_outp_release(&nv_encoder->outp);
 	nv_encoder->crtc = NULL;
 }
 
@@ -1679,16 +1537,8 @@ nv50_sor_atomic_enable(struct drm_encoder *encoder, struct drm_atomic_state *sta
 	struct nv50_head_atom *asyh =
 		nv50_head_atom(drm_atomic_get_new_crtc_state(state, &nv_crtc->base));
 	struct drm_display_mode *mode = &asyh->state.adjusted_mode;
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_sor_lvds_script_v0 lvds;
-	} lvds = {
-		.base.version = 1,
-		.base.method  = NV50_DISP_MTHD_V1_SOR_LVDS_SCRIPT,
-		.base.hasht   = nv_encoder->dcb->hasht,
-		.base.hashm   = nv_encoder->dcb->hashm,
-	};
 	struct nv50_disp *disp = nv50_disp(encoder->dev);
+	struct nvif_outp *outp = &nv_encoder->outp;
 	struct drm_device *dev = encoder->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_connector *nv_connector;
@@ -1696,7 +1546,7 @@ nv50_sor_atomic_enable(struct drm_encoder *encoder, struct drm_atomic_state *sta
 	struct nouveau_backlight *backlight;
 #endif
 	struct nvbios *bios = &drm->vbios;
-	bool hda = false;
+	bool lvds_dual = false, lvds_8bpc = false, hda = false;
 	u8 proto = NV507D_SOR_SET_CONTROL_PROTOCOL_CUSTOM;
 	u8 depth = NV837D_SOR_SET_CONTROL_PIXEL_DEPTH_DEFAULT;
 
@@ -1707,11 +1557,16 @@ nv50_sor_atomic_enable(struct drm_encoder *encoder, struct drm_atomic_state *sta
 	     disp->disp->object.oclass >= GF110_DISP) &&
 	    drm_detect_monitor_audio(nv_connector->edid))
 		hda = true;
-	nv50_outp_acquire(nv_encoder, hda);
 
 	switch (nv_encoder->dcb->type) {
 	case DCB_OUTPUT_TMDS:
-		if (nv_encoder->link & 1) {
+		if (disp->disp->object.oclass == NV50_DISP ||
+		    !drm_detect_hdmi_monitor(nv_connector->edid))
+			nvif_outp_acquire_tmds(outp, nv_crtc->index, false, 0, 0, 0, false);
+		else
+			nv50_hdmi_enable(encoder, nv_crtc, nv_connector, state, mode, hda);
+
+		if (nv_encoder->outp.or.link & 1) {
 			proto = NV507D_SOR_SET_CONTROL_PROTOCOL_SINGLE_TMDS_A;
 			/* Only enable dual-link if:
 			 *  - Need to (i.e. rate > 165MHz)
@@ -1726,44 +1581,41 @@ nv50_sor_atomic_enable(struct drm_encoder *encoder, struct drm_atomic_state *sta
 		} else {
 			proto = NV507D_SOR_SET_CONTROL_PROTOCOL_SINGLE_TMDS_B;
 		}
-
-		nv50_hdmi_enable(&nv_encoder->base.base, nv_crtc, nv_connector, state, mode);
 		break;
 	case DCB_OUTPUT_LVDS:
 		proto = NV507D_SOR_SET_CONTROL_PROTOCOL_LVDS_CUSTOM;
 
 		if (bios->fp_no_ddc) {
-			if (bios->fp.dual_link)
-				lvds.lvds.script |= 0x0100;
-			if (bios->fp.if_is_24bit)
-				lvds.lvds.script |= 0x0200;
+			lvds_dual = bios->fp.dual_link;
+			lvds_8bpc = bios->fp.if_is_24bit;
 		} else {
 			if (nv_connector->type == DCB_CONNECTOR_LVDS_SPWG) {
 				if (((u8 *)nv_connector->edid)[121] == 2)
-					lvds.lvds.script |= 0x0100;
+					lvds_dual = true;
 			} else
 			if (mode->clock >= bios->fp.duallink_transition_clk) {
-				lvds.lvds.script |= 0x0100;
+				lvds_dual = true;
 			}
 
-			if (lvds.lvds.script & 0x0100) {
+			if (lvds_dual) {
 				if (bios->fp.strapless_is_24bit & 2)
-					lvds.lvds.script |= 0x0200;
+					lvds_8bpc = true;
 			} else {
 				if (bios->fp.strapless_is_24bit & 1)
-					lvds.lvds.script |= 0x0200;
+					lvds_8bpc = true;
 			}
 
 			if (asyh->or.bpc == 8)
-				lvds.lvds.script |= 0x0200;
+				lvds_8bpc = true;
 		}
 
-		nvif_mthd(&disp->disp->object, 0, &lvds, sizeof(lvds));
+		nvif_outp_acquire_lvds(&nv_encoder->outp, lvds_dual, lvds_8bpc);
 		break;
 	case DCB_OUTPUT_DP:
+		nvif_outp_acquire_dp(&nv_encoder->outp, nv_encoder->dp.dpcd, 0, 0, hda, false);
 		depth = nv50_dp_bpc_to_depth(asyh->or.bpc);
 
-		if (nv_encoder->link & 1)
+		if (nv_encoder->outp.or.link & 1)
 			proto = NV887D_SOR_SET_CONTROL_PROTOCOL_DP_A;
 		else
 			proto = NV887D_SOR_SET_CONTROL_PROTOCOL_DP_B;
@@ -1921,9 +1773,9 @@ nv50_pior_atomic_disable(struct drm_encoder *encoder, struct drm_atomic_state *s
 	struct nv50_core *core = nv50_disp(encoder->dev)->core;
 	const u32 ctrl = NVDEF(NV507D, PIOR_SET_CONTROL, OWNER, NONE);
 
-	core->func->pior->ctrl(core, nv_encoder->or, ctrl, NULL);
+	core->func->pior->ctrl(core, nv_encoder->outp.or.id, ctrl, NULL);
 	nv_encoder->crtc = NULL;
-	nv50_outp_release(nv_encoder);
+	nvif_outp_release(&nv_encoder->outp);
 }
 
 static void
@@ -1944,8 +1796,6 @@ nv50_pior_atomic_enable(struct drm_encoder *encoder, struct drm_atomic_state *st
 		break;
 	}
 
-	nv50_outp_acquire(nv_encoder, false);
-
 	switch (asyh->or.bpc) {
 	case 10: asyh->or.depth = NV837D_PIOR_SET_CONTROL_PIXEL_DEPTH_BPP_30_444; break;
 	case  8: asyh->or.depth = NV837D_PIOR_SET_CONTROL_PIXEL_DEPTH_BPP_24_444; break;
@@ -1955,15 +1805,19 @@ nv50_pior_atomic_enable(struct drm_encoder *encoder, struct drm_atomic_state *st
 
 	switch (nv_encoder->dcb->type) {
 	case DCB_OUTPUT_TMDS:
+		ctrl |= NVDEF(NV507D, PIOR_SET_CONTROL, PROTOCOL, EXT_TMDS_ENC);
+		nvif_outp_acquire_tmds(&nv_encoder->outp, false, false, 0, 0, 0, false);
+		break;
 	case DCB_OUTPUT_DP:
 		ctrl |= NVDEF(NV507D, PIOR_SET_CONTROL, PROTOCOL, EXT_TMDS_ENC);
+		nvif_outp_acquire_dp(&nv_encoder->outp, nv_encoder->dp.dpcd, 0, 0, false, false);
 		break;
 	default:
 		BUG();
 		break;
 	}
 
-	core->func->pior->ctrl(core, nv_encoder->or, ctrl, asyh);
+	core->func->pior->ctrl(core, nv_encoder->outp.or.id, ctrl, asyh);
 	nv_encoder->crtc = &nv_crtc->base;
 }
 
@@ -2587,7 +2441,7 @@ nv50_disp_atomic_state_alloc(struct drm_device *dev)
 static const struct drm_mode_config_funcs
 nv50_disp_func = {
 	.fb_create = nouveau_user_framebuffer_create,
-	.output_poll_changed = nouveau_fbcon_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = nv50_disp_atomic_check,
 	.atomic_commit = nv50_disp_atomic_commit,
 	.atomic_state_alloc = nv50_disp_atomic_state_alloc,
diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c
index c3c57be54e1c..f006e56e1e08 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/head.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/head.c
@@ -517,7 +517,8 @@ nv50_head_destroy(struct drm_crtc *crtc)
 {
 	struct nv50_head *head = nv50_head(crtc);
 
-	nvif_notify_dtor(&head->base.vblank);
+	nvif_event_dtor(&head->base.vblank);
+	nvif_head_dtor(&head->base.head);
 	nv50_lut_fini(&head->olut);
 	drm_crtc_cleanup(crtc);
 	kfree(head);
@@ -554,15 +555,15 @@ nvd9_head_func = {
 	.late_register = nv50_head_late_register,
 };
 
-static int nv50_head_vblank_handler(struct nvif_notify *notify)
+static int
+nv50_head_vblank_handler(struct nvif_event *event, void *repv, u32 repc)
 {
-	struct nouveau_crtc *nv_crtc =
-		container_of(notify, struct nouveau_crtc, vblank);
+	struct nouveau_crtc *nv_crtc = container_of(event, struct nouveau_crtc, vblank);
 
 	if (drm_crtc_handle_vblank(&nv_crtc->base))
 		nv50_crc_handle_vblank(nv50_head(&nv_crtc->base));
 
-	return NVIF_NOTIFY_KEEP;
+	return NVIF_EVENT_KEEP;
 }
 
 struct nv50_head *
@@ -624,14 +625,12 @@ nv50_head_create(struct drm_device *dev, int index)
 		}
 	}
 
-	ret = nvif_notify_ctor(&disp->disp->object, "kmsVbl", nv50_head_vblank_handler,
-			       false, NV04_DISP_NTFY_VBLANK,
-			       &(struct nvif_notify_head_req_v0) {
-				    .head = nv_crtc->index,
-			       },
-			       sizeof(struct nvif_notify_head_req_v0),
-			       sizeof(struct nvif_notify_head_rep_v0),
-			       &nv_crtc->vblank);
+	ret = nvif_head_ctor(disp->disp, head->base.base.name, head->base.index, &head->base.head);
+	if (ret)
+		return ERR_PTR(ret);
+
+	ret = nvif_head_vblank_event_ctor(&head->base.head, "kmsVbl", nv50_head_vblank_handler,
+					  false, &nv_crtc->vblank);
 	if (ret)
 		return ERR_PTR(ret);
 
diff --git a/drivers/gpu/drm/nouveau/include/nvfw/acr.h b/drivers/gpu/drm/nouveau/include/nvfw/acr.h
index e65d6a8db104..6f19560bc54b 100644
--- a/drivers/gpu/drm/nouveau/include/nvfw/acr.h
+++ b/drivers/gpu/drm/nouveau/include/nvfw/acr.h
@@ -39,6 +39,23 @@ struct wpr_header_v1 {
 
 void wpr_header_v1_dump(struct nvkm_subdev *, const struct wpr_header_v1 *);
 
+struct wpr_generic_header {
+#define WPR_GENERIC_HEADER_ID_LSF_UCODE_DESC     1
+#define WPR_GENERIC_HEADER_ID_LSF_WPR_HEADER     2
+#define WPR_GENERIC_HEADER_ID_LSF_SHARED_SUB_WPR 3
+#define WPR_GENERIC_HEADER_ID_LSF_LSB_HEADER     4
+	u16 identifier;
+	u16 version;
+	u32 size;
+};
+
+struct wpr_header_v2 {
+	struct wpr_generic_header hdr;
+	struct wpr_header_v1 wpr;
+};
+
+void wpr_header_v2_dump(struct nvkm_subdev *, const struct wpr_header_v2 *);
+
 struct lsf_signature {
 	u8 prd_keys[2][16];
 	u8 dbg_keys[2][16];
@@ -89,6 +106,74 @@ struct lsb_header_v1 {
 
 void lsb_header_v1_dump(struct nvkm_subdev *, struct lsb_header_v1 *);
 
+struct lsb_header_v2 {
+	struct wpr_generic_header hdr;
+	struct lsf_signature_v2 {
+		struct wpr_generic_header hdr;
+		u32 falcon_id;
+		u8 prd_present;
+		u8 dbg_present;
+		u16 reserved;
+		u32 sig_size;
+		u8 prod_sig[2][384 + 128];
+		u8 debug_sig[2][384 + 128];
+		u16 sig_algo_ver;
+		u16 sig_algo;
+		u16 hash_algo_ver;
+		u16 hash_algo;
+		u32 sig_algo_padding_type;
+		u8 depmap[11 * 2 * 4];
+		u32 depmap_count;
+		u8 supports_versioning;
+		u8 pad[3];
+		u32 ls_ucode_version;
+		u32 ls_ucode_id;
+		u32 ucode_ls_encrypted;
+		u32 ls_eng_algo_type;
+		u32 ls_eng_algo_ver;
+		u8 ls_enc_iv[16];
+		u8 rsvd[36];
+	} signature;
+	u32 ucode_off;
+	u32 ucode_size;
+	u32 data_size;
+	u32 bl_code_size;
+	u32 bl_imem_off;
+	u32 bl_data_off;
+	u32 bl_data_size;
+	u32 rsvd0;
+	u32 app_code_off;
+	u32 app_code_size;
+	u32 app_data_off;
+	u32 app_data_size;
+	u32 app_imem_offset;
+	u32 app_dmem_offset;
+	u32 flags;
+	u32 monitor_code_offset;
+	u32 monitor_data_offset;
+	u32 manifest_offset;
+	struct hs_fmc_params {
+		u8 hs_fmc;
+		u8 padding[3];
+		u16 pkc_algo;
+		u16 pkc_algo_version;
+		u32 engid_mask;
+		u32 ucode_id;
+		u32 fuse_ver;
+		u8 pkc_signature[384 + 128];
+		u8 pkc_key[2048];
+		u8 rsvd[4];
+	} hs_fmc_params;
+	struct hs_ovl_sig_blob_params {
+		u8 hs_ovl_sig_blob_present;
+		u32 hs_ovl_sig_blob_offset;
+		u32 hs_ovl_sig_blob_size;
+	} hs_ovl_sig_blob_params;
+	u8 rsvd[20];
+};
+
+void lsb_header_v2_dump(struct nvkm_subdev *, struct lsb_header_v2 *);
+
 struct flcn_acr_desc {
 	union {
 		u8 reserved_dmem[0x200];
diff --git a/drivers/gpu/drm/nouveau/include/nvfw/hs.h b/drivers/gpu/drm/nouveau/include/nvfw/hs.h
index b53bbc4cd130..8c4cd08a7b5f 100644
--- a/drivers/gpu/drm/nouveau/include/nvfw/hs.h
+++ b/drivers/gpu/drm/nouveau/include/nvfw/hs.h
@@ -17,6 +17,20 @@ struct nvfw_hs_header {
 
 const struct nvfw_hs_header *nvfw_hs_header(struct nvkm_subdev *, const void *);
 
+struct nvfw_hs_header_v2 {
+	u32 sig_prod_offset;
+	u32 sig_prod_size;
+	u32 patch_loc;
+	u32 patch_sig;
+	u32 meta_data_offset;
+	u32 meta_data_size;
+	u32 num_sig;
+	u32 header_offset;
+	u32 header_size;
+};
+
+const struct nvfw_hs_header_v2 *nvfw_hs_header_v2(struct nvkm_subdev *, const void *);
+
 struct nvfw_hs_load_header {
 	u32 non_sec_code_off;
 	u32 non_sec_code_size;
@@ -28,4 +42,18 @@ struct nvfw_hs_load_header {
 
 const struct nvfw_hs_load_header *
 nvfw_hs_load_header(struct nvkm_subdev *, const void *);
+
+struct nvfw_hs_load_header_v2 {
+	u32 os_code_offset;
+	u32 os_code_size;
+	u32 os_data_offset;
+	u32 os_data_size;
+	u32 num_apps;
+	struct {
+		u32 offset;
+		u32 size;
+	} app[0];
+};
+
+const struct nvfw_hs_load_header_v2 *nvfw_hs_load_header_v2(struct nvkm_subdev *, const void *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvfw/ls.h b/drivers/gpu/drm/nouveau/include/nvfw/ls.h
index f63692a2a16c..d531121bfa35 100644
--- a/drivers/gpu/drm/nouveau/include/nvfw/ls.h
+++ b/drivers/gpu/drm/nouveau/include/nvfw/ls.h
@@ -50,4 +50,55 @@ struct nvfw_ls_desc_v1 {
 
 const struct nvfw_ls_desc_v1 *
 nvfw_ls_desc_v1(struct nvkm_subdev *, const void *);
+
+struct nvfw_ls_desc_v2 {
+	u32 descriptor_size;
+	u32 image_size;
+	u32 tools_version;
+	u32 app_version;
+	char date[64];
+	u32 secure_bootloader;
+	u32 bootloader_start_offset;
+	u32 bootloader_size;
+	u32 bootloader_imem_offset;
+	u32 bootloader_entry_point;
+	u32 app_start_offset;
+	u32 app_size;
+	u32 app_imem_offset;
+	u32 app_imem_entry;
+	u32 app_dmem_offset;
+	u32 app_resident_code_offset;
+	u32 app_resident_code_size;
+	u32 app_resident_data_offset;
+	u32 app_resident_data_size;
+	u32 nb_imem_overlays;
+	u32 nb_dmem_overlays;
+	struct {
+		u32 start;
+		u32 size;
+	} load_ovl[64];
+};
+
+const struct nvfw_ls_desc_v2 *nvfw_ls_desc_v2(struct nvkm_subdev *, const void *);
+
+struct nvfw_ls_hsbl_bin_hdr {
+	u32 bin_magic;
+	u32 bin_ver;
+	u32 bin_size;
+	u32 header_offset;
+};
+
+const struct nvfw_ls_hsbl_bin_hdr *nvfw_ls_hsbl_bin_hdr(struct nvkm_subdev *, const void *);
+
+struct nvfw_ls_hsbl_hdr {
+	u32 sig_prod_offset;
+	u32 sig_prod_size;
+	u32 patch_loc;
+	u32 patch_sig;
+	u32 meta_data_offset;
+	u32 meta_data_size;
+	u32 num_sig;
+};
+
+const struct nvfw_ls_hsbl_hdr *nvfw_ls_hsbl_hdr(struct nvkm_subdev *, const void *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvfw/sec2.h b/drivers/gpu/drm/nouveau/include/nvfw/sec2.h
index 9a37ad4179cb..b3331d679c4e 100644
--- a/drivers/gpu/drm/nouveau/include/nvfw/sec2.h
+++ b/drivers/gpu/drm/nouveau/include/nvfw/sec2.h
@@ -10,6 +10,7 @@ struct nv_sec2_args {
 };
 
 #define NV_SEC2_UNIT_INIT                                                  0x01
+#define NV_SEC2_UNIT_UNLOAD                                                0x06
 #define NV_SEC2_UNIT_ACR                                                   0x08
 
 struct nv_sec2_init_msg {
@@ -33,6 +34,29 @@ struct nv_sec2_init_msg {
 	u16 sw_managed_area_size;
 };
 
+struct nv_sec2_init_msg_v1 {
+	struct nvfw_falcon_msg hdr;
+#define NV_SEC2_INIT_MSG_INIT                                              0x00
+	u8 msg_type;
+
+	u8 num_queues;
+	u16 os_debug_entry_point;
+
+	struct {
+		u32 offset;
+		u16 size;
+		u8 index;
+#define NV_SEC2_INIT_MSG_QUEUE_ID_CMDQ                                     0x00
+#define NV_SEC2_INIT_MSG_QUEUE_ID_MSGQ                                     0x01
+		u8 id;
+	} queue_info[2];
+
+	u32 sw_managed_area_offset;
+	u16 sw_managed_area_size;
+
+	u32 unkn[8];
+};
+
 struct nv_sec2_acr_cmd {
 	struct nvfw_falcon_cmd hdr;
 #define NV_SEC2_ACR_CMD_BOOTSTRAP_FALCON                                   0x00
@@ -57,4 +81,25 @@ struct nv_sec2_acr_bootstrap_falcon_msg {
 	u32 error_code;
 	u32 falcon_id;
 };
+
+#define NV_SEC2_UNIT_V2_INIT   0x01
+#define NV_SEC2_UNIT_V2_UNLOAD 0x05
+#define NV_SEC2_UNIT_V2_ACR    0x07
+
+struct nv_sec2_acr_bootstrap_falcon_cmd_v1 {
+	struct nv_sec2_acr_cmd cmd;
+#define NV_SEC2_ACR_BOOTSTRAP_FALCON_FLAGS_RESET_YES                 0x00000000
+#define NV_SEC2_ACR_BOOTSTRAP_FALCON_FLAGS_RESET_NO                  0x00000001
+	u32 flags;
+	u32 falcon_id;
+	u32 unkn08;
+	u32 unkn0c;
+};
+
+struct nv_sec2_acr_bootstrap_falcon_msg_v1 {
+	struct nv_sec2_acr_msg msg;
+	u32 error_code;
+	u32 falcon_id;
+	u32 unkn08;
+};
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0046.h b/drivers/gpu/drm/nouveau/include/nvif/cl0046.h
index d490d401870a..eca7c3950654 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl0046.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl0046.h
@@ -2,28 +2,5 @@
 #ifndef __NVIF_CL0046_H__
 #define __NVIF_CL0046_H__
 
-#define NV04_DISP_NTFY_VBLANK                                              0x00
 #define NV04_DISP_NTFY_CONN                                                0x01
-
-struct nv04_disp_mthd_v0 {
-	__u8  version;
-#define NV04_DISP_SCANOUTPOS                                               0x00
-	__u8  method;
-	__u8  head;
-	__u8  pad03[5];
-};
-
-struct nv04_disp_scanoutpos_v0 {
-	__u8  version;
-	__u8  pad01[7];
-	__s64 time[2];
-	__u16 vblanks;
-	__u16 vblanke;
-	__u16 vtotal;
-	__u16 vline;
-	__u16 hblanks;
-	__u16 hblanke;
-	__u16 htotal;
-	__u16 hline;
-};
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl006b.h b/drivers/gpu/drm/nouveau/include/nvif/cl006b.h
deleted file mode 100644
index c960c449e430..000000000000
--- a/drivers/gpu/drm/nouveau/include/nvif/cl006b.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __NVIF_CL006B_H__
-#define __NVIF_CL006B_H__
-
-struct nv03_channel_dma_v0 {
-	__u8  version;
-	__u8  chid;
-	__u8  pad02[2];
-	__u32 offset;
-	__u64 pushbuf;
-};
-#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
index 59759c4fb62e..8b5a240d57e4 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
@@ -68,7 +68,7 @@ struct nv_device_time_v0 {
 
 /* Returns the number of available runlists. */
 #define NV_DEVICE_HOST_RUNLISTS                       NV_DEVICE_HOST(0x00000000)
-/* Returns the number of available channels. */
+/* Returns the number of available channels (0 if per-runlist). */
 #define NV_DEVICE_HOST_CHANNELS                       NV_DEVICE_HOST(0x00000001)
 
 /* Returns a mask of available engine types on runlist(data). */
@@ -90,4 +90,6 @@ struct nv_device_time_v0 {
 #define NV_DEVICE_HOST_RUNLIST_ENGINES_SEC2                          0x00004000
 #define NV_DEVICE_HOST_RUNLIST_ENGINES_NVDEC                         0x00008000
 #define NV_DEVICE_HOST_RUNLIST_ENGINES_NVENC                         0x00010000
+/* Returns the number of available channels on runlist(data). */
+#define NV_DEVICE_HOST_RUNLIST_CHANNELS               NV_DEVICE_HOST(0x00000101)
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl506e.h b/drivers/gpu/drm/nouveau/include/nvif/cl506e.h
deleted file mode 100644
index 9df289c7a84f..000000000000
--- a/drivers/gpu/drm/nouveau/include/nvif/cl506e.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __NVIF_CL506E_H__
-#define __NVIF_CL506E_H__
-
-struct nv50_channel_dma_v0 {
-	__u8  version;
-	__u8  chid;
-	__u8  pad02[6];
-	__u64 vmm;
-	__u64 pushbuf;
-	__u64 offset;
-};
-#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl506f.h b/drivers/gpu/drm/nouveau/include/nvif/cl506f.h
deleted file mode 100644
index 327c96a994bb..000000000000
--- a/drivers/gpu/drm/nouveau/include/nvif/cl506f.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __NVIF_CL506F_H__
-#define __NVIF_CL506F_H__
-
-struct nv50_channel_gpfifo_v0 {
-	__u8  version;
-	__u8  chid;
-	__u8  pad02[2];
-	__u32 ilength;
-	__u64 ioffset;
-	__u64 pushbuf;
-	__u64 vmm;
-};
-#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl5070.h b/drivers/gpu/drm/nouveau/include/nvif/cl5070.h
deleted file mode 100644
index 56affb606adf..000000000000
--- a/drivers/gpu/drm/nouveau/include/nvif/cl5070.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __NVIF_CL5070_H__
-#define __NVIF_CL5070_H__
-
-#define NV50_DISP_MTHD                                                     0x00
-
-struct nv50_disp_mthd_v0 {
-	__u8  version;
-#define NV50_DISP_SCANOUTPOS                                               0x00
-	__u8  method;
-	__u8  head;
-	__u8  pad03[5];
-};
-
-struct nv50_disp_scanoutpos_v0 {
-	__u8  version;
-	__u8  pad01[7];
-	__s64 time[2];
-	__u16 vblanks;
-	__u16 vblanke;
-	__u16 vtotal;
-	__u16 vline;
-	__u16 hblanks;
-	__u16 hblanke;
-	__u16 htotal;
-	__u16 hline;
-};
-
-struct nv50_disp_mthd_v1 {
-	__u8  version;
-#define NV50_DISP_MTHD_V1_ACQUIRE                                          0x01
-#define NV50_DISP_MTHD_V1_RELEASE                                          0x02
-#define NV50_DISP_MTHD_V1_SOR_HDA_ELD                                      0x21
-#define NV50_DISP_MTHD_V1_SOR_HDMI_PWR                                     0x22
-#define NV50_DISP_MTHD_V1_SOR_LVDS_SCRIPT                                  0x23
-#define NV50_DISP_MTHD_V1_SOR_DP_MST_LINK                                  0x25
-#define NV50_DISP_MTHD_V1_SOR_DP_MST_VCPI                                  0x26
-	__u8  method;
-	__u16 hasht;
-	__u16 hashm;
-	__u8  pad06[2];
-};
-
-struct nv50_disp_acquire_v0 {
-	__u8  version;
-	__u8  or;
-	__u8  link;
-	__u8  hda;
-	__u8  pad04[4];
-};
-
-struct nv50_disp_sor_hda_eld_v0 {
-	__u8  version;
-	__u8  pad01[7];
-	__u8  data[];
-};
-
-struct nv50_disp_sor_hdmi_pwr_v0 {
-	__u8  version;
-	__u8  state;
-	__u8  max_ac_packet;
-	__u8  rekey;
-	__u8  avi_infoframe_length;
-	__u8  vendor_infoframe_length;
-#define NV50_DISP_SOR_HDMI_PWR_V0_SCDC_SCRAMBLE (1 << 0)
-#define NV50_DISP_SOR_HDMI_PWR_V0_SCDC_DIV_BY_4 (1 << 1)
-	__u8  scdc;
-	__u8  pad07[1];
-};
-
-struct nv50_disp_sor_lvds_script_v0 {
-	__u8  version;
-	__u8  pad01[1];
-	__u16 script;
-	__u8  pad04[4];
-};
-
-struct nv50_disp_sor_dp_mst_link_v0 {
-	__u8  version;
-	__u8  state;
-	__u8  pad02[6];
-};
-
-struct nv50_disp_sor_dp_mst_vcpi_v0 {
-	__u8  version;
-	__u8  pad01[1];
-	__u8  start_slot;
-	__u8  num_slots;
-	__u16 pbn;
-	__u16 aligned_pbn;
-};
-#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl826e.h b/drivers/gpu/drm/nouveau/include/nvif/cl826e.h
deleted file mode 100644
index 1b6496d31580..000000000000
--- a/drivers/gpu/drm/nouveau/include/nvif/cl826e.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __NVIF_CL826E_H__
-#define __NVIF_CL826E_H__
-
-struct g82_channel_dma_v0 {
-	__u8  version;
-	__u8  chid;
-	__u8  pad02[6];
-	__u64 vmm;
-	__u64 pushbuf;
-	__u64 offset;
-};
-
-#define NV826E_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
-#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl826f.h b/drivers/gpu/drm/nouveau/include/nvif/cl826f.h
deleted file mode 100644
index 148602264a76..000000000000
--- a/drivers/gpu/drm/nouveau/include/nvif/cl826f.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __NVIF_CL826F_H__
-#define __NVIF_CL826F_H__
-
-struct g82_channel_gpfifo_v0 {
-	__u8  version;
-	__u8  chid;
-	__u8  pad02[2];
-	__u32 ilength;
-	__u64 ioffset;
-	__u64 pushbuf;
-	__u64 vmm;
-};
-
-#define NV826F_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
-#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl906f.h b/drivers/gpu/drm/nouveau/include/nvif/cl906f.h
deleted file mode 100644
index 3823d6891b55..000000000000
--- a/drivers/gpu/drm/nouveau/include/nvif/cl906f.h
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __NVIF_CL906F_H__
-#define __NVIF_CL906F_H__
-
-struct fermi_channel_gpfifo_v0 {
-	__u8  version;
-	__u8  chid;
-	__u8  pad02[2];
-	__u32 ilength;
-	__u64 ioffset;
-	__u64 vmm;
-};
-
-#define NV906F_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
-#define NV906F_V0_NTFY_KILLED                                              0x01
-#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
deleted file mode 100644
index cfa18f1fbf83..000000000000
--- a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __NVIF_CLA06F_H__
-#define __NVIF_CLA06F_H__
-
-struct kepler_channel_gpfifo_a_v0 {
-	__u8  version;
-	__u8  priv;
-	__u16 chid;
-	__u32 ilength;
-	__u64 ioffset;
-	__u64 runlist;
-	__u64 vmm;
-	__u64 inst;
-};
-
-#define NVA06F_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
-#define NVA06F_V0_NTFY_KILLED                                              0x01
-#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h
index 8641db649f48..ad1e5de84e80 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/class.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/class.h
@@ -32,11 +32,17 @@
 #define NVIF_CLASS_VMM_GM200                         /* ifb00d.h */  0x8000b00d
 #define NVIF_CLASS_VMM_GP100                         /* ifc00d.h */  0x8000c00d
 
+#define NVIF_CLASS_EVENT                             /* if000e.h */  0x8000000e
+
 #define NVIF_CLASS_DISP                              /* if0010.h */  0x80000010
 #define NVIF_CLASS_CONN                              /* if0011.h */  0x80000011
 #define NVIF_CLASS_OUTP                              /* if0012.h */  0x80000012
+#define NVIF_CLASS_HEAD                              /* if0013.h */  0x80000013
 #define NVIF_CLASS_DISP_CHAN                         /* if0014.h */  0x80000014
 
+#define NVIF_CLASS_CHAN                              /* if0020.h */  0x80000020
+#define NVIF_CLASS_CGRP                              /* if0021.h */  0x80000021
+
 /* the below match nvidia-assigned (either in hw, or sw) class numbers */
 #define NV_NULL_CLASS                                                0x00000030
 
@@ -58,25 +64,30 @@
 #define NV04_DISP                                     /* cl0046.h */ 0x00000046
 
 #define VOLTA_USERMODE_A                                             0x0000c361
+#define TURING_USERMODE_A                                            0x0000c461
+#define AMPERE_USERMODE_A                                            0x0000c561
 
 #define MAXWELL_FAULT_BUFFER_A                        /* clb069.h */ 0x0000b069
 #define VOLTA_FAULT_BUFFER_A                          /* clb069.h */ 0x0000c369
 
-#define NV03_CHANNEL_DMA                              /* cl506b.h */ 0x0000006b
-#define NV10_CHANNEL_DMA                              /* cl506b.h */ 0x0000006e
-#define NV17_CHANNEL_DMA                              /* cl506b.h */ 0x0000176e
-#define NV40_CHANNEL_DMA                              /* cl506b.h */ 0x0000406e
-
-#define NV50_CHANNEL_GPFIFO                           /* cl506f.h */ 0x0000506f
-#define G82_CHANNEL_GPFIFO                            /* cl826f.h */ 0x0000826f
-#define FERMI_CHANNEL_GPFIFO                          /* cl906f.h */ 0x0000906f
-#define KEPLER_CHANNEL_GPFIFO_A                       /* cla06f.h */ 0x0000a06f
-#define KEPLER_CHANNEL_GPFIFO_B                       /* cla06f.h */ 0x0000a16f
-#define MAXWELL_CHANNEL_GPFIFO_A                      /* cla06f.h */ 0x0000b06f
-#define PASCAL_CHANNEL_GPFIFO_A                       /* cla06f.h */ 0x0000c06f
-#define VOLTA_CHANNEL_GPFIFO_A                        /* clc36f.h */ 0x0000c36f
-#define TURING_CHANNEL_GPFIFO_A                       /* clc36f.h */ 0x0000c46f
-#define AMPERE_CHANNEL_GPFIFO_B                       /* clc36f.h */ 0x0000c76f
+#define NV03_CHANNEL_DMA                              /* if0020.h */ 0x0000006b
+#define NV10_CHANNEL_DMA                              /* if0020.h */ 0x0000006e
+#define NV17_CHANNEL_DMA                              /* if0020.h */ 0x0000176e
+#define NV40_CHANNEL_DMA                              /* if0020.h */ 0x0000406e
+
+#define KEPLER_CHANNEL_GROUP_A                        /* if0021.h */ 0x0000a06c
+
+#define NV50_CHANNEL_GPFIFO                           /* if0020.h */ 0x0000506f
+#define G82_CHANNEL_GPFIFO                            /* if0020.h */ 0x0000826f
+#define FERMI_CHANNEL_GPFIFO                          /* if0020.h */ 0x0000906f
+#define KEPLER_CHANNEL_GPFIFO_A                       /* if0020.h */ 0x0000a06f
+#define KEPLER_CHANNEL_GPFIFO_B                       /* if0020.h */ 0x0000a16f
+#define MAXWELL_CHANNEL_GPFIFO_A                      /* if0020.h */ 0x0000b06f
+#define PASCAL_CHANNEL_GPFIFO_A                       /* if0020.h */ 0x0000c06f
+#define VOLTA_CHANNEL_GPFIFO_A                        /* if0020.h */ 0x0000c36f
+#define TURING_CHANNEL_GPFIFO_A                       /* if0020.h */ 0x0000c46f
+#define AMPERE_CHANNEL_GPFIFO_A                       /* if0020.h */ 0x0000c56f
+#define AMPERE_CHANNEL_GPFIFO_B                       /* if0020.h */ 0x0000c76f
 
 #define NV50_DISP                                     /* if0010.h */ 0x00005070
 #define G82_DISP                                      /* if0010.h */ 0x00008270
@@ -179,6 +190,8 @@
 
 #define TURING_A                                      /* cl9097.h */ 0x0000c597
 
+#define AMPERE_B                                      /* cl9097.h */ 0x0000c797
+
 #define NV74_BSP                                                     0x000074b0
 
 #define GT212_MSVLD                                                  0x000085b1
@@ -206,6 +219,7 @@
 #define PASCAL_DMA_COPY_B                                            0x0000c1b5
 #define VOLTA_DMA_COPY_A                                             0x0000c3b5
 #define TURING_DMA_COPY_A                                            0x0000c5b5
+#define AMPERE_DMA_COPY_A                                            0x0000c6b5
 #define AMPERE_DMA_COPY_B                                            0x0000c7b5
 
 #define FERMI_DECOMPRESS                                             0x000090b8
@@ -222,6 +236,7 @@
 #define PASCAL_COMPUTE_B                                             0x0000c1c0
 #define VOLTA_COMPUTE_A                                              0x0000c3c0
 #define TURING_COMPUTE_A                                             0x0000c5c0
+#define AMPERE_COMPUTE_B                                             0x0000c7c0
 
 #define NV74_CIPHER                                                  0x000074c1
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/clb069.h b/drivers/gpu/drm/nouveau/include/nvif/clb069.h
index eef5d0227bab..d7689de35ab2 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/clb069.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/clb069.h
@@ -8,5 +8,8 @@ struct nvif_clb069_v0 {
 	__u32 put;
 };
 
-#define NVB069_V0_NTFY_FAULT                                                0x00
+union nvif_clb069_event_args {
+	struct nvif_clb069_event_vn {
+	} vn;
+};
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/clc36f.h b/drivers/gpu/drm/nouveau/include/nvif/clc36f.h
deleted file mode 100644
index f66885891238..000000000000
--- a/drivers/gpu/drm/nouveau/include/nvif/clc36f.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __NVIF_CLC36F_H__
-#define __NVIF_CLC36F_H__
-
-struct volta_channel_gpfifo_a_v0 {
-	__u8  version;
-	__u8  priv;
-	__u16 chid;
-	__u32 ilength;
-	__u64 ioffset;
-	__u64 runlist;
-	__u64 vmm;
-	__u64 inst;
-	__u32 token;
-};
-
-#define NVC36F_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
-#define NVC36F_V0_NTFY_KILLED                                              0x01
-#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/conn.h b/drivers/gpu/drm/nouveau/include/nvif/conn.h
index f72a8f138f47..dc355e1dfafa 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/conn.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/conn.h
@@ -2,6 +2,7 @@
 #ifndef __NVIF_CONN_H__
 #define __NVIF_CONN_H__
 #include <nvif/object.h>
+#include <nvif/event.h>
 struct nvif_disp;
 
 struct nvif_conn {
@@ -11,8 +12,17 @@ struct nvif_conn {
 int nvif_conn_ctor(struct nvif_disp *, const char *name, int id, struct nvif_conn *);
 void nvif_conn_dtor(struct nvif_conn *);
 
+static inline int
+nvif_conn_id(struct nvif_conn *conn)
+{
+	return conn->object.handle;
+}
+
 #define NVIF_CONN_HPD_STATUS_UNSUPPORTED 0 /* negative if query fails */
 #define NVIF_CONN_HPD_STATUS_NOT_PRESENT 1
 #define NVIF_CONN_HPD_STATUS_PRESENT     2
 int nvif_conn_hpd_status(struct nvif_conn *);
+
+int nvif_conn_event_ctor(struct nvif_conn *, const char *name, nvif_event_func, u8 types,
+			 struct nvif_event *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/disp.h b/drivers/gpu/drm/nouveau/include/nvif/disp.h
index 742632ad3bea..56eb7293e01c 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/disp.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/disp.h
@@ -7,6 +7,7 @@ struct nvif_disp {
 	struct nvif_object object;
 	unsigned long conn_mask;
 	unsigned long outp_mask;
+	unsigned long head_mask;
 };
 
 int nvif_disp_ctor(struct nvif_device *, const char *name, s32 oclass,
diff --git a/drivers/gpu/drm/nouveau/include/nvif/event.h b/drivers/gpu/drm/nouveau/include/nvif/event.h
index a6b1ee4f10ca..68bf6635841f 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/event.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/event.h
@@ -1,63 +1,36 @@
 /* SPDX-License-Identifier: MIT */
 #ifndef __NVIF_EVENT_H__
 #define __NVIF_EVENT_H__
-
-struct nvif_notify_req_v0 {
-	__u8  version;
-	__u8  reply;
-	__u8  pad02[5];
-#define NVIF_NOTIFY_V0_ROUTE_NVIF                                          0x00
-	__u8  route;
-	__u64 token;	/* must be unique */
-	__u8  data[];	/* request data (below) */
-};
-
-struct nvif_notify_rep_v0 {
-	__u8  version;
-	__u8  pad01[6];
-	__u8  route;
-	__u64 token;
-	__u8  data[];	/* reply data (below) */
-};
-
-struct nvif_notify_head_req_v0 {
-	/* nvif_notify_req ... */
-	__u8  version;
-	__u8  head;
-	__u8  pad02[6];
-};
-
-struct nvif_notify_head_rep_v0 {
-	/* nvif_notify_rep ... */
-	__u8  version;
-	__u8  pad01[7];
-};
-
-struct nvif_notify_conn_req_v0 {
-	/* nvif_notify_req ... */
-	__u8  version;
-#define NVIF_NOTIFY_CONN_V0_PLUG                                           0x01
-#define NVIF_NOTIFY_CONN_V0_UNPLUG                                         0x02
-#define NVIF_NOTIFY_CONN_V0_IRQ                                            0x04
-#define NVIF_NOTIFY_CONN_V0_ANY                                            0x07
-	__u8  mask;
-	__u8  conn;
-	__u8  pad03[5];
-};
-
-struct nvif_notify_conn_rep_v0 {
-	/* nvif_notify_rep ... */
-	__u8  version;
-	__u8  mask;
-	__u8  pad02[6];
-};
-
-struct nvif_notify_uevent_req {
-	/* nvif_notify_req ... */
-};
-
-struct nvif_notify_uevent_rep {
-	/* nvif_notify_rep ... */
-};
-
+#include <nvif/object.h>
+#include <nvif/if000e.h>
+struct nvif_event;
+
+#define NVIF_EVENT_KEEP 0
+#define NVIF_EVENT_DROP 1
+typedef int (*nvif_event_func)(struct nvif_event *, void *repv, u32 repc);
+
+struct nvif_event {
+	struct nvif_object object;
+	nvif_event_func func;
+};
+
+static inline bool
+nvif_event_constructed(struct nvif_event *event)
+{
+	return nvif_object_constructed(&event->object);
+}
+
+int nvif_event_ctor_(struct nvif_object *, const char *, u32, nvif_event_func, bool,
+		     struct nvif_event_v0 *, u32, bool, struct nvif_event *);
+
+static inline int
+nvif_event_ctor(struct nvif_object *parent, const char *name, u32 handle, nvif_event_func func,
+		bool wait, struct nvif_event_v0 *args, u32 argc, struct nvif_event *event)
+{
+	return nvif_event_ctor_(parent, name, handle, func, wait, args, argc, true, event);
+}
+
+void nvif_event_dtor(struct nvif_event *);
+int nvif_event_allow(struct nvif_event *);
+int nvif_event_block(struct nvif_event *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/head.h b/drivers/gpu/drm/nouveau/include/nvif/head.h
new file mode 100644
index 000000000000..3ec36999e956
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvif/head.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef __NVIF_HEAD_H__
+#define __NVIF_HEAD_H__
+#include <nvif/object.h>
+#include <nvif/event.h>
+struct nvif_disp;
+
+struct nvif_head {
+	struct nvif_object object;
+};
+
+int nvif_head_ctor(struct nvif_disp *, const char *name, int id, struct nvif_head *);
+void nvif_head_dtor(struct nvif_head *);
+
+static inline int
+nvif_head_id(struct nvif_head *head)
+{
+	return head->object.handle;
+}
+
+int nvif_head_vblank_event_ctor(struct nvif_head *, const char *name, nvif_event_func, bool wait,
+				struct nvif_event *);
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0004.h b/drivers/gpu/drm/nouveau/include/nvif/if0004.h
index d324c73c27fb..1d916a137941 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/if0004.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/if0004.h
@@ -2,7 +2,10 @@
 #ifndef __NVIF_IF0004_H__
 #define __NVIF_IF0004_H__
 
-#define NV04_NVSW_NTFY_UEVENT                                              0x00
+union nv04_nvsw_event_args {
+	struct nv04_nvsw_event_vn {
+	} vn;
+};
 
 #define NV04_NVSW_GET_REF                                                  0x00
 
diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000e.h b/drivers/gpu/drm/nouveau/include/nvif/if000e.h
new file mode 100644
index 000000000000..90a936cb1766
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvif/if000e.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef __NVIF_IF000E_H__
+#define __NVIF_IF000E_H__
+
+union nvif_event_args {
+	struct nvif_event_v0 {
+		__u8 version;
+		__u8 wait;
+		__u8 pad02[6];
+		__u8 data[];
+	} v0;
+};
+
+#define NVIF_EVENT_V0_ALLOW 0x00
+#define NVIF_EVENT_V0_BLOCK 0x01
+
+union nvif_event_allow_args {
+	struct nvif_event_allow_vn {
+	} vn;
+};
+
+union nvif_event_block_args {
+	struct nvif_event_block_vn {
+	} vn;
+};
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0010.h b/drivers/gpu/drm/nouveau/include/nvif/if0010.h
index fc236ef28965..4c835bbe6fe3 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/if0010.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/if0010.h
@@ -8,6 +8,7 @@ union nvif_disp_args {
 		__u8 pad01[3];
 		__u32 conn_mask;
 		__u32 outp_mask;
+		__u32 head_mask;
 	} v0;
 };
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0011.h b/drivers/gpu/drm/nouveau/include/nvif/if0011.h
index 04ba6581f840..69b0b779f942 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/if0011.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/if0011.h
@@ -10,6 +10,17 @@ union nvif_conn_args {
 	} v0;
 };
 
+union nvif_conn_event_args {
+	struct nvif_conn_event_v0 {
+		__u8 version;
+#define NVIF_CONN_EVENT_V0_PLUG   0x01
+#define NVIF_CONN_EVENT_V0_UNPLUG 0x02
+#define NVIF_CONN_EVENT_V0_IRQ    0x04
+		__u8 types;
+		__u8 pad02[6];
+	} v0;
+};
+
 #define NVIF_CONN_V0_HPD_STATUS 0x00000000
 
 union nvif_conn_hpd_status_args {
diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0012.h b/drivers/gpu/drm/nouveau/include/nvif/if0012.h
index 243bd35d942f..eb99d84eb844 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/if0012.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/if0012.h
@@ -11,6 +11,13 @@ union nvif_outp_args {
 };
 
 #define NVIF_OUTP_V0_LOAD_DETECT 0x00
+#define NVIF_OUTP_V0_ACQUIRE     0x01
+#define NVIF_OUTP_V0_RELEASE     0x02
+#define NVIF_OUTP_V0_INFOFRAME   0x03
+#define NVIF_OUTP_V0_HDA_ELD     0x04
+#define NVIF_OUTP_V0_DP_AUX_PWR  0x05
+#define NVIF_OUTP_V0_DP_RETRAIN  0x06
+#define NVIF_OUTP_V0_DP_MST_VCPI 0x07
 
 union nvif_outp_load_detect_args {
 	struct nvif_outp_load_detect_v0 {
@@ -20,4 +27,95 @@ union nvif_outp_load_detect_args {
 		__u32 data; /*TODO: move vbios loadval parsing into nvkm */
 	} v0;
 };
+
+union nvif_outp_acquire_args {
+	struct nvif_outp_acquire_v0 {
+		__u8 version;
+#define NVIF_OUTP_ACQUIRE_V0_RGB_CRT 0x00
+#define NVIF_OUTP_ACQUIRE_V0_TV      0x01
+#define NVIF_OUTP_ACQUIRE_V0_TMDS    0x02
+#define NVIF_OUTP_ACQUIRE_V0_LVDS    0x03
+#define NVIF_OUTP_ACQUIRE_V0_DP      0x04
+		__u8 proto;
+		__u8 or;
+		__u8 link;
+		__u8 pad04[4];
+		union {
+			struct {
+				__u8 head;
+				__u8 hdmi;
+				__u8 hdmi_max_ac_packet;
+				__u8 hdmi_rekey;
+#define NVIF_OUTP_ACQUIRE_V0_TMDS_HDMI_SCDC_SCRAMBLE (1 << 0)
+#define NVIF_OUTP_ACQUIRE_V0_TMDS_HDMI_SCDC_DIV_BY_4 (1 << 1)
+				__u8 hdmi_scdc;
+				__u8 hdmi_hda;
+				__u8 pad06[2];
+			} tmds;
+			struct {
+				__u8 dual;
+				__u8 bpc8;
+				__u8 pad02[6];
+			} lvds;
+			struct {
+				__u8 link_nr; /* 0 = highest possible. */
+				__u8 link_bw; /* 0 = highest possible, DP BW code otherwise. */
+				__u8 hda;
+				__u8 mst;
+				__u8 pad04[4];
+				__u8 dpcd[16];
+			} dp;
+		};
+	} v0;
+};
+
+union nvif_outp_release_args {
+	struct nvif_outp_release_vn {
+	} vn;
+};
+
+union nvif_outp_infoframe_args {
+	struct nvif_outp_infoframe_v0 {
+		__u8 version;
+#define NVIF_OUTP_INFOFRAME_V0_AVI 0
+#define NVIF_OUTP_INFOFRAME_V0_VSI 1
+		__u8 type;
+		__u8 head;
+		__u8 pad03[5];
+		__u8 data[];
+	} v0;
+};
+
+union nvif_outp_hda_eld_args {
+	struct nvif_outp_hda_eld_v0 {
+		__u8  version;
+		__u8  head;
+		__u8  pad02[6];
+		__u8  data[];
+	} v0;
+};
+
+union nvif_outp_dp_aux_pwr_args {
+	struct nvif_outp_dp_aux_pwr_v0 {
+		__u8 version;
+		__u8 state;
+		__u8 pad02[6];
+	} v0;
+};
+
+union nvif_outp_dp_retrain_args {
+	struct nvif_outp_dp_retrain_vn {
+	} vn;
+};
+
+union nvif_outp_dp_mst_vcpi_args {
+	struct nvif_outp_dp_mst_vcpi_v0 {
+		__u8  version;
+		__u8  head;
+		__u8  start_slot;
+		__u8  num_slots;
+		__u16 pbn;
+		__u16 aligned_pbn;
+	} v0;
+};
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0013.h b/drivers/gpu/drm/nouveau/include/nvif/if0013.h
new file mode 100644
index 000000000000..6756c7467ae4
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvif/if0013.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef __NVIF_IF0013_H__
+#define __NVIF_IF0013_H__
+
+union nvif_head_args {
+	struct nvif_head_v0 {
+		__u8 version;
+		__u8 id;
+		__u8 pad02[6];
+	} v0;
+};
+
+union nvif_head_event_args {
+	struct nvif_head_event_vn {
+	} vn;
+};
+
+#define NVIF_HEAD_V0_SCANOUTPOS 0x00
+
+union nvif_head_scanoutpos_args {
+	struct nvif_head_scanoutpos_v0 {
+		__u8  version;
+		__u8  pad01[7];
+		__s64 time[2];
+		__u16 vblanks;
+		__u16 vblanke;
+		__u16 vtotal;
+		__u16 vline;
+		__u16 hblanks;
+		__u16 hblanke;
+		__u16 htotal;
+		__u16 hline;
+	} v0;
+};
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0020.h b/drivers/gpu/drm/nouveau/include/nvif/if0020.h
new file mode 100644
index 000000000000..085e0ae8a450
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvif/if0020.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef __NVIF_IF0020_H__
+#define __NVIF_IF0020_H__
+
+union nvif_chan_args {
+	struct nvif_chan_v0 {
+		__u8  version;
+		__u8  namelen;
+		__u8  runlist;
+		__u8  runq;
+		__u8  priv;
+		__u8  pad05;
+		__u16 devm;
+		__u64 vmm;
+
+		__u64 ctxdma;
+		__u64 offset;
+		__u64 length;
+
+		__u64 huserd;
+		__u64 ouserd;
+
+		__u32 token;
+		__u16 chid;
+		__u8  pad3e;
+#define NVIF_CHAN_V0_INST_APER_VRAM 0
+#define NVIF_CHAN_V0_INST_APER_HOST 1
+#define NVIF_CHAN_V0_INST_APER_NCOH 2
+#define NVIF_CHAN_V0_INST_APER_INST 0xff
+		__u8  aper;
+		__u64 inst;
+
+		__u8  name[];
+	} v0;
+};
+
+union nvif_chan_event_args {
+	struct nvif_chan_event_v0 {
+		__u8 version;
+#define NVIF_CHAN_EVENT_V0_NON_STALL_INTR 0x00
+#define NVIF_CHAN_EVENT_V0_KILLED         0x01
+		__u8 type;
+	} v0;
+};
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0021.h b/drivers/gpu/drm/nouveau/include/nvif/if0021.h
new file mode 100644
index 000000000000..5013def90455
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvif/if0021.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef __NVIF_IF0021_H__
+#define __NVIF_IF0021_H__
+
+union nvif_cgrp_args {
+	struct nvif_cgrp_v0 {
+		__u8  version;
+		__u8  namelen;
+		__u8  runlist;
+		__u8  pad03[3];
+		__u16 cgid;
+		__u64 vmm;
+		__u8  name[];
+	} v0;
+};
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/ioctl.h b/drivers/gpu/drm/nouveau/include/nvif/ioctl.h
index 886c63fe753f..4e047bb1fc07 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/ioctl.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/ioctl.h
@@ -15,10 +15,6 @@ struct nvif_ioctl_v0 {
 #define NVIF_IOCTL_V0_WR                                                   0x06
 #define NVIF_IOCTL_V0_MAP                                                  0x07
 #define NVIF_IOCTL_V0_UNMAP                                                0x08
-#define NVIF_IOCTL_V0_NTFY_NEW                                             0x09
-#define NVIF_IOCTL_V0_NTFY_DEL                                             0x0a
-#define NVIF_IOCTL_V0_NTFY_GET                                             0x0b
-#define NVIF_IOCTL_V0_NTFY_PUT                                             0x0c
 	__u8  type;
 	__u8  pad02[4];
 #define NVIF_IOCTL_V0_OWNER_NVIF                                           0x00
@@ -63,6 +59,14 @@ struct nvif_ioctl_new_v0 {
 struct nvif_ioctl_del {
 };
 
+struct nvif_ioctl_mthd_v0 {
+	/* nvif_ioctl ... */
+	__u8  version;
+	__u8  method;
+	__u8  pad02[6];
+	__u8  data[];		/* method data (class.h) */
+};
+
 struct nvif_ioctl_rd_v0 {
 	/* nvif_ioctl ... */
 	__u8  version;
@@ -95,43 +99,4 @@ struct nvif_ioctl_map_v0 {
 
 struct nvif_ioctl_unmap {
 };
-
-struct nvif_ioctl_ntfy_new_v0 {
-	/* nvif_ioctl ... */
-	__u8  version;
-	__u8  event;
-	__u8  index;
-	__u8  pad03[5];
-	__u8  data[];		/* event request data (event.h) */
-};
-
-struct nvif_ioctl_ntfy_del_v0 {
-	/* nvif_ioctl ... */
-	__u8  version;
-	__u8  index;
-	__u8  pad02[6];
-};
-
-struct nvif_ioctl_ntfy_get_v0 {
-	/* nvif_ioctl ... */
-	__u8  version;
-	__u8  index;
-	__u8  pad02[6];
-};
-
-struct nvif_ioctl_ntfy_put_v0 {
-	/* nvif_ioctl ... */
-	__u8  version;
-	__u8  index;
-	__u8  pad02[6];
-};
-
-struct nvif_ioctl_mthd_v0 {
-	/* nvif_ioctl ... */
-	__u8  version;
-	__u8  method;
-	__u8  pad02[6];
-	__u8  data[];		/* method data (class.h) */
-};
-
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/notify.h b/drivers/gpu/drm/nouveau/include/nvif/notify.h
deleted file mode 100644
index 39f6b7ee1719..000000000000
--- a/drivers/gpu/drm/nouveau/include/nvif/notify.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __NVIF_NOTIFY_H__
-#define __NVIF_NOTIFY_H__
-
-struct nvif_notify {
-	struct nvif_object *object;
-	const char *name;
-	int index;
-
-#define NVIF_NOTIFY_USER 0
-#define NVIF_NOTIFY_WORK 1
-	unsigned long flags;
-	atomic_t putcnt;
-	void (*dtor)(struct nvif_notify *);
-#define NVIF_NOTIFY_DROP 0
-#define NVIF_NOTIFY_KEEP 1
-	int  (*func)(struct nvif_notify *);
-
-	/* this is const for a *very* good reason - the data might be on the
-	 * stack from an irq handler.  if you're not nvif/notify.c then you
-	 * should probably think twice before casting it away...
-	 */
-	const void *data;
-	u32 size;
-	struct work_struct work;
-};
-
-int  nvif_notify_ctor(struct nvif_object *, const char *name,
-		      int (*func)(struct nvif_notify *), bool work, u8 type,
-		      void *data, u32 size, u32 reply, struct nvif_notify *);
-int  nvif_notify_dtor(struct nvif_notify *);
-int  nvif_notify_get(struct nvif_notify *);
-int  nvif_notify_put(struct nvif_notify *);
-int  nvif_notify(const void *, u32, const void *, u32);
-#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/outp.h b/drivers/gpu/drm/nouveau/include/nvif/outp.h
index 0d6aa07a9184..45daadec3c0c 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/outp.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/outp.h
@@ -2,13 +2,32 @@
 #ifndef __NVIF_OUTP_H__
 #define __NVIF_OUTP_H__
 #include <nvif/object.h>
+#include <nvif/if0012.h>
 struct nvif_disp;
 
 struct nvif_outp {
 	struct nvif_object object;
+
+	struct {
+		int id;
+		int link;
+	} or;
 };
 
 int nvif_outp_ctor(struct nvif_disp *, const char *name, int id, struct nvif_outp *);
 void nvif_outp_dtor(struct nvif_outp *);
 int nvif_outp_load_detect(struct nvif_outp *, u32 loadval);
+int nvif_outp_acquire_rgb_crt(struct nvif_outp *);
+int nvif_outp_acquire_tmds(struct nvif_outp *, int head,
+			   bool hdmi, u8 max_ac_packet, u8 rekey, u8 scdc, bool hda);
+int nvif_outp_acquire_lvds(struct nvif_outp *, bool dual, bool bpc8);
+int nvif_outp_acquire_dp(struct nvif_outp *, u8 dpcd[16],
+			 int link_nr, int link_bw, bool hda, bool mst);
+void nvif_outp_release(struct nvif_outp *);
+int nvif_outp_infoframe(struct nvif_outp *, u8 type, struct nvif_outp_infoframe_v0 *, u32 size);
+int nvif_outp_hda_eld(struct nvif_outp *, int head, void *data, u32 size);
+int nvif_outp_dp_aux_pwr(struct nvif_outp *, bool enable);
+int nvif_outp_dp_retrain(struct nvif_outp *);
+int nvif_outp_dp_mst_vcpi(struct nvif_outp *, int head,
+			  u8 start_slot, u8 num_slots, u16 pbn, u16 aligned_pbn);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
index 2f86606e708c..0d9fc741a719 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
@@ -10,28 +10,19 @@ struct nvkm_client {
 	u64 device;
 	u32 debug;
 
-	struct nvkm_client_notify *notify[32];
 	struct rb_root objroot;
 
 	void *data;
-	int (*ntfy)(const void *, u32, const void *, u32);
+	int (*event)(u64 token, void *argv, u32 argc);
 
 	struct list_head umem;
 	spinlock_t lock;
 };
 
-int  nvkm_client_new(const char *name, u64 device, const char *cfg,
-		     const char *dbg,
-		     int (*)(const void *, u32, const void *, u32),
-		     struct nvkm_client **);
+int  nvkm_client_new(const char *name, u64 device, const char *cfg, const char *dbg,
+		     int (*)(u64, void *, u32), struct nvkm_client **);
 struct nvkm_client *nvkm_client_search(struct nvkm_client *, u64 handle);
 
-int nvkm_client_notify_new(struct nvkm_object *, struct nvkm_event *,
-			   void *data, u32 size);
-int nvkm_client_notify_del(struct nvkm_client *, int index);
-int nvkm_client_notify_get(struct nvkm_client *, int index);
-int nvkm_client_notify_put(struct nvkm_client *, int index);
-
 /* logging for client-facing objects */
 #define nvif_printk(o,l,p,f,a...) do {                                         \
 	const struct nvkm_object *_object = (o);                               \
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
index efede1f11e1d..f65b5009acf7 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
@@ -2,6 +2,7 @@
 #ifndef __NVKM_DEVICE_H__
 #define __NVKM_DEVICE_H__
 #include <core/oclass.h>
+#include <core/intr.h>
 enum nvkm_subdev_type;
 
 enum nvkm_device_type {
@@ -60,6 +61,16 @@ struct nvkm_device {
 #undef NVKM_LAYOUT_INST
 #undef NVKM_LAYOUT_ONCE
 	struct list_head subdev;
+
+	struct {
+		struct list_head intr;
+		struct list_head prio[NVKM_INTR_PRIO_NR];
+		spinlock_t lock;
+		int irq;
+		bool alloc;
+		bool armed;
+		bool legacy_done;
+	} intr;
 };
 
 struct nvkm_subdev *nvkm_device_subdev(struct nvkm_device *, int type, int inst);
@@ -72,6 +83,7 @@ struct nvkm_device_func {
 	int (*preinit)(struct nvkm_device *);
 	int (*init)(struct nvkm_device *);
 	void (*fini)(struct nvkm_device *, bool suspend);
+	int (*irq)(struct nvkm_device *);
 	resource_size_t (*resource_addr)(struct nvkm_device *, unsigned bar);
 	resource_size_t (*resource_size)(struct nvkm_device *, unsigned bar);
 	bool cpu_coherent;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
index e58923b67d74..b67b9c1a6b4e 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
@@ -12,12 +12,6 @@ struct nvkm_engine {
 	const struct nvkm_engine_func *func;
 	struct nvkm_subdev subdev;
 	spinlock_t lock;
-
-	struct {
-		refcount_t refcount;
-		struct mutex mutex;
-		bool enabled;
-	} use;
 };
 
 struct nvkm_engine_func {
@@ -27,6 +21,7 @@ struct nvkm_engine_func {
 	int (*info)(struct nvkm_engine *, u64 mthd, u64 *data);
 	int (*init)(struct nvkm_engine *);
 	int (*fini)(struct nvkm_engine *, bool suspend);
+	int (*reset)(struct nvkm_engine *);
 	void (*intr)(struct nvkm_engine *);
 	void (*tile)(struct nvkm_engine *, int region, struct nvkm_fb_tile *);
 	bool (*chsw_load)(struct nvkm_engine *);
@@ -54,6 +49,7 @@ int nvkm_engine_new_(const struct nvkm_engine_func *, struct nvkm_device *,
 
 struct nvkm_engine *nvkm_engine_ref(struct nvkm_engine *);
 void nvkm_engine_unref(struct nvkm_engine **);
+int nvkm_engine_reset(struct nvkm_engine *);
 void nvkm_engine_tile(struct nvkm_engine *, int region);
 bool nvkm_engine_chsw_load(struct nvkm_engine *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h
index a7a413f07a78..82b267c11147 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/event.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/event.h
@@ -2,34 +2,76 @@
 #ifndef __NVKM_EVENT_H__
 #define __NVKM_EVENT_H__
 #include <core/os.h>
-struct nvkm_notify;
 struct nvkm_object;
+struct nvkm_oclass;
+struct nvkm_uevent;
 
 struct nvkm_event {
 	const struct nvkm_event_func *func;
+	struct nvkm_subdev *subdev;
 
 	int types_nr;
 	int index_nr;
 
 	spinlock_t refs_lock;
 	spinlock_t list_lock;
-	struct list_head list;
 	int *refs;
+
+	struct list_head ntfy;
 };
 
 struct nvkm_event_func {
-	int  (*ctor)(struct nvkm_object *, void *data, u32 size,
-		     struct nvkm_notify *);
-	void (*send)(void *data, u32 size, struct nvkm_notify *);
 	void (*init)(struct nvkm_event *, int type, int index);
 	void (*fini)(struct nvkm_event *, int type, int index);
 };
 
-int  nvkm_event_init(const struct nvkm_event_func *func, int types_nr,
-		     int index_nr, struct nvkm_event *);
+int  __nvkm_event_init(const struct nvkm_event_func *func, struct nvkm_subdev *, int types_nr,
+		       int index_nr, struct nvkm_event *);
+
+/* Each nvkm_event needs its own lockdep class due to inter-dependencies, to
+ * prevent lockdep false-positives.
+ *
+ * Inlining the spinlock initialisation ensures each is unique.
+ */
+static __always_inline int
+nvkm_event_init(const struct nvkm_event_func *func, struct nvkm_subdev *subdev,
+		int types_nr, int index_nr, struct nvkm_event *event)
+{
+	spin_lock_init(&event->refs_lock);
+	spin_lock_init(&event->list_lock);
+	return __nvkm_event_init(func, subdev, types_nr, index_nr, event);
+}
+
 void nvkm_event_fini(struct nvkm_event *);
-void nvkm_event_get(struct nvkm_event *, u32 types, int index);
-void nvkm_event_put(struct nvkm_event *, u32 types, int index);
-void nvkm_event_send(struct nvkm_event *, u32 types, int index,
-		     void *data, u32 size);
+
+#define NVKM_EVENT_KEEP 0
+#define NVKM_EVENT_DROP 1
+struct nvkm_event_ntfy;
+typedef int (*nvkm_event_func)(struct nvkm_event_ntfy *, u32 bits);
+
+struct nvkm_event_ntfy {
+	struct nvkm_event *event;
+	int id;
+	u32 bits;
+	bool wait;
+	nvkm_event_func func;
+
+	atomic_t allowed;
+	bool running;
+
+	struct list_head head;
+};
+
+void nvkm_event_ntfy(struct nvkm_event *, int id, u32 bits);
+bool nvkm_event_ntfy_valid(struct nvkm_event *, int id, u32 bits);
+void nvkm_event_ntfy_add(struct nvkm_event *, int id, u32 bits, bool wait, nvkm_event_func,
+			 struct nvkm_event_ntfy *);
+void nvkm_event_ntfy_del(struct nvkm_event_ntfy *);
+void nvkm_event_ntfy_allow(struct nvkm_event_ntfy *);
+void nvkm_event_ntfy_block(struct nvkm_event_ntfy *);
+
+typedef int (*nvkm_uevent_func)(struct nvkm_object *, u64 token, u32 bits);
+
+int nvkm_uevent_new(const struct nvkm_oclass *, void *argv, u32 argc, struct nvkm_object **);
+int nvkm_uevent_add(struct nvkm_uevent *, struct nvkm_event *, int id, u32 bits, nvkm_uevent_func);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/falcon.h b/drivers/gpu/drm/nouveau/include/nvkm/core/falcon.h
index fd9a3f9a518e..b857cf142c4a 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/falcon.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/falcon.h
@@ -1,34 +1,166 @@
 #ifndef __NVKM_FALCON_H__
 #define __NVKM_FALCON_H__
+#include <core/firmware.h>
 #include <engine/falcon.h>
 
+enum nvkm_falcon_mem {
+	IMEM,
+	DMEM,
+	EMEM,
+};
+
+static inline const char *
+nvkm_falcon_mem(enum nvkm_falcon_mem mem)
+{
+	switch (mem) {
+	case IMEM: return "imem";
+	case DMEM: return "dmem";
+	case EMEM: return "emem";
+	default:
+		WARN_ON(1);
+		return "?mem";
+	}
+}
+
+struct nvkm_falcon_func_pio {
+	int min;
+	int max;
+	void (*wr_init)(struct nvkm_falcon *, u8 port, bool sec, u32 mem_base);
+	void (*wr)(struct nvkm_falcon *, u8 port, const u8 *img, int len, u16 tag);
+	void (*rd_init)(struct nvkm_falcon *, u8 port, u32 mem_base);
+	void (*rd)(struct nvkm_falcon *, u8 port, const u8 *img, int len);
+};
+
+struct nvkm_falcon_func_dma {
+	int (*init)(struct nvkm_falcon *, u64 dma_addr, int xfer_len,
+		    enum nvkm_falcon_mem, bool sec, u32 *cmd);
+	void (*xfer)(struct nvkm_falcon *, u32 mem_base, u32 dma_base, u32 cmd);
+	bool (*done)(struct nvkm_falcon *);
+};
+
 int nvkm_falcon_ctor(const struct nvkm_falcon_func *, struct nvkm_subdev *owner,
 		     const char *name, u32 addr, struct nvkm_falcon *);
 void nvkm_falcon_dtor(struct nvkm_falcon *);
+int nvkm_falcon_reset(struct nvkm_falcon *);
+int nvkm_falcon_pio_wr(struct nvkm_falcon *, const u8 *img, u32 img_base, u8 port,
+		       enum nvkm_falcon_mem mem_type, u32 mem_base, int len, u16 tag, bool sec);
+int nvkm_falcon_pio_rd(struct nvkm_falcon *, u8 port, enum nvkm_falcon_mem type, u32 mem_base,
+		       const u8 *img, u32 img_base, int len);
+int nvkm_falcon_dma_wr(struct nvkm_falcon *, const u8 *img, u64 dma_addr, u32 dma_base,
+		       enum nvkm_falcon_mem mem_type, u32 mem_base, int len, bool sec);
+
+int gm200_flcn_reset_wait_mem_scrubbing(struct nvkm_falcon *);
+int gm200_flcn_disable(struct nvkm_falcon *);
+int gm200_flcn_enable(struct nvkm_falcon *);
+void gm200_flcn_bind_inst(struct nvkm_falcon *, int, u64);
+int gm200_flcn_bind_stat(struct nvkm_falcon *, bool);
+extern const struct nvkm_falcon_func_pio gm200_flcn_imem_pio;
+extern const struct nvkm_falcon_func_pio gm200_flcn_dmem_pio;
+void gm200_flcn_tracepc(struct nvkm_falcon *);
+
+int gp102_flcn_reset_eng(struct nvkm_falcon *);
+extern const struct nvkm_falcon_func_pio gp102_flcn_emem_pio;
+
+int ga102_flcn_select(struct nvkm_falcon *);
+int ga102_flcn_reset_prep(struct nvkm_falcon *);
+int ga102_flcn_reset_wait_mem_scrubbing(struct nvkm_falcon *);
+extern const struct nvkm_falcon_func_dma ga102_flcn_dma;
 
 void nvkm_falcon_v1_load_imem(struct nvkm_falcon *,
 			      void *, u32, u32, u16, u8, bool);
 void nvkm_falcon_v1_load_dmem(struct nvkm_falcon *, void *, u32, u32, u8);
-void nvkm_falcon_v1_read_dmem(struct nvkm_falcon *, u32, u32, u8, void *);
-void nvkm_falcon_v1_bind_context(struct nvkm_falcon *, struct nvkm_memory *);
-int nvkm_falcon_v1_wait_for_halt(struct nvkm_falcon *, u32);
-int nvkm_falcon_v1_clear_interrupt(struct nvkm_falcon *, u32);
-void nvkm_falcon_v1_set_start_addr(struct nvkm_falcon *, u32 start_addr);
 void nvkm_falcon_v1_start(struct nvkm_falcon *);
-int nvkm_falcon_v1_enable(struct nvkm_falcon *);
-void nvkm_falcon_v1_disable(struct nvkm_falcon *);
 
-void gp102_sec2_flcn_bind_context(struct nvkm_falcon *, struct nvkm_memory *);
-int gp102_sec2_flcn_enable(struct nvkm_falcon *);
+#define FLCN_PRINTK(f,l,p,fmt,a...) ({                                                          \
+	if ((f)->owner->name != (f)->name)                                                      \
+		nvkm_printk___((f)->owner, (f)->user, NV_DBG_##l, p, "%s:"fmt, (f)->name, ##a); \
+	else                                                                                    \
+		nvkm_printk___((f)->owner, (f)->user, NV_DBG_##l, p, fmt, ##a);                 \
+})
+#define FLCN_DBG(f,fmt,a...) FLCN_PRINTK((f), DEBUG, info, " "fmt"\n", ##a)
+#define FLCN_ERR(f,fmt,a...) FLCN_PRINTK((f), ERROR, err, " "fmt"\n", ##a)
+#define FLCN_ERRON(f,c,fmt,a...) \
+	({ bool _cond = (c); _cond ? FLCN_ERR(f, fmt, ##a) : FLCN_DBG(f, fmt, ##a); _cond; })
+
+
+struct nvkm_falcon_fw {
+	const struct nvkm_falcon_fw_func {
+		int (*signature)(struct nvkm_falcon_fw *, u32 *sig_base_src);
+		int (*reset)(struct nvkm_falcon_fw *);
+		int (*setup)(struct nvkm_falcon_fw *);
+		int (*load)(struct nvkm_falcon_fw *);
+		int (*load_bld)(struct nvkm_falcon_fw *);
+		int (*boot)(struct nvkm_falcon_fw *,
+			    u32 *mbox0, u32 *mbox1, u32 mbox0_ok, u32 irqsclr);
+	} *func;
+	struct nvkm_firmware fw;
+
+	u32 sig_base_prd;
+	u32 sig_base_dbg;
+	u32 sig_base_img;
+	u32 sig_size;
+	int sig_nr;
+	u8 *sigs;
+	u32 fuse_ver;
+	u32 engine_id;
+	u32 ucode_id;
+
+	u32 nmem_base_img;
+	u32 nmem_base;
+	u32 nmem_size;
+
+	u32 imem_base_img;
+	u32 imem_base;
+	u32 imem_size;
+
+	u32 dmem_base_img;
+	u32 dmem_base;
+	u32 dmem_size;
+	u32 dmem_sign;
+
+	u8 *boot;
+	u32 boot_size;
+	u32 boot_addr;
+
+	struct nvkm_falcon *falcon;
+	struct nvkm_memory *inst;
+	struct nvkm_vmm *vmm;
+	struct nvkm_vma *vma;
+};
+
+int nvkm_falcon_fw_ctor(const struct nvkm_falcon_fw_func *, const char *name, struct nvkm_device *,
+		        bool bl, const void *src, u32 len, struct nvkm_falcon *,
+			struct nvkm_falcon_fw *);
+int nvkm_falcon_fw_ctor_hs(const struct nvkm_falcon_fw_func *, const char *name,
+			   struct nvkm_subdev *, const char *bl, const char *img, int ver,
+			   struct nvkm_falcon *falcon, struct nvkm_falcon_fw *fw);
+int nvkm_falcon_fw_ctor_hs_v2(const struct nvkm_falcon_fw_func *, const char *name,
+			      struct nvkm_subdev *, const char *img, int ver, struct nvkm_falcon *,
+			      struct nvkm_falcon_fw *);
+int nvkm_falcon_fw_sign(struct nvkm_falcon_fw *, u32 sig_base_img, u32 sig_size, const u8 *sigs,
+			int sig_nr_prd, u32 sig_base_prd, int sig_nr_dbg, u32 sig_base_dbg);
+int nvkm_falcon_fw_patch(struct nvkm_falcon_fw *);
+void nvkm_falcon_fw_dtor(struct nvkm_falcon_fw *);
+int nvkm_falcon_fw_oneinit(struct nvkm_falcon_fw *, struct nvkm_falcon *, struct nvkm_vmm *,
+			   struct nvkm_memory *inst);
+int nvkm_falcon_fw_boot(struct nvkm_falcon_fw *, struct nvkm_subdev *user,
+			bool release, u32 *pmbox0, u32 *pmbox1, u32 mbox0_ok, u32 irqsclr);
+
+extern const struct nvkm_falcon_fw_func gm200_flcn_fw;
+int gm200_flcn_fw_signature(struct nvkm_falcon_fw *, u32 *);
+int gm200_flcn_fw_reset(struct nvkm_falcon_fw *);
+int gm200_flcn_fw_load(struct nvkm_falcon_fw *);
+int gm200_flcn_fw_boot(struct nvkm_falcon_fw *, u32 *, u32 *, u32, u32);
+
+int ga100_flcn_fw_signature(struct nvkm_falcon_fw *, u32 *);
+
+extern const struct nvkm_falcon_fw_func ga102_flcn_fw;
+int ga102_flcn_fw_load(struct nvkm_falcon_fw *);
+int ga102_flcn_fw_boot(struct nvkm_falcon_fw *, u32 *, u32 *, u32, u32);
 
-#define FLCN_PRINTK(t,f,fmt,a...) do {                               \
-	if ((f)->owner->name != (f)->name)                           \
-		nvkm_##t((f)->owner, "%s: "fmt"\n", (f)->name, ##a); \
-	else                                                         \
-		nvkm_##t((f)->owner, fmt"\n", ##a);                  \
-} while(0)
-#define FLCN_DBG(f,fmt,a...) FLCN_PRINTK(debug, (f), fmt, ##a)
-#define FLCN_ERR(f,fmt,a...) FLCN_PRINTK(error, (f), fmt, ##a)
+#define FLCNFW_PRINTK(f,l,p,fmt,a...) FLCN_PRINTK((f)->falcon, l, p, "%s: "fmt, (f)->fw.name, ##a)
+#define FLCNFW_DBG(f,fmt,a...) FLCNFW_PRINTK((f), DEBUG, info, fmt"\n", ##a)
+#define FLCNFW_ERR(f,fmt,a...) FLCNFW_PRINTK((f), ERROR, err, fmt"\n", ##a)
 
 /**
  * struct nvfw_falcon_msg - header for all messages
@@ -72,6 +204,7 @@ int nvkm_falcon_msgq_new(struct nvkm_falcon_qmgr *, const char *name,
 void nvkm_falcon_msgq_del(struct nvkm_falcon_msgq **);
 void nvkm_falcon_msgq_init(struct nvkm_falcon_msgq *,
 			   u32 index, u32 offset, u32 size);
+bool nvkm_falcon_msgq_empty(struct nvkm_falcon_msgq *);
 int nvkm_falcon_msgq_recv_initmsg(struct nvkm_falcon_msgq *, void *, u32 size);
 void nvkm_falcon_msgq_recv(struct nvkm_falcon_msgq *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/firmware.h b/drivers/gpu/drm/nouveau/include/nvkm/core/firmware.h
index 85bcb80f6873..d4e507e252b1 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/firmware.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/firmware.h
@@ -1,9 +1,34 @@
 /* SPDX-License-Identifier: MIT */
 #ifndef __NVKM_FIRMWARE_H__
 #define __NVKM_FIRMWARE_H__
+#include <core/memory.h>
 #include <core/option.h>
 #include <core/subdev.h>
 
+struct nvkm_firmware {
+	const struct nvkm_firmware_func {
+		enum nvkm_firmware_type {
+			NVKM_FIRMWARE_IMG_RAM,
+			NVKM_FIRMWARE_IMG_DMA,
+		} type;
+	} *func;
+	const char *name;
+	struct nvkm_device *device;
+
+	int len;
+	u8 *img;
+	u64 phys;
+
+	struct nvkm_firmware_mem {
+		struct nvkm_memory memory;
+		struct scatterlist sgl;
+	} mem;
+};
+
+int nvkm_firmware_ctor(const struct nvkm_firmware_func *, const char *name, struct nvkm_device *,
+		       const void *ptr, int len, struct nvkm_firmware *);
+void nvkm_firmware_dtor(struct nvkm_firmware *);
+
 int nvkm_firmware_get(const struct nvkm_subdev *, const char *fwname, int ver,
 		      const struct firmware **);
 void nvkm_firmware_put(const struct firmware *);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/intr.h b/drivers/gpu/drm/nouveau/include/nvkm/core/intr.h
new file mode 100644
index 000000000000..a003d6a544b0
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/intr.h
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef __NVKM_INTR_H__
+#define __NVKM_INTR_H__
+#include <core/os.h>
+struct nvkm_device;
+struct nvkm_subdev;
+
+enum nvkm_intr_prio {
+	NVKM_INTR_PRIO_VBLANK = 0,
+	NVKM_INTR_PRIO_NORMAL,
+	NVKM_INTR_PRIO_NR
+};
+
+enum nvkm_intr_type {
+	NVKM_INTR_SUBDEV   = -1, /* lookup vector by requesting subdev, in mapping table. */
+	NVKM_INTR_VECTOR_0 = 0,
+};
+
+struct nvkm_intr {
+	const struct nvkm_intr_func {
+		bool (*pending)(struct nvkm_intr *);
+		void (*unarm)(struct nvkm_intr *);
+		void (*rearm)(struct nvkm_intr *);
+		void (*block)(struct nvkm_intr *, int leaf, u32 mask);
+		void (*allow)(struct nvkm_intr *, int leaf, u32 mask);
+		void (*reset)(struct nvkm_intr *, int leaf, u32 mask);
+	} *func;
+	const struct nvkm_intr_data {
+		int type; /* enum nvkm_subdev_type (+ve), enum nvkm_intr_type (-ve) */
+		int inst;
+		int leaf;
+		u32 mask; /* 0-terminated. */
+		bool legacy; /* auto-create "legacy" nvkm_subdev_intr() handler */
+	} *data;
+
+	struct nvkm_subdev *subdev;
+	int leaves;
+	u32 *stat;
+	u32 *mask;
+
+	struct list_head head;
+};
+
+void nvkm_intr_ctor(struct nvkm_device *);
+void nvkm_intr_dtor(struct nvkm_device *);
+int nvkm_intr_install(struct nvkm_device *);
+void nvkm_intr_unarm(struct nvkm_device *);
+void nvkm_intr_rearm(struct nvkm_device *);
+
+int nvkm_intr_add(const struct nvkm_intr_func *, const struct nvkm_intr_data *,
+		  struct nvkm_subdev *, int leaves, struct nvkm_intr *);
+void nvkm_intr_block(struct nvkm_subdev *, enum nvkm_intr_type);
+void nvkm_intr_allow(struct nvkm_subdev *, enum nvkm_intr_type);
+
+struct nvkm_inth;
+typedef irqreturn_t (*nvkm_inth_func)(struct nvkm_inth *);
+
+struct nvkm_inth {
+	struct nvkm_intr *intr;
+	int leaf;
+	u32 mask;
+	nvkm_inth_func func;
+
+	atomic_t allowed;
+
+	struct list_head head;
+};
+
+int nvkm_inth_add(struct nvkm_intr *, enum nvkm_intr_type, enum nvkm_intr_prio,
+		  struct nvkm_subdev *, nvkm_inth_func, struct nvkm_inth *);
+void nvkm_inth_allow(struct nvkm_inth *);
+void nvkm_inth_block(struct nvkm_inth *);
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/layout.h b/drivers/gpu/drm/nouveau/include/nvkm/core/layout.h
index 7afe1579b20f..58108dea5aeb 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/layout.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/layout.h
@@ -1,8 +1,10 @@
 /* SPDX-License-Identifier: MIT */
+NVKM_LAYOUT_ONCE(NVKM_SUBDEV_TOP     , struct nvkm_top     ,      top)
+NVKM_LAYOUT_ONCE(NVKM_SUBDEV_GSP     , struct nvkm_gsp     ,      gsp)
+NVKM_LAYOUT_ONCE(NVKM_SUBDEV_VFN     , struct nvkm_vfn     ,      vfn)
 NVKM_LAYOUT_ONCE(NVKM_SUBDEV_PCI     , struct nvkm_pci     ,      pci)
 NVKM_LAYOUT_ONCE(NVKM_SUBDEV_VBIOS   , struct nvkm_bios    ,     bios)
 NVKM_LAYOUT_ONCE(NVKM_SUBDEV_DEVINIT , struct nvkm_devinit ,  devinit)
-NVKM_LAYOUT_ONCE(NVKM_SUBDEV_TOP     , struct nvkm_top     ,      top)
 NVKM_LAYOUT_ONCE(NVKM_SUBDEV_PRIVRING, struct nvkm_subdev  , privring)
 NVKM_LAYOUT_ONCE(NVKM_SUBDEV_GPIO    , struct nvkm_gpio    ,     gpio)
 NVKM_LAYOUT_ONCE(NVKM_SUBDEV_I2C     , struct nvkm_i2c     ,      i2c)
@@ -23,7 +25,6 @@ NVKM_LAYOUT_ONCE(NVKM_SUBDEV_VOLT    , struct nvkm_volt    ,     volt)
 NVKM_LAYOUT_ONCE(NVKM_SUBDEV_ICCSENSE, struct nvkm_iccsense, iccsense)
 NVKM_LAYOUT_ONCE(NVKM_SUBDEV_THERM   , struct nvkm_therm   ,    therm)
 NVKM_LAYOUT_ONCE(NVKM_SUBDEV_CLK     , struct nvkm_clk     ,      clk)
-NVKM_LAYOUT_ONCE(NVKM_SUBDEV_GSP     , struct nvkm_gsp     ,      gsp)
 NVKM_LAYOUT_INST(NVKM_SUBDEV_IOCTRL  , struct nvkm_subdev  ,   ioctrl, 3)
 NVKM_LAYOUT_ONCE(NVKM_SUBDEV_FLA     , struct nvkm_subdev  ,      fla)
 
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h b/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h
index 74d3f1a809d7..d3b6a68ddda3 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h
@@ -37,6 +37,7 @@ struct nvkm_memory_func {
 	void (*release)(struct nvkm_memory *);
 	int (*map)(struct nvkm_memory *, u64 offset, struct nvkm_vmm *,
 		   struct nvkm_vma *, void *argv, u32 argc);
+	int (*kmap)(struct nvkm_memory *, struct nvkm_memory **);
 };
 
 struct nvkm_memory_ptrs {
@@ -63,6 +64,7 @@ void nvkm_memory_tags_put(struct nvkm_memory *, struct nvkm_device *,
 #define nvkm_memory_boot(p,v) (p)->func->boot((p),(v))
 #define nvkm_memory_map(p,o,vm,va,av,ac)                                       \
 	(p)->func->map((p),(o),(vm),(va),(av),(ac))
+#define nvkm_memory_kmap(p,i) ((p)->func->kmap ? (p)->func->kmap((p), (i)) : -ENOSYS)
 
 /* accessor macros - kmap()/done() must bracket use of the other accessor
  * macros to guarantee correct behaviour across all chipsets
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/notify.h b/drivers/gpu/drm/nouveau/include/nvkm/core/notify.h
deleted file mode 100644
index 3d358a66db3a..000000000000
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/notify.h
+++ /dev/null
@@ -1,39 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __NVKM_NOTIFY_H__
-#define __NVKM_NOTIFY_H__
-#include <core/os.h>
-struct nvkm_object;
-
-struct nvkm_notify {
-	struct nvkm_event *event;
-	struct list_head head;
-#define NVKM_NOTIFY_USER 0
-#define NVKM_NOTIFY_WORK 1
-	unsigned long flags;
-	int block;
-#define NVKM_NOTIFY_DROP 0
-#define NVKM_NOTIFY_KEEP 1
-	int (*func)(struct nvkm_notify *);
-
-	/* set by nvkm_event ctor */
-	u32 types;
-	int index;
-	u32 size;
-
-	struct work_struct work;
-	/* this is const for a *very* good reason - the data might be on the
-	 * stack from an irq handler.  if you're not core/notify.c then you
-	 * should probably think twice before casting it away...
-	 */
-	const void *data;
-};
-
-int  nvkm_notify_init(struct nvkm_object *, struct nvkm_event *,
-		      int (*func)(struct nvkm_notify *), bool work,
-		      void *data, u32 size, u32 reply,
-		      struct nvkm_notify *);
-void nvkm_notify_fini(struct nvkm_notify *);
-void nvkm_notify_get(struct nvkm_notify *);
-void nvkm_notify_put(struct nvkm_notify *);
-void nvkm_notify_send(struct nvkm_notify *, void *data, u32 size);
-#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/object.h b/drivers/gpu/drm/nouveau/include/nvkm/core/object.h
index 7efcd5d2f2ff..ed1f66360782 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/object.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/object.h
@@ -4,6 +4,7 @@
 #include <core/oclass.h>
 struct nvkm_event;
 struct nvkm_gpuobj;
+struct nvkm_uevent;
 
 struct nvkm_object {
 	const struct nvkm_object_func *func;
@@ -43,6 +44,7 @@ struct nvkm_object_func {
 	int (*bind)(struct nvkm_object *, struct nvkm_gpuobj *, int align,
 		    struct nvkm_gpuobj **);
 	int (*sclass)(struct nvkm_object *, int index, struct nvkm_oclass *);
+	int (*uevent)(struct nvkm_object *, void *argv, u32 argc, struct nvkm_uevent *);
 };
 
 void nvkm_object_ctor(const struct nvkm_object_func *,
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/os.h b/drivers/gpu/drm/nouveau/include/nvkm/core/os.h
index d7ba3205207f..4486d9862849 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/os.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/os.h
@@ -34,4 +34,24 @@ nvkm_blob_dtor(struct nvkm_blob *blob)
 	blob->data = NULL;
 	blob->size = 0;
 }
+
+#define nvkm_list_find_next(p,h,m,c) ({                                                      \
+	typeof(p) _p = NULL;                                                                 \
+	list_for_each_entry_continue(p, (h), m) {                                            \
+		if (c) {                                                                     \
+			_p = p;                                                              \
+			break;                                                               \
+		}                                                                            \
+	}                                                                                    \
+	_p;                                                                                  \
+})
+#define nvkm_list_find(p,h,m,c)                                                              \
+	(p = container_of((h), typeof(*p), m), nvkm_list_find_next(p, (h), m, (c)))
+#define nvkm_list_foreach(p,h,m,c)                                                           \
+	for (p = nvkm_list_find(p, (h), m, (c)); p; p = nvkm_list_find_next(p, (h), m, (c)))
+
+/*FIXME: remove after */
+#define nvkm_fifo_chan nvkm_chan
+#define nvkm_fifo_chan_func nvkm_chan_func
+#define nvkm_fifo_cgrp nvkm_cgrp
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
index 96113c8bee8c..bce6e1ba09ea 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
@@ -17,10 +17,19 @@ struct nvkm_subdev {
 	struct nvkm_device *device;
 	enum nvkm_subdev_type type;
 	int inst;
+
 	char name[16];
 	u32 debug;
-	struct list_head head;
 
+	struct {
+		refcount_t refcount;
+		struct mutex mutex;
+		bool enabled;
+	} use;
+
+	struct nvkm_inth inth;
+
+	struct list_head head;
 	void **pself;
 	bool oneinit;
 };
@@ -38,22 +47,41 @@ struct nvkm_subdev_func {
 extern const char *nvkm_subdev_type[NVKM_SUBDEV_NR];
 int nvkm_subdev_new_(const struct nvkm_subdev_func *, struct nvkm_device *, enum nvkm_subdev_type,
 		     int inst, struct nvkm_subdev **);
-void nvkm_subdev_ctor(const struct nvkm_subdev_func *, struct nvkm_device *,
-		      enum nvkm_subdev_type, int inst, struct nvkm_subdev *);
+void __nvkm_subdev_ctor(const struct nvkm_subdev_func *, struct nvkm_device *,
+			enum nvkm_subdev_type, int inst, struct nvkm_subdev *);
+
+static inline void
+nvkm_subdev_ctor(const struct nvkm_subdev_func *func, struct nvkm_device *device,
+		 enum nvkm_subdev_type type, int inst, struct nvkm_subdev *subdev)
+{
+	__nvkm_subdev_ctor(func, device, type, inst, subdev);
+	mutex_init(&subdev->use.mutex);
+}
+
 void nvkm_subdev_disable(struct nvkm_device *, enum nvkm_subdev_type, int inst);
 void nvkm_subdev_del(struct nvkm_subdev **);
+int  nvkm_subdev_ref(struct nvkm_subdev *);
+void nvkm_subdev_unref(struct nvkm_subdev *);
 int  nvkm_subdev_preinit(struct nvkm_subdev *);
+int  nvkm_subdev_oneinit(struct nvkm_subdev *);
 int  nvkm_subdev_init(struct nvkm_subdev *);
 int  nvkm_subdev_fini(struct nvkm_subdev *, bool suspend);
 int  nvkm_subdev_info(struct nvkm_subdev *, u64, u64 *);
 void nvkm_subdev_intr(struct nvkm_subdev *);
 
 /* subdev logging */
-#define nvkm_printk_(s,l,p,f,a...) do {                                        \
-	const struct nvkm_subdev *_subdev = (s);                               \
-	if (CONFIG_NOUVEAU_DEBUG >= (l) && _subdev->debug >= (l))              \
-		dev_##p(_subdev->device->dev, "%s: "f, _subdev->name, ##a);    \
+#define nvkm_printk_ok(s,u,l)                                                                \
+	((CONFIG_NOUVEAU_DEBUG >= (l)) && ((s)->debug >= (l) || ((u) && (u)->debug >= (l))))
+#define nvkm_printk___(s,u,l,p,f,a...) do {                                                  \
+	if (nvkm_printk_ok((s), (u), (l))) {                                                 \
+		if ((u) && (u) != (s))                                                       \
+			dev_##p((s)->device->dev, "%s(%s):"f, (s)->name, (u)->name, ##a);    \
+		else                                                                         \
+			dev_##p((s)->device->dev, "%s:"f, (s)->name, ##a);                   \
+	}                                                                                    \
 } while(0)
+#define nvkm_printk__(s,l,p,f,a...) nvkm_printk___((s), (s), (l), p, f, ##a)
+#define nvkm_printk_(s,l,p,f,a...) nvkm_printk__((s), (l), p, " "f, ##a)
 #define nvkm_printk(s,l,p,f,a...) nvkm_printk_((s), NV_DBG_##l, p, f, ##a)
 #define nvkm_fatal(s,f,a...) nvkm_printk((s), FATAL,   crit, f, ##a)
 #define nvkm_error(s,f,a...) nvkm_printk((s), ERROR,    err, f, ##a)
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
index 924009dd2bb0..ccee53d4e4ec 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/tegra.h
@@ -8,7 +8,6 @@ struct nvkm_device_tegra {
 	const struct nvkm_device_tegra_func *func;
 	struct nvkm_device device;
 	struct platform_device *pdev;
-	int irq;
 
 	struct reset_control *rst;
 	struct clk *clk;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
index cfd2da8e66fe..b616a1e8ca02 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
@@ -12,4 +12,6 @@ int gp100_ce_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct n
 int gp102_ce_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_engine **);
 int gv100_ce_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_engine **);
 int tu102_ce_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_engine **);
+int ga100_ce_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_engine **);
+int ga102_ce_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_engine **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
index 8b5d8a434be8..ad9aef2df48f 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
@@ -16,6 +16,7 @@ struct nvkm_disp {
 	struct list_head conns;
 
 	struct nvkm_event hpd;
+#define NVKM_DISP_HEAD_EVENT_VBLANK BIT(0)
 	struct nvkm_event vblank;
 
 	struct {
@@ -31,13 +32,7 @@ struct nvkm_disp {
 	struct {
 		unsigned long mask;
 		int nr;
-	} wndw, head, dac;
-
-	struct {
-		unsigned long mask;
-		int nr;
-		u32 lvdsconf;
-	} sor;
+	} wndw, head, dac, sor;
 
 	struct {
 		unsigned long mask;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h
index b593407b9e36..cd86d9198e4a 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h
@@ -16,15 +16,16 @@ enum nvkm_falcon_dmaidx {
 
 struct nvkm_falcon {
 	const struct nvkm_falcon_func *func;
-	const struct nvkm_subdev *owner;
+	struct nvkm_subdev *owner;
 	const char *name;
 	u32 addr;
+	u32 addr2;
 
 	struct mutex mutex;
 	struct mutex dmem_mutex;
 	bool oneinit;
 
-	const struct nvkm_subdev *user;
+	struct nvkm_subdev *user;
 
 	u8 version;
 	u8 secret;
@@ -50,13 +51,42 @@ struct nvkm_falcon {
 	struct nvkm_engine engine;
 };
 
-int nvkm_falcon_get(struct nvkm_falcon *, const struct nvkm_subdev *);
-void nvkm_falcon_put(struct nvkm_falcon *, const struct nvkm_subdev *);
+int nvkm_falcon_get(struct nvkm_falcon *, struct nvkm_subdev *);
+void nvkm_falcon_put(struct nvkm_falcon *, struct nvkm_subdev *);
 
 int nvkm_falcon_new_(const struct nvkm_falcon_func *, struct nvkm_device *,
 		     enum nvkm_subdev_type, int inst, bool enable, u32 addr, struct nvkm_engine **);
 
 struct nvkm_falcon_func {
+	int (*disable)(struct nvkm_falcon *);
+	int (*enable)(struct nvkm_falcon *);
+	int (*select)(struct nvkm_falcon *);
+	u32 addr2;
+	bool reset_pmc;
+	int (*reset_eng)(struct nvkm_falcon *);
+	int (*reset_prep)(struct nvkm_falcon *);
+	int (*reset_wait_mem_scrubbing)(struct nvkm_falcon *);
+
+	u32 debug;
+	void (*bind_inst)(struct nvkm_falcon *, int target, u64 addr);
+	int (*bind_stat)(struct nvkm_falcon *, bool intr);
+	bool bind_intr;
+
+	const struct nvkm_falcon_func_pio *imem_pio;
+	const struct nvkm_falcon_func_dma *imem_dma;
+
+	const struct nvkm_falcon_func_pio *dmem_pio;
+	const struct nvkm_falcon_func_dma *dmem_dma;
+
+	u32 emem_addr;
+	const struct nvkm_falcon_func_pio *emem_pio;
+
+	struct {
+		u32 head;
+		u32 tail;
+		u32 stride;
+	} cmdq, msgq;
+
 	struct {
 		u32 *data;
 		u32  size;
@@ -66,29 +96,11 @@ struct nvkm_falcon_func {
 		u32  size;
 	} data;
 	void (*init)(struct nvkm_falcon *);
-	void (*intr)(struct nvkm_falcon *, struct nvkm_fifo_chan *);
-
-	u32 debug;
-	u32 fbif;
+	void (*intr)(struct nvkm_falcon *, struct nvkm_chan *);
 
 	void (*load_imem)(struct nvkm_falcon *, void *, u32, u32, u16, u8, bool);
 	void (*load_dmem)(struct nvkm_falcon *, void *, u32, u32, u8);
-	void (*read_dmem)(struct nvkm_falcon *, u32, u32, u8, void *);
-	u32 emem_addr;
-	void (*bind_context)(struct nvkm_falcon *, struct nvkm_memory *);
-	int (*wait_for_halt)(struct nvkm_falcon *, u32);
-	int (*clear_interrupt)(struct nvkm_falcon *, u32);
-	void (*set_start_addr)(struct nvkm_falcon *, u32 start_addr);
 	void (*start)(struct nvkm_falcon *);
-	int (*enable)(struct nvkm_falcon *falcon);
-	void (*disable)(struct nvkm_falcon *falcon);
-	int (*reset)(struct nvkm_falcon *);
-
-	struct {
-		u32 head;
-		u32 tail;
-		u32 stride;
-	} cmdq, msgq;
 
 	struct nvkm_sclass sclass[];
 };
@@ -116,13 +128,5 @@ nvkm_falcon_mask(struct nvkm_falcon *falcon, u32 addr, u32 mask, u32 val)
 void nvkm_falcon_load_imem(struct nvkm_falcon *, void *, u32, u32, u16, u8,
 			   bool);
 void nvkm_falcon_load_dmem(struct nvkm_falcon *, void *, u32, u32, u8);
-void nvkm_falcon_read_dmem(struct nvkm_falcon *, u32, u32, u8, void *);
-void nvkm_falcon_bind_context(struct nvkm_falcon *, struct nvkm_memory *);
-void nvkm_falcon_set_start_addr(struct nvkm_falcon *, u32);
 void nvkm_falcon_start(struct nvkm_falcon *);
-int nvkm_falcon_wait_for_halt(struct nvkm_falcon *, u32);
-int nvkm_falcon_clear_interrupt(struct nvkm_falcon *, u32);
-int nvkm_falcon_enable(struct nvkm_falcon *);
-void nvkm_falcon_disable(struct nvkm_falcon *);
-int nvkm_falcon_reset(struct nvkm_falcon *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
index 15099913504d..221abd6c4310 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
@@ -6,56 +6,76 @@
 #include <core/event.h>
 struct nvkm_fault_data;
 
-#define NVKM_FIFO_CHID_NR 4096
 #define NVKM_FIFO_ENGN_NR 16
 
-struct nvkm_fifo_engn {
-	struct nvkm_object *object;
-	int refcount;
-	int usecount;
-};
-
-struct nvkm_fifo_chan {
-	const struct nvkm_fifo_chan_func *func;
-	struct nvkm_fifo *fifo;
-	u32 engm;
-	struct nvkm_object object;
+struct nvkm_chan {
+	const struct nvkm_chan_func *func;
+	char name[64];
+	struct nvkm_cgrp *cgrp;
+	int runq;
 
-	struct list_head head;
-	u16 chid;
 	struct nvkm_gpuobj *inst;
-	struct nvkm_gpuobj *push;
 	struct nvkm_vmm *vmm;
-	u64 addr;
-	u32 size;
+	struct nvkm_gpuobj *push;
+	int id;
+
+	struct {
+		struct nvkm_memory *mem;
+		u32 base;
+	} userd;
+
+	u32 ramfc_offset;
+	struct nvkm_gpuobj *ramfc;
+	struct nvkm_gpuobj *cache;
+	struct nvkm_gpuobj *eng;
+	struct nvkm_gpuobj *pgd;
+	struct nvkm_ramht *ramht;
+
+	spinlock_t lock;
+	atomic_t blocked;
+	atomic_t errored;
 
-	struct nvkm_fifo_engn engn[NVKM_FIFO_ENGN_NR];
+	struct list_head cctxs;
+	struct list_head head;
 };
 
+struct nvkm_chan *nvkm_chan_get_chid(struct nvkm_engine *, int id, unsigned long *irqflags);
+struct nvkm_chan *nvkm_chan_get_inst(struct nvkm_engine *, u64 inst, unsigned long *irqflags);
+void nvkm_chan_put(struct nvkm_chan **, unsigned long irqflags);
+
 struct nvkm_fifo {
 	const struct nvkm_fifo_func *func;
 	struct nvkm_engine engine;
 
-	DECLARE_BITMAP(mask, NVKM_FIFO_CHID_NR);
-	int nr;
-	struct list_head chan;
+	struct nvkm_chid *chid;
+	struct nvkm_chid *cgid;
+
+	struct list_head runqs;
+	struct list_head runls;
+
+	struct {
+#define NVKM_FIFO_NONSTALL_EVENT BIT(0)
+		struct nvkm_event event;
+		struct nvkm_inth intr;
+	} nonstall;
+
+	struct {
+		u32 chan_msec;
+	} timeout;
+
+	struct {
+		struct nvkm_memory *mem;
+		struct nvkm_vma *bar1;
+	} userd;
+
 	spinlock_t lock;
 	struct mutex mutex;
-
-	struct nvkm_event uevent; /* async user trigger */
-	struct nvkm_event kevent; /* channel killed */
 };
 
 void nvkm_fifo_fault(struct nvkm_fifo *, struct nvkm_fault_data *);
 void nvkm_fifo_pause(struct nvkm_fifo *, unsigned long *);
 void nvkm_fifo_start(struct nvkm_fifo *, unsigned long *);
-
-void nvkm_fifo_chan_put(struct nvkm_fifo *, unsigned long flags,
-			struct nvkm_fifo_chan **);
-struct nvkm_fifo_chan *
-nvkm_fifo_chan_inst(struct nvkm_fifo *, u64 inst, unsigned long *flags);
-struct nvkm_fifo_chan *
-nvkm_fifo_chan_chid(struct nvkm_fifo *, int chid, unsigned long *flags);
+bool nvkm_fifo_ctxsw_in_progress(struct nvkm_engine *);
 
 int nv04_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
 int nv10_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
@@ -63,6 +83,7 @@ int nv17_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct
 int nv40_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
 int nv50_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
 int g84_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
+int g98_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
 int gf100_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
 int gk104_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
 int gk110_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
@@ -70,10 +91,9 @@ int gk208_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct
 int gk20a_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
 int gm107_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
 int gm200_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
-int gm20b_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
 int gp100_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
-int gp10b_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
 int gv100_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
 int tu102_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
+int ga100_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
 int ga102_fifo_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_fifo **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
index b28b752ffaa2..a2333cfe6955 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/gr.h
@@ -54,4 +54,5 @@ int gp108_gr_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct n
 int gp10b_gr_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_gr **);
 int gv100_gr_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_gr **);
 int tu102_gr_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_gr **);
+int ga102_gr_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_gr **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h
index 97bd3092f68a..9baf197ac833 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/nvdec.h
@@ -12,4 +12,5 @@ struct nvkm_nvdec {
 };
 
 int gm107_nvdec_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_nvdec **);
+int ga102_nvdec_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_nvdec **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h
index 06264c840eae..8d48fb20fa54 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/sec2.h
@@ -10,15 +10,18 @@ struct nvkm_sec2 {
 	struct nvkm_engine engine;
 	struct nvkm_falcon falcon;
 
+	atomic_t running;
+	atomic_t initmsg;
+
 	struct nvkm_falcon_qmgr *qmgr;
 	struct nvkm_falcon_cmdq *cmdq;
 	struct nvkm_falcon_msgq *msgq;
 
 	struct work_struct work;
-	bool initmsg_received;
 };
 
 int gp102_sec2_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_sec2 **);
 int gp108_sec2_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_sec2 **);
 int tu102_sec2_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_sec2 **);
+int ga102_sec2_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_sec2 **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/acr.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/acr.h
index c0b254f7f0b5..73d2a6ae9ab2 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/acr.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/acr.h
@@ -36,7 +36,7 @@ struct nvkm_acr {
 	const struct nvkm_acr_func *func;
 	struct nvkm_subdev subdev;
 
-	struct list_head hsfw, hsf;
+	struct list_head hsfw;
 	struct list_head lsfw, lsf;
 
 	u64 managed_falcons;
@@ -50,6 +50,7 @@ struct nvkm_acr {
 	struct nvkm_vmm *vmm;
 
 	bool done;
+	struct nvkm_acr_lsf *rtos;
 
 	const struct firmware *wpr_fw;
 	bool wpr_comp;
@@ -64,7 +65,9 @@ int gm20b_acr_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct
 int gp102_acr_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_acr **);
 int gp108_acr_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_acr **);
 int gp10b_acr_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_acr **);
+int gv100_acr_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_acr **);
 int tu102_acr_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_acr **);
+int ga102_acr_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_acr **);
 
 struct nvkm_acr_lsfw {
 	const struct nvkm_acr_lsf_func *func;
@@ -77,6 +80,7 @@ struct nvkm_acr_lsfw {
 
 	const struct firmware *sig;
 
+	bool secure_bootloader;
 	u32 bootloader_size;
 	u32 bootloader_imem_offset;
 
@@ -87,10 +91,19 @@ struct nvkm_acr_lsfw {
 	u32 app_resident_code_size;
 	u32 app_resident_data_offset;
 	u32 app_resident_data_size;
+	u32 app_imem_offset;
+	u32 app_dmem_offset;
 
 	u32 ucode_size;
 	u32 data_size;
 
+	u32 fuse_ver;
+	u32 engine_id;
+	u32 ucode_id;
+	u32 sig_size;
+	u32 sig_nr;
+	u8 *sigs;
+
 	struct {
 		u32 lsb;
 		u32 img;
@@ -105,10 +118,10 @@ struct nvkm_acr_lsf_func {
 #define NVKM_ACR_LSF_DMACTL_REQ_CTX                                  0x00000004
 #define NVKM_ACR_LSF_FORCE_PRIV_LOAD                                 0x00000008
 	u32 flags;
+	u32 bl_entry;
 	u32 bld_size;
 	void (*bld_write)(struct nvkm_acr *, u32 bld, struct nvkm_acr_lsfw *);
 	void (*bld_patch)(struct nvkm_acr *, u32 bld, s64 adjust);
-	int (*boot)(struct nvkm_falcon *);
 	u64 bootstrap_falcons;
 	int (*bootstrap_falcon)(struct nvkm_falcon *, enum nvkm_acr_lsf_id);
 	int (*bootstrap_multiple_falcons)(struct nvkm_falcon *, u32 mask);
@@ -122,8 +135,20 @@ int
 nvkm_acr_lsfw_load_sig_image_desc_v1(struct nvkm_subdev *, struct nvkm_falcon *,
 				     enum nvkm_acr_lsf_id, const char *path,
 				     int ver, const struct nvkm_acr_lsf_func *);
+
+int
+nvkm_acr_lsfw_load_sig_image_desc_v2(struct nvkm_subdev *, struct nvkm_falcon *,
+				     enum nvkm_acr_lsf_id, const char *path,
+				     int ver, const struct nvkm_acr_lsf_func *);
+
 int
 nvkm_acr_lsfw_load_bl_inst_data_sig(struct nvkm_subdev *, struct nvkm_falcon *,
 				    enum nvkm_acr_lsf_id, const char *path,
 				    int ver, const struct nvkm_acr_lsf_func *);
+
+int
+nvkm_acr_lsfw_load_bl_sig_net(struct nvkm_subdev *, struct nvkm_falcon *,
+				    enum nvkm_acr_lsf_id, const char *path,
+				    int ver, const struct nvkm_acr_lsf_func *,
+				    const void *, u32, const void *, u32);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h
index 9c78f072d62b..e40bbf378a8d 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h
@@ -2,18 +2,21 @@
 #define __NVKM_FAULT_H__
 #include <core/subdev.h>
 #include <core/event.h>
-#include <core/notify.h>
 
 struct nvkm_fault {
 	const struct nvkm_fault_func *func;
 	struct nvkm_subdev subdev;
 
+	struct nvkm_inth info_fault;
+
 	struct nvkm_fault_buffer *buffer[2];
 	int buffer_nr;
 
+#define NVKM_FAULT_BUFFER_EVENT_PENDING BIT(0)
 	struct nvkm_event event;
 
-	struct nvkm_notify nrpfb;
+	struct nvkm_event_ntfy nrpfb;
+	struct work_struct nrpfb_work;
 
 	struct nvkm_device_oclass user;
 };
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
index ef6a6297148c..40768373cdd9 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
@@ -35,6 +35,11 @@ struct nvkm_fb {
 
 	struct nvkm_blob vpr_scrubber;
 
+	struct {
+		struct page *flush_page;
+		dma_addr_t flush_page_addr;
+	} sysmem;
+
 	struct nvkm_ram *ram;
 
 	struct {
@@ -53,6 +58,8 @@ struct nvkm_fb {
 	struct nvkm_memory *mmu_wr;
 };
 
+int nvkm_fb_mem_unlock(struct nvkm_fb *);
+
 void nvkm_fb_tile_init(struct nvkm_fb *, int region, u32 addr, u32 size,
 		       u32 pitch, u32 flags, struct nvkm_fb_tile *);
 void nvkm_fb_tile_fini(struct nvkm_fb *, int region, struct nvkm_fb_tile *);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h
index 0e46ea1fe972..537c4fc58b4f 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gpio.h
@@ -8,9 +8,6 @@
 #include <subdev/bios/gpio.h>
 
 struct nvkm_gpio_ntfy_req {
-#define NVKM_GPIO_HI                                                       0x01
-#define NVKM_GPIO_LO                                                       0x02
-#define NVKM_GPIO_TOGGLED                                                  0x03
 	u8 mask;
 	u8 line;
 };
@@ -23,6 +20,9 @@ struct nvkm_gpio {
 	const struct nvkm_gpio_func *func;
 	struct nvkm_subdev subdev;
 
+#define NVKM_GPIO_HI       BIT(0)
+#define NVKM_GPIO_LO       BIT(1)
+#define NVKM_GPIO_TOGGLED (NVKM_GPIO_HI | NVKM_GPIO_LO)
 	struct nvkm_event event;
 };
 
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
index cf42a59d4e58..72619d7df73e 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/gsp.h
@@ -5,9 +5,12 @@
 #include <core/falcon.h>
 
 struct nvkm_gsp {
+	const struct nvkm_gsp_func *func;
 	struct nvkm_subdev subdev;
+
 	struct nvkm_falcon falcon;
 };
 
 int gv100_gsp_new(struct nvkm_device *, enum nvkm_subdev_type, int, struct nvkm_gsp **);
+int ga102_gsp_new(struct nvkm_device *, enum nvkm_subdev_type, int, struct nvkm_gsp **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h
index 146e13292203..40a1065ae626 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/i2c.h
@@ -7,20 +7,6 @@
 #include <subdev/bios.h>
 #include <subdev/bios/i2c.h>
 
-struct nvkm_i2c_ntfy_req {
-#define NVKM_I2C_PLUG                                                      0x01
-#define NVKM_I2C_UNPLUG                                                    0x02
-#define NVKM_I2C_IRQ                                                       0x04
-#define NVKM_I2C_DONE                                                      0x08
-#define NVKM_I2C_ANY                                                       0x0f
-	u8 mask;
-	u8 port;
-};
-
-struct nvkm_i2c_ntfy_rep {
-	u8 mask;
-};
-
 struct nvkm_i2c_bus_probe {
 	struct i2c_board_info dev;
 	u8 udelay; /* set to 0 to use the standard delay */
@@ -79,6 +65,11 @@ struct nvkm_i2c {
 	struct list_head bus;
 	struct list_head aux;
 
+#define NVKM_I2C_PLUG   BIT(0)
+#define NVKM_I2C_UNPLUG BIT(1)
+#define NVKM_I2C_IRQ    BIT(2)
+#define NVKM_I2C_DONE   BIT(3)
+#define NVKM_I2C_ANY   (NVKM_I2C_PLUG | NVKM_I2C_UNPLUG | NVKM_I2C_IRQ | NVKM_I2C_DONE)
 	struct nvkm_event event;
 };
 
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h
index f967b97d163c..fcdaefc99fe8 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/instmem.h
@@ -28,7 +28,7 @@ u32 nvkm_instmem_rd32(struct nvkm_instmem *, u32 addr);
 void nvkm_instmem_wr32(struct nvkm_instmem *, u32 addr, u32 data);
 int nvkm_instobj_new(struct nvkm_instmem *, u32 size, u32 align, bool zero,
 		     struct nvkm_memory **);
-
+int nvkm_instobj_wrap(struct nvkm_device *, struct nvkm_memory *, struct nvkm_memory **);
 
 int nv04_instmem_new(struct nvkm_device *, enum nvkm_subdev_type, int, struct nvkm_instmem **);
 int nv40_instmem_new(struct nvkm_device *, enum nvkm_subdev_type, int, struct nvkm_instmem **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
index d32a326a9290..64294042ec07 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/ltc.h
@@ -4,7 +4,8 @@
 #include <core/subdev.h>
 #include <core/mm.h>
 
-#define NVKM_LTC_MAX_ZBC_CNT 16
+#define NVKM_LTC_MAX_ZBC_COLOR_CNT 32
+#define NVKM_LTC_MAX_ZBC_DEPTH_CNT 16
 
 struct nvkm_ltc {
 	const struct nvkm_ltc_func *func;
@@ -18,11 +19,13 @@ struct nvkm_ltc {
 	u32 tag_base;
 	struct nvkm_memory *tag_ram;
 
-	int zbc_min;
-	int zbc_max;
-	u32 zbc_color[NVKM_LTC_MAX_ZBC_CNT][4];
-	u32 zbc_depth[NVKM_LTC_MAX_ZBC_CNT];
-	u32 zbc_stencil[NVKM_LTC_MAX_ZBC_CNT];
+	int zbc_color_min;
+	int zbc_color_max;
+	u32 zbc_color[NVKM_LTC_MAX_ZBC_COLOR_CNT][4];
+	int zbc_depth_min;
+	int zbc_depth_max;
+	u32 zbc_depth[NVKM_LTC_MAX_ZBC_DEPTH_CNT];
+	u32 zbc_stencil[NVKM_LTC_MAX_ZBC_DEPTH_CNT];
 };
 
 void nvkm_ltc_tags_clear(struct nvkm_device *, u32 first, u32 count);
@@ -41,4 +44,5 @@ int gm200_ltc_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct
 int gp100_ltc_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_ltc **);
 int gp102_ltc_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_ltc **);
 int gp10b_ltc_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_ltc **);
+int ga102_ltc_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_ltc **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
index cb86a56e68d4..127ac545e4b2 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
@@ -6,15 +6,14 @@
 struct nvkm_mc {
 	const struct nvkm_mc_func *func;
 	struct nvkm_subdev subdev;
+
+	struct nvkm_intr intr;
 };
 
 void nvkm_mc_enable(struct nvkm_device *, enum nvkm_subdev_type, int);
 void nvkm_mc_disable(struct nvkm_device *, enum nvkm_subdev_type, int);
 bool nvkm_mc_enabled(struct nvkm_device *, enum nvkm_subdev_type, int);
 void nvkm_mc_reset(struct nvkm_device *, enum nvkm_subdev_type, int);
-void nvkm_mc_intr(struct nvkm_device *, bool *handled);
-void nvkm_mc_intr_unarm(struct nvkm_device *);
-void nvkm_mc_intr_rearm(struct nvkm_device *);
 void nvkm_mc_intr_mask(struct nvkm_device *, enum nvkm_subdev_type, int, bool enable);
 void nvkm_mc_unk260(struct nvkm_device *, u32 data);
 
@@ -31,6 +30,5 @@ int gk104_mc_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct n
 int gk20a_mc_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_mc **);
 int gp100_mc_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_mc **);
 int gp10b_mc_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_mc **);
-int tu102_mc_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_mc **);
 int ga100_mc_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_mc **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
index 74c19bdfb757..3c103101d5fc 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
@@ -13,7 +13,6 @@ struct nvkm_pci {
 	const struct nvkm_pci_func *func;
 	struct nvkm_subdev subdev;
 	struct pci_dev *pdev;
-	int irq;
 
 	struct {
 		struct agp_bridge_data *bridge;
@@ -38,6 +37,7 @@ void nvkm_pci_wr08(struct nvkm_pci *, u16 addr, u8 data);
 void nvkm_pci_wr32(struct nvkm_pci *, u16 addr, u32 data);
 u32 nvkm_pci_mask(struct nvkm_pci *, u16 addr, u32 mask, u32 value);
 void nvkm_pci_rom_shadow(struct nvkm_pci *, bool shadow);
+void nvkm_pci_msi_rearm(struct nvkm_device *);
 
 int nv04_pci_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_pci **);
 int nv40_pci_new(struct nvkm_device *, enum nvkm_subdev_type, int inst, struct nvkm_pci **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
index ee75c5524c43..73e717b980b8 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
@@ -21,6 +21,7 @@ struct nvkm_top_device {
 	struct list_head head;
 };
 
+int nvkm_top_parse(struct nvkm_device *);
 u32 nvkm_top_addr(struct nvkm_device *, enum nvkm_subdev_type, int);
 u32 nvkm_top_reset(struct nvkm_device *, enum nvkm_subdev_type, int);
 u32 nvkm_top_intr_mask(struct nvkm_device *, enum nvkm_subdev_type, int);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/vfn.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/vfn.h
new file mode 100644
index 000000000000..cc6d0796c265
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/vfn.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef __NVKM_VFN_H__
+#define __NVKM_VFN_H__
+#include <core/subdev.h>
+
+struct nvkm_vfn {
+	const struct nvkm_vfn_func *func;
+	struct nvkm_subdev subdev;
+
+	struct {
+		u32 priv;
+		u32 user;
+	} addr;
+
+	struct nvkm_intr intr;
+
+	struct nvkm_device_oclass user;
+};
+
+int gv100_vfn_new(struct nvkm_device *, enum nvkm_subdev_type, int, struct nvkm_vfn **);
+int tu102_vfn_new(struct nvkm_device *, enum nvkm_subdev_type, int, struct nvkm_vfn **);
+int ga100_vfn_new(struct nvkm_device *, enum nvkm_subdev_type, int, struct nvkm_vfn **);
+#endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 5bee655e7e63..82dab51d8aeb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -27,7 +27,6 @@
 #include <nvif/ioctl.h>
 #include <nvif/class.h>
 #include <nvif/cl0002.h>
-#include <nvif/cla06f.h>
 #include <nvif/unpack.h>
 
 #include "nouveau_drv.h"
@@ -253,7 +252,7 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 	struct nouveau_abi16 *abi16 = nouveau_abi16_get(file_priv);
 	struct nouveau_abi16_chan *chan;
 	struct nvif_device *device;
-	u64 engine;
+	u64 engine, runm;
 	int ret;
 
 	if (unlikely(!abi16))
@@ -263,6 +262,7 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 		return nouveau_abi16_put(abi16, -ENODEV);
 
 	device = &abi16->device;
+	engine = NV_DEVICE_HOST_RUNLIST_ENGINES_GR;
 
 	/* hack to allow channel engine type specification on kepler */
 	if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) {
@@ -276,19 +276,18 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 			default:
 				return nouveau_abi16_put(abi16, -ENOSYS);
 			}
-		} else {
-			engine = NV_DEVICE_HOST_RUNLIST_ENGINES_GR;
-		}
 
-		if (engine != NV_DEVICE_HOST_RUNLIST_ENGINES_CE)
-			engine = nvif_fifo_runlist(device, engine);
-		else
-			engine = nvif_fifo_runlist_ce(device);
-		init->fb_ctxdma_handle = engine;
-		init->tt_ctxdma_handle = 0;
+			init->fb_ctxdma_handle = 0;
+			init->tt_ctxdma_handle = 0;
+		}
 	}
 
-	if (init->fb_ctxdma_handle == ~0 || init->tt_ctxdma_handle == ~0)
+	if (engine != NV_DEVICE_HOST_RUNLIST_ENGINES_CE)
+		runm = nvif_fifo_runlist(device, engine);
+	else
+		runm = nvif_fifo_runlist_ce(device);
+
+	if (!runm || init->fb_ctxdma_handle == ~0 || init->tt_ctxdma_handle == ~0)
 		return nouveau_abi16_put(abi16, -EINVAL);
 
 	/* allocate "abi16 channel" data and make up a handle for it */
@@ -300,8 +299,8 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 	list_add(&chan->head, &abi16->channels);
 
 	/* create channel object and initialise dma and fence management */
-	ret = nouveau_channel_new(drm, device, init->fb_ctxdma_handle,
-				  init->tt_ctxdma_handle, false, &chan->chan);
+	ret = nouveau_channel_new(drm, device, false, runm, init->fb_ctxdma_handle,
+				  init->tt_ctxdma_handle, &chan->chan);
 	if (ret)
 		goto done;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c
index a614582779ca..40409a29f5b6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c
+++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c
@@ -264,7 +264,11 @@ nva3_set_intensity(struct backlight_device *bd)
 	u32 div, val;
 
 	div = nvif_rd32(device, NV50_PDISP_SOR_PWM_DIV(or));
-	val = (bd->props.brightness * div) / 100;
+
+	val = backlight_get_brightness(bd);
+	if (val)
+		val = (val * div) / 100;
+
 	if (div) {
 		nvif_wr32(device, NV50_PDISP_SOR_PWM_CTL(or),
 			  val |
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 813937ad1dc2..a11871e3119c 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -856,6 +856,9 @@ nouveau_bo_move_init(struct nouveau_drm *drm)
 		int (*init)(struct nouveau_channel *, u32 handle);
 	} _methods[] = {
 		{  "COPY", 4, 0xc7b5, nve0_bo_move_copy, nve0_bo_move_init },
+		{  "GRCE", 0, 0xc7b5, nve0_bo_move_copy, nvc0_bo_move_init },
+		{  "COPY", 4, 0xc6b5, nve0_bo_move_copy, nve0_bo_move_init },
+		{  "GRCE", 0, 0xc6b5, nve0_bo_move_copy, nvc0_bo_move_init },
 		{  "COPY", 4, 0xc5b5, nve0_bo_move_copy, nve0_bo_move_init },
 		{  "GRCE", 0, 0xc5b5, nve0_bo_move_copy, nvc0_bo_move_init },
 		{  "COPY", 4, 0xc3b5, nve0_bo_move_copy, nve0_bo_move_init },
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index 48dea5d0c580..e648ecd0c1a0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -25,12 +25,7 @@
 
 #include <nvif/class.h>
 #include <nvif/cl0002.h>
-#include <nvif/cl006b.h>
-#include <nvif/cl506f.h>
-#include <nvif/cl906f.h>
-#include <nvif/cla06f.h>
-#include <nvif/clc36f.h>
-#include <nvif/ioctl.h>
+#include <nvif/if0020.h>
 
 #include "nouveau_drv.h"
 #include "nouveau_dma.h"
@@ -46,15 +41,17 @@ int nouveau_vram_pushbuf;
 module_param_named(vram_pushbuf, nouveau_vram_pushbuf, int, 0400);
 
 static int
-nouveau_channel_killed(struct nvif_notify *ntfy)
+nouveau_channel_killed(struct nvif_event *event, void *repv, u32 repc)
 {
-	struct nouveau_channel *chan = container_of(ntfy, typeof(*chan), kill);
+	struct nouveau_channel *chan = container_of(event, typeof(*chan), kill);
 	struct nouveau_cli *cli = (void *)chan->user.client;
+
 	NV_PRINTK(warn, cli, "channel %d killed!\n", chan->chid);
 	atomic_set(&chan->killed, 1);
 	if (chan->fence)
 		nouveau_fence_context_kill(chan->fence, -ENODEV);
-	return NVIF_NOTIFY_DROP;
+
+	return NVIF_EVENT_DROP;
 }
 
 int
@@ -96,8 +93,9 @@ nouveau_channel_del(struct nouveau_channel **pchan)
 		nvif_object_dtor(&chan->nvsw);
 		nvif_object_dtor(&chan->gart);
 		nvif_object_dtor(&chan->vram);
-		nvif_notify_dtor(&chan->kill);
+		nvif_event_dtor(&chan->kill);
 		nvif_object_dtor(&chan->user);
+		nvif_mem_dtor(&chan->mem_userd);
 		nvif_object_dtor(&chan->push.ctxdma);
 		nouveau_vma_del(&chan->push.vma);
 		nouveau_bo_unmap(chan->push.buffer);
@@ -247,134 +245,113 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device,
 }
 
 static int
-nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device,
-		    u64 runlist, bool priv, struct nouveau_channel **pchan)
+nouveau_channel_ctor(struct nouveau_drm *drm, struct nvif_device *device, bool priv, u64 runm,
+		     struct nouveau_channel **pchan)
 {
-	static const u16 oclasses[] = { AMPERE_CHANNEL_GPFIFO_B,
-					TURING_CHANNEL_GPFIFO_A,
-					VOLTA_CHANNEL_GPFIFO_A,
-					PASCAL_CHANNEL_GPFIFO_A,
-					MAXWELL_CHANNEL_GPFIFO_A,
-					KEPLER_CHANNEL_GPFIFO_B,
-					KEPLER_CHANNEL_GPFIFO_A,
-					FERMI_CHANNEL_GPFIFO,
-					G82_CHANNEL_GPFIFO,
-					NV50_CHANNEL_GPFIFO,
-					0 };
-	const u16 *oclass = oclasses;
-	union {
-		struct nv50_channel_gpfifo_v0 nv50;
-		struct fermi_channel_gpfifo_v0 fermi;
-		struct kepler_channel_gpfifo_a_v0 kepler;
-		struct volta_channel_gpfifo_a_v0 volta;
+	static const struct {
+		s32 oclass;
+		int version;
+	} hosts[] = {
+		{  AMPERE_CHANNEL_GPFIFO_B, 0 },
+		{  AMPERE_CHANNEL_GPFIFO_A, 0 },
+		{  TURING_CHANNEL_GPFIFO_A, 0 },
+		{   VOLTA_CHANNEL_GPFIFO_A, 0 },
+		{  PASCAL_CHANNEL_GPFIFO_A, 0 },
+		{ MAXWELL_CHANNEL_GPFIFO_A, 0 },
+		{  KEPLER_CHANNEL_GPFIFO_B, 0 },
+		{  KEPLER_CHANNEL_GPFIFO_A, 0 },
+		{   FERMI_CHANNEL_GPFIFO  , 0 },
+		{     G82_CHANNEL_GPFIFO  , 0 },
+		{    NV50_CHANNEL_GPFIFO  , 0 },
+		{    NV40_CHANNEL_DMA     , 0 },
+		{    NV17_CHANNEL_DMA     , 0 },
+		{    NV10_CHANNEL_DMA     , 0 },
+		{    NV03_CHANNEL_DMA     , 0 },
+		{}
+	};
+	struct {
+		struct nvif_chan_v0 chan;
+		char name[TASK_COMM_LEN+16];
 	} args;
+	struct nouveau_cli *cli = (void *)device->object.client;
 	struct nouveau_channel *chan;
-	u32 size;
-	int ret;
+	const u64 plength = 0x10000;
+	const u64 ioffset = plength;
+	const u64 ilength = 0x02000;
+	char name[TASK_COMM_LEN];
+	int cid, ret;
+	u64 size;
+
+	cid = nvif_mclass(&device->object, hosts);
+	if (cid < 0)
+		return cid;
+
+	if (hosts[cid].oclass < NV50_CHANNEL_GPFIFO)
+		size = plength;
+	else
+		size = ioffset + ilength;
 
 	/* allocate dma push buffer */
-	ret = nouveau_channel_prep(drm, device, 0x12000, &chan);
+	ret = nouveau_channel_prep(drm, device, size, &chan);
 	*pchan = chan;
 	if (ret)
 		return ret;
 
 	/* create channel object */
-	do {
-		if (oclass[0] >= VOLTA_CHANNEL_GPFIFO_A) {
-			args.volta.version = 0;
-			args.volta.ilength = 0x02000;
-			args.volta.ioffset = 0x10000 + chan->push.addr;
-			args.volta.runlist = runlist;
-			args.volta.vmm = nvif_handle(&chan->vmm->vmm.object);
-			args.volta.priv = priv;
-			size = sizeof(args.volta);
-		} else
-		if (oclass[0] >= KEPLER_CHANNEL_GPFIFO_A) {
-			args.kepler.version = 0;
-			args.kepler.ilength = 0x02000;
-			args.kepler.ioffset = 0x10000 + chan->push.addr;
-			args.kepler.runlist = runlist;
-			args.kepler.vmm = nvif_handle(&chan->vmm->vmm.object);
-			args.kepler.priv = priv;
-			size = sizeof(args.kepler);
-		} else
-		if (oclass[0] >= FERMI_CHANNEL_GPFIFO) {
-			args.fermi.version = 0;
-			args.fermi.ilength = 0x02000;
-			args.fermi.ioffset = 0x10000 + chan->push.addr;
-			args.fermi.vmm = nvif_handle(&chan->vmm->vmm.object);
-			size = sizeof(args.fermi);
-		} else {
-			args.nv50.version = 0;
-			args.nv50.ilength = 0x02000;
-			args.nv50.ioffset = 0x10000 + chan->push.addr;
-			args.nv50.pushbuf = nvif_handle(&chan->push.ctxdma);
-			args.nv50.vmm = nvif_handle(&chan->vmm->vmm.object);
-			size = sizeof(args.nv50);
-		}
-
-		ret = nvif_object_ctor(&device->object, "abi16ChanUser", 0,
-				       *oclass++, &args, size, &chan->user);
-		if (ret == 0) {
-			if (chan->user.oclass >= VOLTA_CHANNEL_GPFIFO_A) {
-				chan->chid = args.volta.chid;
-				chan->inst = args.volta.inst;
-				chan->token = args.volta.token;
-			} else
-			if (chan->user.oclass >= KEPLER_CHANNEL_GPFIFO_A) {
-				chan->chid = args.kepler.chid;
-				chan->inst = args.kepler.inst;
-			} else
-			if (chan->user.oclass >= FERMI_CHANNEL_GPFIFO) {
-				chan->chid = args.fermi.chid;
-			} else {
-				chan->chid = args.nv50.chid;
-			}
+	args.chan.version = 0;
+	args.chan.namelen = sizeof(args.name);
+	args.chan.runlist = __ffs64(runm);
+	args.chan.runq = 0;
+	args.chan.priv = priv;
+	args.chan.devm = BIT(0);
+	if (hosts[cid].oclass < NV50_CHANNEL_GPFIFO) {
+		args.chan.vmm = 0;
+		args.chan.ctxdma = nvif_handle(&chan->push.ctxdma);
+		args.chan.offset = chan->push.addr;
+		args.chan.length = 0;
+	} else {
+		args.chan.vmm = nvif_handle(&chan->vmm->vmm.object);
+		if (hosts[cid].oclass < FERMI_CHANNEL_GPFIFO)
+			args.chan.ctxdma = nvif_handle(&chan->push.ctxdma);
+		else
+			args.chan.ctxdma = 0;
+		args.chan.offset = ioffset + chan->push.addr;
+		args.chan.length = ilength;
+	}
+	args.chan.huserd = 0;
+	args.chan.ouserd = 0;
+
+	/* allocate userd */
+	if (hosts[cid].oclass >= VOLTA_CHANNEL_GPFIFO_A) {
+		ret = nvif_mem_ctor(&cli->mmu, "abi16ChanUSERD", NVIF_CLASS_MEM_GF100,
+				    NVIF_MEM_VRAM | NVIF_MEM_COHERENT | NVIF_MEM_MAPPABLE,
+				    0, PAGE_SIZE, NULL, 0, &chan->mem_userd);
+		if (ret)
 			return ret;
-		}
-	} while (*oclass);
 
-	nouveau_channel_del(pchan);
-	return ret;
-}
+		args.chan.huserd = nvif_handle(&chan->mem_userd.object);
+		args.chan.ouserd = 0;
 
-static int
-nouveau_channel_dma(struct nouveau_drm *drm, struct nvif_device *device,
-		    struct nouveau_channel **pchan)
-{
-	static const u16 oclasses[] = { NV40_CHANNEL_DMA,
-					NV17_CHANNEL_DMA,
-					NV10_CHANNEL_DMA,
-					NV03_CHANNEL_DMA,
-					0 };
-	const u16 *oclass = oclasses;
-	struct nv03_channel_dma_v0 args;
-	struct nouveau_channel *chan;
-	int ret;
+		chan->userd = &chan->mem_userd.object;
+	} else {
+		chan->userd = &chan->user;
+	}
 
-	/* allocate dma push buffer */
-	ret = nouveau_channel_prep(drm, device, 0x10000, &chan);
-	*pchan = chan;
-	if (ret)
-		return ret;
+	get_task_comm(name, current);
+	snprintf(args.name, sizeof(args.name), "%s[%d]", name, task_pid_nr(current));
 
-	/* create channel object */
-	args.version = 0;
-	args.pushbuf = nvif_handle(&chan->push.ctxdma);
-	args.offset = chan->push.addr;
-
-	do {
-		ret = nvif_object_ctor(&device->object, "abi16ChanUser", 0,
-				       *oclass++, &args, sizeof(args),
-				       &chan->user);
-		if (ret == 0) {
-			chan->chid = args.chid;
-			return ret;
-		}
-	} while (ret && *oclass);
+	ret = nvif_object_ctor(&device->object, "abi16ChanUser", 0, hosts[cid].oclass,
+			       &args, sizeof(args), &chan->user);
+	if (ret) {
+		nouveau_channel_del(pchan);
+		return ret;
+	}
 
-	nouveau_channel_del(pchan);
-	return ret;
+	chan->runlist = args.chan.runlist;
+	chan->chid = args.chan.chid;
+	chan->inst = args.chan.inst;
+	chan->token = args.chan.token;
+	return 0;
 }
 
 static int
@@ -385,18 +362,24 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart)
 	struct nv_dma_v0 args = {};
 	int ret, i;
 
-	ret = nvif_object_map(&chan->user, NULL, 0);
+	ret = nvif_object_map(chan->userd, NULL, 0);
 	if (ret)
 		return ret;
 
-	if (chan->user.oclass >= FERMI_CHANNEL_GPFIFO &&
-	    chan->user.oclass < AMPERE_CHANNEL_GPFIFO_B) {
-		ret = nvif_notify_ctor(&chan->user, "abi16ChanKilled",
-				       nouveau_channel_killed,
-				       true, NV906F_V0_NTFY_KILLED,
-				       NULL, 0, 0, &chan->kill);
+	if (chan->user.oclass >= FERMI_CHANNEL_GPFIFO) {
+		struct {
+			struct nvif_event_v0 base;
+			struct nvif_chan_event_v0 host;
+		} args;
+
+		args.host.version = 0;
+		args.host.type = NVIF_CHAN_EVENT_V0_KILLED;
+
+		ret = nvif_event_ctor(&chan->user, "abi16ChanKilled", chan->chid,
+				      nouveau_channel_killed, false,
+				      &args.base, sizeof(args), &chan->kill);
 		if (ret == 0)
-			ret = nvif_notify_get(&chan->kill);
+			ret = nvif_event_allow(&chan->kill);
 		if (ret) {
 			NV_ERROR(drm, "Failed to request channel kill "
 				      "notification: %d\n", ret);
@@ -503,24 +486,18 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart)
 
 int
 nouveau_channel_new(struct nouveau_drm *drm, struct nvif_device *device,
-		    u32 arg0, u32 arg1, bool priv,
-		    struct nouveau_channel **pchan)
+		    bool priv, u64 runm, u32 vram, u32 gart, struct nouveau_channel **pchan)
 {
 	struct nouveau_cli *cli = (void *)device->object.client;
 	int ret;
 
-	/* hack until fencenv50 is fixed, and agp access relaxed */
-	ret = nouveau_channel_ind(drm, device, arg0, priv, pchan);
+	ret = nouveau_channel_ctor(drm, device, priv, runm, pchan);
 	if (ret) {
-		NV_PRINTK(dbg, cli, "ib channel create, %d\n", ret);
-		ret = nouveau_channel_dma(drm, device, pchan);
-		if (ret) {
-			NV_PRINTK(dbg, cli, "dma channel create, %d\n", ret);
-			return ret;
-		}
+		NV_PRINTK(dbg, cli, "channel create, %d\n", ret);
+		return ret;
 	}
 
-	ret = nouveau_channel_init(*pchan, arg0, arg1);
+	ret = nouveau_channel_init(*pchan, vram, gart);
 	if (ret) {
 		NV_PRINTK(err, cli, "channel failed to initialise, %d\n", ret);
 		nouveau_channel_del(pchan);
@@ -534,6 +511,12 @@ nouveau_channel_new(struct nouveau_drm *drm, struct nvif_device *device,
 	return ret;
 }
 
+void
+nouveau_channels_fini(struct nouveau_drm *drm)
+{
+	kfree(drm->runl);
+}
+
 int
 nouveau_channels_init(struct nouveau_drm *drm)
 {
@@ -541,20 +524,53 @@ nouveau_channels_init(struct nouveau_drm *drm)
 		struct nv_device_info_v1 m;
 		struct {
 			struct nv_device_info_v1_data channels;
+			struct nv_device_info_v1_data runlists;
 		} v;
 	} args = {
 		.m.version = 1,
 		.m.count = sizeof(args.v) / sizeof(args.v.channels),
 		.v.channels.mthd = NV_DEVICE_HOST_CHANNELS,
+		.v.runlists.mthd = NV_DEVICE_HOST_RUNLISTS,
 	};
 	struct nvif_object *device = &drm->client.device.object;
-	int ret;
+	int ret, i;
 
 	ret = nvif_object_mthd(device, NV_DEVICE_V0_INFO, &args, sizeof(args));
-	if (ret || args.v.channels.mthd == NV_DEVICE_INFO_INVALID)
+	if (ret ||
+	    args.v.runlists.mthd == NV_DEVICE_INFO_INVALID || !args.v.runlists.data ||
+	    args.v.channels.mthd == NV_DEVICE_INFO_INVALID)
 		return -ENODEV;
 
-	drm->chan.nr = args.v.channels.data;
-	drm->chan.context_base = dma_fence_context_alloc(drm->chan.nr);
+	drm->chan_nr = drm->chan_total = args.v.channels.data;
+	drm->runl_nr = fls64(args.v.runlists.data);
+	drm->runl = kcalloc(drm->runl_nr, sizeof(*drm->runl), GFP_KERNEL);
+	if (!drm->runl)
+		return -ENOMEM;
+
+	if (drm->chan_nr == 0) {
+		for (i = 0; i < drm->runl_nr; i++) {
+			if (!(args.v.runlists.data & BIT(i)))
+				continue;
+
+			args.v.channels.mthd = NV_DEVICE_HOST_RUNLIST_CHANNELS;
+			args.v.channels.data = i;
+
+			ret = nvif_object_mthd(device, NV_DEVICE_V0_INFO, &args, sizeof(args));
+			if (ret || args.v.channels.mthd == NV_DEVICE_INFO_INVALID)
+				return -ENODEV;
+
+			drm->runl[i].chan_nr = args.v.channels.data;
+			drm->runl[i].chan_id_base = drm->chan_total;
+			drm->runl[i].context_base = dma_fence_context_alloc(drm->runl[i].chan_nr);
+
+			drm->chan_total += drm->runl[i].chan_nr;
+		}
+	} else {
+		drm->runl[0].context_base = dma_fence_context_alloc(drm->chan_nr);
+		for (i = 1; i < drm->runl_nr; i++)
+			drm->runl[i].context_base = drm->runl[0].context_base;
+
+	}
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h
index 98ba9d27e6b4..e06a8ffed31a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.h
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.h
@@ -2,7 +2,7 @@
 #ifndef __NOUVEAU_CHAN_H__
 #define __NOUVEAU_CHAN_H__
 #include <nvif/object.h>
-#include <nvif/notify.h>
+#include <nvif/event.h>
 #include <nvif/push.h>
 struct nvif_device;
 
@@ -16,6 +16,10 @@ struct nouveau_channel {
 	struct nouveau_drm *drm;
 	struct nouveau_vmm *vmm;
 
+	struct nvif_mem mem_userd;
+	struct nvif_object *userd;
+
+	int runlist;
 	int chid;
 	u64 inst;
 	u32 token;
@@ -50,15 +54,15 @@ struct nouveau_channel {
 
 	struct nvif_object user;
 
-	struct nvif_notify kill;
+	struct nvif_event kill;
 	atomic_t killed;
 };
 
 int nouveau_channels_init(struct nouveau_drm *);
+void nouveau_channels_fini(struct nouveau_drm *);
 
-int  nouveau_channel_new(struct nouveau_drm *, struct nvif_device *,
-			 u32 arg0, u32 arg1, bool priv,
-			 struct nouveau_channel **);
+int  nouveau_channel_new(struct nouveau_drm *, struct nvif_device *, bool priv, u64 runm,
+			 u32 vram, u32 gart, struct nouveau_channel **);
 void nouveau_channel_del(struct nouveau_channel **);
 int  nouveau_channel_idle(struct nouveau_channel *);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 1991bbb1d05c..086b66b60d91 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -47,8 +47,7 @@
 #include "nouveau_crtc.h"
 
 #include <nvif/class.h>
-#include <nvif/cl0046.h>
-#include <nvif/event.h>
+#include <nvif/if0011.h>
 
 struct drm_display_mode *
 nouveau_conn_native_mode(struct drm_connector *connector)
@@ -396,7 +395,8 @@ static void
 nouveau_connector_destroy(struct drm_connector *connector)
 {
 	struct nouveau_connector *nv_connector = nouveau_connector(connector);
-	nvif_notify_dtor(&nv_connector->hpd);
+	nvif_event_dtor(&nv_connector->irq);
+	nvif_event_dtor(&nv_connector->hpd);
 	kfree(nv_connector->edid);
 	drm_connector_unregister(connector);
 	drm_connector_cleanup(connector);
@@ -1162,39 +1162,38 @@ nouveau_connector_funcs_lvds = {
 };
 
 void
-nouveau_connector_hpd(struct drm_connector *connector)
+nouveau_connector_hpd(struct nouveau_connector *nv_connector, u64 bits)
 {
-	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	u32 mask = drm_connector_mask(connector);
+	struct nouveau_drm *drm = nouveau_drm(nv_connector->base.dev);
+	u32 mask = drm_connector_mask(&nv_connector->base);
+	unsigned long flags;
 
-	mutex_lock(&drm->hpd_lock);
+	spin_lock_irqsave(&drm->hpd_lock, flags);
 	if (!(drm->hpd_pending & mask)) {
+		nv_connector->hpd_pending |= bits;
 		drm->hpd_pending |= mask;
 		schedule_work(&drm->hpd_work);
 	}
-	mutex_unlock(&drm->hpd_lock);
+	spin_unlock_irqrestore(&drm->hpd_lock, flags);
 }
 
 static int
-nouveau_connector_hotplug(struct nvif_notify *notify)
+nouveau_connector_irq(struct nvif_event *event, void *repv, u32 repc)
 {
-	struct nouveau_connector *nv_connector =
-		container_of(notify, typeof(*nv_connector), hpd);
-	struct drm_connector *connector = &nv_connector->base;
-	struct drm_device *dev = connector->dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	const struct nvif_notify_conn_rep_v0 *rep = notify->data;
-	bool plugged = (rep->mask != NVIF_NOTIFY_CONN_V0_UNPLUG);
+	struct nouveau_connector *nv_connector = container_of(event, typeof(*nv_connector), irq);
 
-	if (rep->mask & NVIF_NOTIFY_CONN_V0_IRQ) {
-		nouveau_dp_irq(drm, nv_connector);
-		return NVIF_NOTIFY_KEEP;
-	}
+	schedule_work(&nv_connector->irq_work);
+	return NVIF_EVENT_KEEP;
+}
 
-	NV_DEBUG(drm, "%splugged %s\n", plugged ? "" : "un", connector->name);
-	nouveau_connector_hpd(connector);
+static int
+nouveau_connector_hotplug(struct nvif_event *event, void *repv, u32 repc)
+{
+	struct nouveau_connector *nv_connector = container_of(event, typeof(*nv_connector), hpd);
+	struct nvif_conn_event_v0 *rep = repv;
 
-	return NVIF_NOTIFY_KEEP;
+	nouveau_connector_hpd(nv_connector, rep->types);
+	return NVIF_EVENT_KEEP;
 }
 
 static ssize_t
@@ -1290,6 +1289,7 @@ nouveau_connector_create(struct drm_device *dev,
 
 	connector = &nv_connector->base;
 	nv_connector->index = index;
+	INIT_WORK(&nv_connector->irq_work, nouveau_dp_irq);
 
 	/* attempt to parse vbios connector type and hotplug gpio */
 	nv_connector->dcb = olddcb_conn(dev, index);
@@ -1401,6 +1401,7 @@ nouveau_connector_create(struct drm_device *dev,
 
 	drm_connector_init(dev, connector, funcs, type);
 	drm_connector_helper_add(connector, &nouveau_connector_helper_funcs);
+	connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 
 	if (nv_connector->dcb && (disp->disp.conn_mask & BIT(nv_connector->index))) {
 		ret = nvif_conn_ctor(&disp->disp, nv_connector->base.name, nv_connector->index,
@@ -1409,6 +1410,25 @@ nouveau_connector_create(struct drm_device *dev,
 			kfree(nv_connector);
 			return ERR_PTR(ret);
 		}
+
+		ret = nvif_conn_event_ctor(&nv_connector->conn, "kmsHotplug",
+					   nouveau_connector_hotplug,
+					   NVIF_CONN_EVENT_V0_PLUG | NVIF_CONN_EVENT_V0_UNPLUG,
+					   &nv_connector->hpd);
+		if (ret == 0)
+			connector->polled = DRM_CONNECTOR_POLL_HPD;
+
+		if (nv_connector->aux.transfer) {
+			ret = nvif_conn_event_ctor(&nv_connector->conn, "kmsDpIrq",
+						   nouveau_connector_irq, NVIF_CONN_EVENT_V0_IRQ,
+						   &nv_connector->irq);
+			if (ret) {
+				nvif_event_dtor(&nv_connector->hpd);
+				nvif_conn_dtor(&nv_connector->conn);
+				kfree(nv_connector);
+				return ERR_PTR(ret);
+			}
+		}
 	}
 
 	connector->funcs->reset(connector);
@@ -1452,21 +1472,6 @@ nouveau_connector_create(struct drm_device *dev,
 		break;
 	}
 
-	ret = nvif_notify_ctor(&disp->disp.object, "kmsHotplug",
-			       nouveau_connector_hotplug,
-			       true, NV04_DISP_NTFY_CONN,
-			       &(struct nvif_notify_conn_req_v0) {
-				.mask = NVIF_NOTIFY_CONN_V0_ANY,
-				.conn = index,
-			       },
-			       sizeof(struct nvif_notify_conn_req_v0),
-			       sizeof(struct nvif_notify_conn_rep_v0),
-			       &nv_connector->hpd);
-	if (ret)
-		connector->polled = DRM_CONNECTOR_POLL_CONNECT;
-	else
-		connector->polled = DRM_CONNECTOR_POLL_HPD;
-
 	drm_connector_register(connector);
 	return connector;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h
index f4e17ff68bf9..35bcb541722b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.h
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.h
@@ -27,7 +27,7 @@
 #ifndef __NOUVEAU_CONNECTOR_H__
 #define __NOUVEAU_CONNECTOR_H__
 #include <nvif/conn.h>
-#include <nvif/notify.h>
+#include <nvif/event.h>
 
 #include <nvhw/class/cl507d.h>
 #include <nvhw/class/cl907d.h>
@@ -124,7 +124,10 @@ struct nouveau_connector {
 	u8 *dcb;
 
 	struct nvif_conn conn;
-	struct nvif_notify hpd;
+	u64 hpd_pending;
+	struct nvif_event hpd;
+	struct nvif_event irq;
+	struct work_struct irq_work;
 
 	struct drm_dp_aux aux;
 
@@ -198,7 +201,7 @@ nouveau_crtc_connector_get(struct nouveau_crtc *nv_crtc)
 
 struct drm_connector *
 nouveau_connector_create(struct drm_device *, const struct dcb_output *);
-void nouveau_connector_hpd(struct drm_connector *connector);
+void nouveau_connector_hpd(struct nouveau_connector *, u64 bits);
 
 extern int nouveau_tv_disable;
 extern int nouveau_ignorelid;
diff --git a/drivers/gpu/drm/nouveau/nouveau_crtc.h b/drivers/gpu/drm/nouveau/nouveau_crtc.h
index 7f63be2ec35d..c717f664a7b8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_crtc.h
+++ b/drivers/gpu/drm/nouveau/nouveau_crtc.h
@@ -26,16 +26,17 @@
 
 #ifndef __NOUVEAU_CRTC_H__
 #define __NOUVEAU_CRTC_H__
-
 #include <drm/drm_crtc.h>
 
-#include <nvif/notify.h>
+#include <nvif/head.h>
+#include <nvif/event.h>
 
 struct nouveau_crtc {
 	struct drm_crtc base;
 
+	struct nvif_head head;
 	int index;
-	struct nvif_notify vblank;
+	struct nvif_event vblank;
 
 	uint32_t dpms_saved_fp_control;
 	uint32_t fp_users;
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 2e97186090c8..ec3ffff487fc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -35,15 +35,14 @@
 #include <drm/drm_probe_helper.h>
 #include <drm/drm_vblank.h>
 
-#include "nouveau_fbcon.h"
 #include "nouveau_crtc.h"
 #include "nouveau_gem.h"
 #include "nouveau_connector.h"
 #include "nv50_display.h"
 
 #include <nvif/class.h>
-#include <nvif/cl0046.h>
-#include <nvif/event.h>
+#include <nvif/if0011.h>
+#include <nvif/if0013.h>
 #include <dispnv50/crc.h>
 
 int
@@ -52,7 +51,7 @@ nouveau_display_vblank_enable(struct drm_crtc *crtc)
 	struct nouveau_crtc *nv_crtc;
 
 	nv_crtc = nouveau_crtc(crtc);
-	nvif_notify_get(&nv_crtc->vblank);
+	nvif_event_allow(&nv_crtc->vblank);
 
 	return 0;
 }
@@ -63,7 +62,7 @@ nouveau_display_vblank_disable(struct drm_crtc *crtc)
 	struct nouveau_crtc *nv_crtc;
 
 	nv_crtc = nouveau_crtc(crtc);
-	nvif_notify_put(&nv_crtc->vblank);
+	nvif_event_block(&nv_crtc->vblank);
 }
 
 static inline int
@@ -84,24 +83,20 @@ static bool
 nouveau_display_scanoutpos_head(struct drm_crtc *crtc, int *vpos, int *hpos,
 				ktime_t *stime, ktime_t *etime)
 {
-	struct {
-		struct nv04_disp_mthd_v0 base;
-		struct nv04_disp_scanoutpos_v0 scan;
-	} args = {
-		.base.method = NV04_DISP_SCANOUTPOS,
-		.base.head = nouveau_crtc(crtc)->index,
-	};
-	struct nouveau_display *disp = nouveau_display(crtc->dev);
 	struct drm_vblank_crtc *vblank = &crtc->dev->vblank[drm_crtc_index(crtc)];
+	struct nvif_head *head = &nouveau_crtc(crtc)->head;
+	struct nvif_head_scanoutpos_v0 args;
 	int retry = 20;
 	bool ret = false;
 
+	args.version = 0;
+
 	do {
-		ret = nvif_mthd(&disp->disp.object, 0, &args, sizeof(args));
+		ret = nvif_mthd(&head->object, NVIF_HEAD_V0_SCANOUTPOS, &args, sizeof(args));
 		if (ret != 0)
 			return false;
 
-		if (args.scan.vline) {
+		if (args.vline) {
 			ret = true;
 			break;
 		}
@@ -109,11 +104,10 @@ nouveau_display_scanoutpos_head(struct drm_crtc *crtc, int *vpos, int *hpos,
 		if (retry) ndelay(vblank->linedur_ns);
 	} while (retry--);
 
-	*hpos = args.scan.hline;
-	*vpos = calc(args.scan.vblanks, args.scan.vblanke,
-		     args.scan.vtotal, args.scan.vline);
-	if (stime) *stime = ns_to_ktime(args.scan.time[0]);
-	if (etime) *etime = ns_to_ktime(args.scan.time[1]);
+	*hpos = args.hline;
+	*vpos = calc(args.vblanks, args.vblanke, args.vtotal, args.vline);
+	if (stime) *stime = ns_to_ktime(args.time[0]);
+	if (etime) *etime = ns_to_ktime(args.time[1]);
 
 	return ret;
 }
@@ -397,7 +391,7 @@ nouveau_user_framebuffer_create(struct drm_device *dev,
 
 static const struct drm_mode_config_funcs nouveau_mode_config_funcs = {
 	.fb_create = nouveau_user_framebuffer_create,
-	.output_poll_changed = nouveau_fbcon_output_poll_changed,
+	.output_poll_changed = drm_fb_helper_output_poll_changed,
 };
 
 
@@ -456,9 +450,9 @@ nouveau_display_hpd_resume(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	mutex_lock(&drm->hpd_lock);
+	spin_lock_irq(&drm->hpd_lock);
 	drm->hpd_pending = ~0;
-	mutex_unlock(&drm->hpd_lock);
+	spin_unlock_irq(&drm->hpd_lock);
 
 	schedule_work(&drm->hpd_work);
 }
@@ -475,10 +469,10 @@ nouveau_display_hpd_work(struct work_struct *work)
 
 	pm_runtime_get_sync(dev->dev);
 
-	mutex_lock(&drm->hpd_lock);
+	spin_lock_irq(&drm->hpd_lock);
 	pending = drm->hpd_pending;
 	drm->hpd_pending = 0;
-	mutex_unlock(&drm->hpd_lock);
+	spin_unlock_irq(&drm->hpd_lock);
 
 	/* Nothing to do, exit early without updating the last busy counter */
 	if (!pending)
@@ -488,14 +482,30 @@ nouveau_display_hpd_work(struct work_struct *work)
 	drm_connector_list_iter_begin(dev, &conn_iter);
 
 	nouveau_for_each_non_mst_connector_iter(connector, &conn_iter) {
+		struct nouveau_connector *nv_connector = nouveau_connector(connector);
 		enum drm_connector_status old_status = connector->status;
-		u64 old_epoch_counter = connector->epoch_counter;
+		u64 bits, old_epoch_counter = connector->epoch_counter;
 
 		if (!(pending & drm_connector_mask(connector)))
 			continue;
 
-		connector->status = drm_helper_probe_detect(connector, NULL,
-							    false);
+		spin_lock_irq(&drm->hpd_lock);
+		bits = nv_connector->hpd_pending;
+		nv_connector->hpd_pending = 0;
+		spin_unlock_irq(&drm->hpd_lock);
+
+		drm_dbg_kms(dev, "[CONNECTOR:%d:%s] plug:%d unplug:%d irq:%d\n",
+			    connector->base.id, connector->name,
+			    !!(bits & NVIF_CONN_EVENT_V0_PLUG),
+			    !!(bits & NVIF_CONN_EVENT_V0_UNPLUG),
+			    !!(bits & NVIF_CONN_EVENT_V0_IRQ));
+
+		if (bits & NVIF_CONN_EVENT_V0_IRQ) {
+			if (nouveau_dp_link_check(nv_connector))
+				continue;
+		}
+
+		connector->status = drm_helper_probe_detect(connector, NULL, false);
 		if (old_epoch_counter == connector->epoch_counter)
 			continue;
 
@@ -573,7 +583,8 @@ nouveau_display_init(struct drm_device *dev, bool resume, bool runtime)
 	drm_connector_list_iter_begin(dev, &conn_iter);
 	nouveau_for_each_non_mst_connector_iter(connector, &conn_iter) {
 		struct nouveau_connector *conn = nouveau_connector(connector);
-		nvif_notify_get(&conn->hpd);
+		nvif_event_allow(&conn->hpd);
+		nvif_event_allow(&conn->irq);
 	}
 	drm_connector_list_iter_end(&conn_iter);
 
@@ -608,7 +619,8 @@ nouveau_display_fini(struct drm_device *dev, bool suspend, bool runtime)
 	drm_connector_list_iter_begin(dev, &conn_iter);
 	nouveau_for_each_non_mst_connector_iter(connector, &conn_iter) {
 		struct nouveau_connector *conn = nouveau_connector(connector);
-		nvif_notify_put(&conn->hpd);
+		nvif_event_block(&conn->irq);
+		nvif_event_block(&conn->hpd);
 	}
 	drm_connector_list_iter_end(&conn_iter);
 
@@ -732,7 +744,7 @@ nouveau_display_create(struct drm_device *dev)
 	}
 
 	INIT_WORK(&drm->hpd_work, nouveau_display_hpd_work);
-	mutex_init(&drm->hpd_lock);
+	spin_lock_init(&drm->hpd_lock);
 #ifdef CONFIG_ACPI
 	drm->acpi_nb.notifier_call = nouveau_display_acpi_ntfy;
 	register_acpi_notifier(&drm->acpi_nb);
@@ -766,8 +778,7 @@ nouveau_display_destroy(struct drm_device *dev)
 
 	nvif_disp_dtor(&disp->disp);
 
-	nouveau_drm(dev)->display = NULL;
-	mutex_destroy(&drm->hpd_lock);
+	drm->display = NULL;
 	kfree(disp);
 }
 
@@ -776,6 +787,9 @@ nouveau_display_suspend(struct drm_device *dev, bool runtime)
 {
 	struct nouveau_display *disp = nouveau_display(dev);
 
+	/* Disable console. */
+	drm_fb_helper_set_suspend_unlocked(dev->fb_helper, true);
+
 	if (drm_drv_uses_atomic_modeset(dev)) {
 		if (!runtime) {
 			disp->suspend = drm_atomic_helper_suspend(dev);
@@ -803,8 +817,10 @@ nouveau_display_resume(struct drm_device *dev, bool runtime)
 			drm_atomic_helper_resume(dev, disp->suspend);
 			disp->suspend = NULL;
 		}
-		return;
 	}
+
+	/* Enable console. */
+	drm_fb_helper_set_suspend_unlocked(dev->fb_helper, false);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index ddb75d80bc53..b90cac6d5772 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -42,9 +42,9 @@ READ_GET(struct nouveau_channel *chan, uint64_t *prev_get, int *timeout)
 {
 	uint64_t val;
 
-	val = nvif_rd32(&chan->user, chan->user_get);
+	val = nvif_rd32(chan->userd, chan->user_get);
         if (chan->user_get_hi)
-                val |= (uint64_t)nvif_rd32(&chan->user, chan->user_get_hi) << 32;
+		val |= (uint64_t)nvif_rd32(chan->userd, chan->user_get_hi) << 32;
 
 	/* reset counter as long as GET is still advancing, this is
 	 * to avoid misdetecting a GPU lockup if the GPU happens to
@@ -86,7 +86,7 @@ nv50_dma_push(struct nouveau_channel *chan, u64 offset, int length)
 	/* Flush writes. */
 	nouveau_bo_rd32(pb, 0);
 
-	nvif_wr32(&chan->user, 0x8c, chan->dma.ib_put);
+	nvif_wr32(chan->userd, 0x8c, chan->dma.ib_put);
 	if (user->func && user->func->doorbell)
 		user->func->doorbell(user, chan->token);
 	chan->dma.ib_free--;
@@ -98,7 +98,7 @@ nv50_dma_push_wait(struct nouveau_channel *chan, int count)
 	uint32_t cnt = 0, prev_get = 0;
 
 	while (chan->dma.ib_free < count) {
-		uint32_t get = nvif_rd32(&chan->user, 0x88);
+		uint32_t get = nvif_rd32(chan->userd, 0x88);
 		if (get != prev_get) {
 			prev_get = get;
 			cnt = 0;
diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
index 20db8ea1a0ba..e00876f92aee 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -29,8 +29,7 @@
 #include "nouveau_encoder.h"
 #include "nouveau_crtc.h"
 
-#include <nvif/class.h>
-#include <nvif/cl5070.h>
+#include <nvif/if0011.h>
 
 MODULE_PARM_DESC(mst, "Enable DisplayPort multi-stream (default: enabled)");
 static int nouveau_mst = 1;
@@ -140,12 +139,17 @@ nouveau_dp_detect(struct nouveau_connector *nv_connector,
 	 * TODO: look into checking this before probing I2C to detect DVI/HDMI
 	 */
 	hpd = nvif_conn_hpd_status(&nv_connector->conn);
-	if (hpd == NVIF_CONN_HPD_STATUS_NOT_PRESENT)
+	if (hpd == NVIF_CONN_HPD_STATUS_NOT_PRESENT) {
+		nvif_outp_dp_aux_pwr(&nv_encoder->outp, false);
 		goto out;
+	}
+	nvif_outp_dp_aux_pwr(&nv_encoder->outp, true);
 
 	status = nouveau_dp_probe_dpcd(nv_connector, nv_encoder);
-	if (status == connector_status_disconnected)
+	if (status == connector_status_disconnected) {
+		nvif_outp_dp_aux_pwr(&nv_encoder->outp, false);
 		goto out;
+	}
 
 	/* If we're in MST mode, we're done here */
 	if (mstm && mstm->can_mst && mstm->is_mst) {
@@ -193,6 +197,7 @@ nouveau_dp_detect(struct nouveau_connector *nv_connector,
 			ret = NOUVEAU_DP_MST;
 			goto out;
 		} else if (ret != 0) {
+			nvif_outp_dp_aux_pwr(&nv_encoder->outp, false);
 			goto out;
 		}
 	}
@@ -206,14 +211,28 @@ out:
 	return ret;
 }
 
-void nouveau_dp_irq(struct nouveau_drm *drm,
-		    struct nouveau_connector *nv_connector)
+bool
+nouveau_dp_link_check(struct nouveau_connector *nv_connector)
+{
+	struct nouveau_encoder *nv_encoder = find_encoder(&nv_connector->base, DCB_OUTPUT_DP);
+
+	if (!nv_encoder || nv_encoder->outp.or.id < 0)
+		return true;
+
+	return nvif_outp_dp_retrain(&nv_encoder->outp) == 0;
+}
+
+void
+nouveau_dp_irq(struct work_struct *work)
 {
+	struct nouveau_connector *nv_connector =
+		container_of(work, typeof(*nv_connector), irq_work);
 	struct drm_connector *connector = &nv_connector->base;
 	struct nouveau_encoder *outp = find_encoder(connector, DCB_OUTPUT_DP);
+	struct nouveau_drm *drm = nouveau_drm(outp->base.base.dev);
 	struct nv50_mstm *mstm;
+	u64 hpd = 0;
 	int ret;
-	bool send_hpd = false;
 
 	if (!outp)
 		return;
@@ -225,14 +244,14 @@ void nouveau_dp_irq(struct nouveau_drm *drm,
 
 	if (mstm && mstm->is_mst) {
 		if (!nv50_mstm_service(drm, nv_connector, mstm))
-			send_hpd = true;
+			hpd |= NVIF_CONN_EVENT_V0_UNPLUG;
 	} else {
 		drm_dp_cec_irq(&nv_connector->aux);
 
 		if (nouveau_dp_has_sink_count(connector, outp)) {
 			ret = drm_dp_read_sink_count(&nv_connector->aux);
 			if (ret != outp->dp.sink_count)
-				send_hpd = true;
+				hpd |= NVIF_CONN_EVENT_V0_PLUG;
 			if (ret >= 0)
 				outp->dp.sink_count = ret;
 		}
@@ -240,8 +259,7 @@ void nouveau_dp_irq(struct nouveau_drm *drm,
 
 	mutex_unlock(&outp->dp.hpd_irq_lock);
 
-	if (send_hpd)
-		nouveau_connector_hpd(connector);
+	nouveau_connector_hpd(nv_connector, NVIF_CONN_EVENT_V0_IRQ | hpd);
 }
 
 /* TODO:
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index fd99ec0f4257..80f154b6adab 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -33,6 +33,8 @@
 #include <drm/drm_aperture.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_drv.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_ttm_helper.h>
 #include <drm/drm_ioctl.h>
 #include <drm/drm_vblank.h>
@@ -49,7 +51,6 @@
 
 #include <nvif/class.h>
 #include <nvif/cl0002.h>
-#include <nvif/cla06f.h>
 
 #include "nouveau_drv.h"
 #include "nouveau_dma.h"
@@ -62,7 +63,6 @@
 #include "nouveau_bios.h"
 #include "nouveau_ioctl.h"
 #include "nouveau_abi16.h"
-#include "nouveau_fbcon.h"
 #include "nouveau_fence.h"
 #include "nouveau_debugfs.h"
 #include "nouveau_usif.h"
@@ -316,28 +316,19 @@ static void
 nouveau_accel_ce_init(struct nouveau_drm *drm)
 {
 	struct nvif_device *device = &drm->client.device;
+	u64 runm;
 	int ret = 0;
 
 	/* Allocate channel that has access to a (preferably async) copy
 	 * engine, to use for TTM buffer moves.
 	 */
-	if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) {
-		ret = nouveau_channel_new(drm, device,
-					  nvif_fifo_runlist_ce(device), 0,
-					  true, &drm->cechan);
-	} else
-	if (device->info.chipset >= 0xa3 &&
-	    device->info.chipset != 0xaa &&
-	    device->info.chipset != 0xac) {
-		/* Prior to Kepler, there's only a single runlist, so all
-		 * engines can be accessed from any channel.
-		 *
-		 * We still want to use a separate channel though.
-		 */
-		ret = nouveau_channel_new(drm, device, NvDmaFB, NvDmaTT, false,
-					  &drm->cechan);
+	runm = nvif_fifo_runlist_ce(device);
+	if (!runm) {
+		NV_DEBUG(drm, "no ce runlist\n");
+		return;
 	}
 
+	ret = nouveau_channel_new(drm, device, false, runm, NvDmaFB, NvDmaTT, &drm->cechan);
 	if (ret)
 		NV_ERROR(drm, "failed to create ce channel, %d\n", ret);
 }
@@ -355,23 +346,17 @@ static void
 nouveau_accel_gr_init(struct nouveau_drm *drm)
 {
 	struct nvif_device *device = &drm->client.device;
-	u32 arg0, arg1;
+	u64 runm;
 	int ret;
 
-	if (device->info.family >= NV_DEVICE_INFO_V0_AMPERE)
-		return;
-
 	/* Allocate channel that has access to the graphics engine. */
-	if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) {
-		arg0 = nvif_fifo_runlist(device, NV_DEVICE_HOST_RUNLIST_ENGINES_GR);
-		arg1 = 1;
-	} else {
-		arg0 = NvDmaFB;
-		arg1 = NvDmaTT;
+	runm = nvif_fifo_runlist(device, NV_DEVICE_HOST_RUNLIST_ENGINES_GR);
+	if (!runm) {
+		NV_DEBUG(drm, "no gr runlist\n");
+		return;
 	}
 
-	ret = nouveau_channel_new(drm, device, arg0, arg1, false,
-				  &drm->channel);
+	ret = nouveau_channel_new(drm, device, false, runm, NvDmaFB, NvDmaTT, &drm->channel);
 	if (ret) {
 		NV_ERROR(drm, "failed to create kernel channel, %d\n", ret);
 		nouveau_accel_gr_fini(drm);
@@ -436,6 +421,7 @@ nouveau_accel_fini(struct nouveau_drm *drm)
 	nouveau_accel_gr_fini(drm);
 	if (drm->fence)
 		nouveau_fence(drm)->dtor(drm);
+	nouveau_channels_fini(drm);
 }
 
 static void
@@ -485,6 +471,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 		case PASCAL_CHANNEL_GPFIFO_A:
 		case VOLTA_CHANNEL_GPFIFO_A:
 		case TURING_CHANNEL_GPFIFO_A:
+		case AMPERE_CHANNEL_GPFIFO_A:
 		case AMPERE_CHANNEL_GPFIFO_B:
 			ret = nvc0_fence_create(drm);
 			break;
@@ -611,7 +598,6 @@ nouveau_drm_device_init(struct drm_device *dev)
 	nouveau_hwmon_init(dev);
 	nouveau_svm_init(drm);
 	nouveau_dmem_init(drm);
-	nouveau_fbcon_init(dev);
 	nouveau_led_init(dev);
 
 	if (nouveau_pmops_runtime()) {
@@ -655,7 +641,6 @@ nouveau_drm_device_fini(struct drm_device *dev)
 	}
 
 	nouveau_led_fini(dev);
-	nouveau_fbcon_fini(dev);
 	nouveau_dmem_fini(drm);
 	nouveau_svm_fini(drm);
 	nouveau_hwmon_fini(dev);
@@ -809,6 +794,11 @@ static int nouveau_drm_probe(struct pci_dev *pdev,
 	if (ret)
 		goto fail_drm_dev_init;
 
+	if (nouveau_drm(drm_dev)->client.device.info.ram_size <= 32 * 1024 * 1024)
+		drm_fbdev_generic_setup(drm_dev, 8);
+	else
+		drm_fbdev_generic_setup(drm_dev, 32);
+
 	quirk_broken_nv_runpm(pdev);
 	return 0;
 
@@ -865,8 +855,6 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime)
 	nouveau_led_suspend(dev);
 
 	if (dev->mode_config.num_crtc) {
-		NV_DEBUG(drm, "suspending console...\n");
-		nouveau_fbcon_set_suspend(dev, 1);
 		NV_DEBUG(drm, "suspending display...\n");
 		ret = nouveau_display_suspend(dev, runtime);
 		if (ret)
@@ -940,8 +928,6 @@ nouveau_do_resume(struct drm_device *dev, bool runtime)
 	if (dev->mode_config.num_crtc) {
 		NV_DEBUG(drm, "resuming display...\n");
 		nouveau_display_resume(dev, runtime);
-		NV_DEBUG(drm, "resuming console...\n");
-		nouveau_fbcon_set_suspend(dev, 0);
 	}
 
 	nouveau_led_resume(dev);
@@ -1296,7 +1282,6 @@ static void nouveau_display_options(void)
 	DRM_DEBUG_DRIVER("... tv_disable   : %d\n", nouveau_tv_disable);
 	DRM_DEBUG_DRIVER("... ignorelid    : %d\n", nouveau_ignorelid);
 	DRM_DEBUG_DRIVER("... duallink     : %d\n", nouveau_duallink);
-	DRM_DEBUG_DRIVER("... nofbaccel    : %d\n", nouveau_nofbaccel);
 	DRM_DEBUG_DRIVER("... config       : %s\n", nouveau_config);
 	DRM_DEBUG_DRIVER("... debug        : %s\n", nouveau_debug);
 	DRM_DEBUG_DRIVER("... noaccel      : %d\n", nouveau_noaccel);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 84df5ddae4d0..d6dd07bfa64a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -78,11 +78,6 @@ enum nouveau_drm_object_route {
 	NVDRM_OBJECT_ANY = NVIF_IOCTL_V0_OWNER_ANY,
 };
 
-enum nouveau_drm_notify_route {
-	NVDRM_NOTIFY_NVIF = 0,
-	NVDRM_NOTIFY_USIF
-};
-
 enum nouveau_drm_handle {
 	NVDRM_CHAN    = 0xcccc0000, /* |= client chid */
 	NVDRM_NVSW    = 0x55550000,
@@ -179,16 +174,19 @@ struct nouveau_drm {
 	void *fence;
 
 	/* Global channel management. */
+	int chan_total; /* Number of channels across all runlists. */
+	int chan_nr;	/* 0 if per-runlist CHIDs. */
+	int runl_nr;
 	struct {
-		int nr;
+		int chan_nr;
+		int chan_id_base;
 		u64 context_base;
-	} chan;
+	} *runl;
 
 	/* context for accelerated drm-internal operations */
 	struct nouveau_channel *cechan;
 	struct nouveau_channel *channel;
 	struct nvkm_gpuobj *notify;
-	struct nouveau_fbdev *fbcon;
 	struct nvif_object ntfy;
 
 	/* nv10-nv40 tiling regions */
@@ -201,10 +199,8 @@ struct nouveau_drm {
 	struct nvbios vbios;
 	struct nouveau_display *display;
 	struct work_struct hpd_work;
-	struct mutex hpd_lock;
+	spinlock_t hpd_lock;
 	u32 hpd_pending;
-	struct work_struct fbcon_work;
-	int fbcon_new_state;
 #ifdef CONFIG_ACPI
 	struct notifier_block acpi_nb;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_encoder.h b/drivers/gpu/drm/nouveau/nouveau_encoder.h
index b72e5783a00f..70c1ad6c4d9d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_encoder.h
+++ b/drivers/gpu/drm/nouveau/nouveau_encoder.h
@@ -48,7 +48,6 @@ struct nouveau_encoder {
 	struct dcb_output *dcb;
 	struct nvif_outp outp;
 	int or;
-	int link;
 
 	struct i2c_adapter *i2c;
 	struct nvkm_i2c_aux *aux;
@@ -142,8 +141,8 @@ enum nouveau_dp_status {
 };
 
 int nouveau_dp_detect(struct nouveau_connector *, struct nouveau_encoder *);
-void nouveau_dp_irq(struct nouveau_drm *drm,
-		    struct nouveau_connector *nv_connector);
+bool nouveau_dp_link_check(struct nouveau_connector *);
+void nouveau_dp_irq(struct work_struct *);
 enum drm_mode_status nv50_dp_mode_valid(struct drm_connector *,
 					struct nouveau_encoder *,
 					const struct drm_display_mode *,
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 3c7e0c9d6baf..e87de7906f78 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -231,9 +231,9 @@ void
 nouveau_fbcon_accel_save_disable(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	if (drm->fbcon && drm->fbcon->helper.fbdev) {
-		drm->fbcon->saved_flags = drm->fbcon->helper.fbdev->flags;
-		drm->fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED;
+	if (drm->fbcon && drm->fbcon->helper.info) {
+		drm->fbcon->saved_flags = drm->fbcon->helper.info->flags;
+		drm->fbcon->helper.info->flags |= FBINFO_HWACCEL_DISABLED;
 	}
 }
 
@@ -241,9 +241,8 @@ void
 nouveau_fbcon_accel_restore(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	if (drm->fbcon && drm->fbcon->helper.fbdev) {
-		drm->fbcon->helper.fbdev->flags = drm->fbcon->saved_flags;
-	}
+	if (drm->fbcon && drm->fbcon->helper.info)
+		drm->fbcon->helper.info->flags = drm->fbcon->saved_flags;
 }
 
 static void
@@ -253,8 +252,8 @@ nouveau_fbcon_accel_fini(struct drm_device *dev)
 	struct nouveau_fbdev *fbcon = drm->fbcon;
 	if (fbcon && drm->channel) {
 		console_lock();
-		if (fbcon->helper.fbdev)
-			fbcon->helper.fbdev->flags |= FBINFO_HWACCEL_DISABLED;
+		if (fbcon->helper.info)
+			fbcon->helper.info->flags |= FBINFO_HWACCEL_DISABLED;
 		console_unlock();
 		nouveau_channel_idle(drm->channel);
 		nvif_object_dtor(&fbcon->twod);
@@ -272,7 +271,7 @@ nouveau_fbcon_accel_init(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_fbdev *fbcon = drm->fbcon;
-	struct fb_info *info = fbcon->helper.fbdev;
+	struct fb_info *info = fbcon->helper.info;
 	int ret;
 
 	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA)
@@ -290,7 +289,7 @@ nouveau_fbcon_accel_init(struct drm_device *dev)
 static void
 nouveau_fbcon_zfill(struct drm_device *dev, struct nouveau_fbdev *fbcon)
 {
-	struct fb_info *info = fbcon->helper.fbdev;
+	struct fb_info *info = fbcon->helper.info;
 	struct fb_fillrect rect;
 
 	/* Clear the entire fbcon.  The drm will program every connector
@@ -363,7 +362,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
 		}
 	}
 
-	info = drm_fb_helper_alloc_fbi(helper);
+	info = drm_fb_helper_alloc_info(helper);
 	if (IS_ERR(info)) {
 		ret = PTR_ERR(info);
 		goto out_unlock;
@@ -420,7 +419,7 @@ nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon)
 	struct drm_framebuffer *fb = fbcon->helper.fb;
 	struct nouveau_bo *nvbo;
 
-	drm_fb_helper_unregister_fbi(&fbcon->helper);
+	drm_fb_helper_unregister_info(&fbcon->helper);
 	drm_fb_helper_fini(&fbcon->helper);
 
 	if (fb && fb->obj[0]) {
@@ -586,8 +585,8 @@ nouveau_fbcon_init(struct drm_device *dev)
 	if (ret)
 		goto fini;
 
-	if (fbcon->helper.fbdev)
-		fbcon->helper.fbdev->pixmap.buf_align = 4;
+	if (fbcon->helper.info)
+		fbcon->helper.info->pixmap.buf_align = 4;
 	return 0;
 
 fini:
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.h b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
deleted file mode 100644
index 1796d8824580..000000000000
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (C) 2008 Maarten Maathuis.
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining
- * a copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sublicense, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- *
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial
- * portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
- * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
- * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
- * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
- * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __NOUVEAU_FBCON_H__
-#define __NOUVEAU_FBCON_H__
-
-#include <drm/drm_fb_helper.h>
-
-#include "nouveau_display.h"
-
-struct nouveau_vma;
-
-struct nouveau_fbdev {
-	struct drm_fb_helper helper; /* must be first */
-	unsigned int saved_flags;
-	struct nvif_object surf2d;
-	struct nvif_object clip;
-	struct nvif_object rop;
-	struct nvif_object patt;
-	struct nvif_object gdi;
-	struct nvif_object blit;
-	struct nvif_object twod;
-	struct nouveau_vma *vma;
-
-	struct mutex hotplug_lock;
-	bool hotplug_waiting;
-};
-
-void nouveau_fbcon_restore(void);
-
-int nv04_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region);
-int nv04_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect);
-int nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image);
-int nv04_fbcon_accel_init(struct fb_info *info);
-
-int nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect);
-int nv50_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region);
-int nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image);
-int nv50_fbcon_accel_init(struct fb_info *info);
-
-int nvc0_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect);
-int nvc0_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region);
-int nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image);
-int nvc0_fbcon_accel_init(struct fb_info *info);
-
-void nouveau_fbcon_gpu_lockup(struct fb_info *info);
-
-int nouveau_fbcon_init(struct drm_device *dev);
-void nouveau_fbcon_fini(struct drm_device *dev);
-void nouveau_fbcon_set_suspend(struct drm_device *dev, int state);
-void nouveau_fbcon_accel_save_disable(struct drm_device *dev);
-void nouveau_fbcon_accel_restore(struct drm_device *dev);
-
-void nouveau_fbcon_output_poll_changed(struct drm_device *dev);
-void nouveau_fbcon_hotplug_resume(struct nouveau_fbdev *fbcon);
-extern int nouveau_nofbaccel;
-
-#endif /* __NV50_FBCON_H__ */
-
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index abcac7db4347..ee5e9d40c166 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -29,9 +29,7 @@
 #include <linux/sched/signal.h>
 #include <trace/events/dma_fence.h>
 
-#include <nvif/cl826e.h>
-#include <nvif/notify.h>
-#include <nvif/event.h>
+#include <nvif/if0020.h>
 
 #include "nouveau_drv.h"
 #include "nouveau_dma.h"
@@ -79,10 +77,6 @@ nouveau_local_fence(struct dma_fence *fence, struct nouveau_drm *drm)
 	    fence->ops != &nouveau_fence_ops_uevent)
 		return NULL;
 
-	if (fence->context < drm->chan.context_base ||
-	    fence->context >= drm->chan.context_base + drm->chan.nr)
-		return NULL;
-
 	return from_fence(fence);
 }
 
@@ -90,8 +84,9 @@ void
 nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error)
 {
 	struct nouveau_fence *fence;
+	unsigned long flags;
 
-	spin_lock_irq(&fctx->lock);
+	spin_lock_irqsave(&fctx->lock, flags);
 	while (!list_empty(&fctx->pending)) {
 		fence = list_entry(fctx->pending.next, typeof(*fence), head);
 
@@ -99,16 +94,16 @@ nouveau_fence_context_kill(struct nouveau_fence_chan *fctx, int error)
 			dma_fence_set_error(&fence->base, error);
 
 		if (nouveau_fence_signal(fence))
-			nvif_notify_put(&fctx->notify);
+			nvif_event_block(&fctx->event);
 	}
-	spin_unlock_irq(&fctx->lock);
+	spin_unlock_irqrestore(&fctx->lock, flags);
 }
 
 void
 nouveau_fence_context_del(struct nouveau_fence_chan *fctx)
 {
 	nouveau_fence_context_kill(fctx, 0);
-	nvif_notify_dtor(&fctx->notify);
+	nvif_event_dtor(&fctx->event);
 	fctx->dead = 1;
 
 	/*
@@ -150,12 +145,11 @@ nouveau_fence_update(struct nouveau_channel *chan, struct nouveau_fence_chan *fc
 }
 
 static int
-nouveau_fence_wait_uevent_handler(struct nvif_notify *notify)
+nouveau_fence_wait_uevent_handler(struct nvif_event *event, void *repv, u32 repc)
 {
-	struct nouveau_fence_chan *fctx =
-		container_of(notify, typeof(*fctx), notify);
+	struct nouveau_fence_chan *fctx = container_of(event, typeof(*fctx), event);
 	unsigned long flags;
-	int ret = NVIF_NOTIFY_KEEP;
+	int ret = NVIF_EVENT_KEEP;
 
 	spin_lock_irqsave(&fctx->lock, flags);
 	if (!list_empty(&fctx->pending)) {
@@ -165,7 +159,7 @@ nouveau_fence_wait_uevent_handler(struct nvif_notify *notify)
 		fence = list_entry(fctx->pending.next, typeof(*fence), head);
 		chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
 		if (nouveau_fence_update(chan, fctx))
-			ret = NVIF_NOTIFY_DROP;
+			ret = NVIF_EVENT_DROP;
 	}
 	spin_unlock_irqrestore(&fctx->lock, flags);
 
@@ -177,12 +171,16 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
 {
 	struct nouveau_fence_priv *priv = (void*)chan->drm->fence;
 	struct nouveau_cli *cli = (void *)chan->user.client;
+	struct {
+		struct nvif_event_v0 base;
+		struct nvif_chan_event_v0 host;
+	} args;
 	int ret;
 
 	INIT_LIST_HEAD(&fctx->flip);
 	INIT_LIST_HEAD(&fctx->pending);
 	spin_lock_init(&fctx->lock);
-	fctx->context = chan->drm->chan.context_base + chan->chid;
+	fctx->context = chan->drm->runl[chan->runlist].context_base + chan->chid;
 
 	if (chan == chan->drm->cechan)
 		strcpy(fctx->name, "copy engine channel");
@@ -195,13 +193,12 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
 	if (!priv->uevent)
 		return;
 
-	ret = nvif_notify_ctor(&chan->user, "fenceNonStallIntr",
-			       nouveau_fence_wait_uevent_handler,
-			       false, NV826E_V0_NTFY_NON_STALL_INTERRUPT,
-			       &(struct nvif_notify_uevent_req) { },
-			       sizeof(struct nvif_notify_uevent_req),
-			       sizeof(struct nvif_notify_uevent_rep),
-			       &fctx->notify);
+	args.host.version = 0;
+	args.host.type = NVIF_CHAN_EVENT_V0_NON_STALL_INTR;
+
+	ret = nvif_event_ctor(&chan->user, "fenceNonStallIntr", (chan->runlist << 16) | chan->chid,
+			      nouveau_fence_wait_uevent_handler, false,
+			      &args.base, sizeof(args), &fctx->event);
 
 	WARN_ON(ret);
 }
@@ -230,7 +227,7 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
 		spin_lock_irq(&fctx->lock);
 
 		if (nouveau_fence_update(chan, fctx))
-			nvif_notify_put(&fctx->notify);
+			nvif_event_block(&fctx->event);
 
 		list_add_tail(&fence->head, &fctx->pending);
 		spin_unlock_irq(&fctx->lock);
@@ -254,7 +251,7 @@ nouveau_fence_done(struct nouveau_fence *fence)
 		spin_lock_irqsave(&fctx->lock, flags);
 		chan = rcu_dereference_protected(fence->channel, lockdep_is_held(&fctx->lock));
 		if (chan && nouveau_fence_update(chan, fctx))
-			nvif_notify_put(&fctx->notify);
+			nvif_event_block(&fctx->event);
 		spin_unlock_irqrestore(&fctx->lock, flags);
 	}
 	return dma_fence_is_signaled(&fence->base);
@@ -505,13 +502,13 @@ static bool nouveau_fence_enable_signaling(struct dma_fence *f)
 	bool ret;
 
 	if (!fctx->notify_ref++)
-		nvif_notify_get(&fctx->notify);
+		nvif_event_allow(&fctx->event);
 
 	ret = nouveau_fence_no_signaling(f);
 	if (ret)
 		set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags);
 	else if (!--fctx->notify_ref)
-		nvif_notify_put(&fctx->notify);
+		nvif_event_block(&fctx->event);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 4887caa69c65..0ca2bc85adf6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -3,7 +3,7 @@
 #define __NOUVEAU_FENCE_H__
 
 #include <linux/dma-fence.h>
-#include <nvif/notify.h>
+#include <nvif/event.h>
 
 struct nouveau_drm;
 struct nouveau_bo;
@@ -44,7 +44,7 @@ struct nouveau_fence_chan {
 	u32 context;
 	char name[32];
 
-	struct nvif_notify notify;
+	struct nvif_event event;
 	int notify_ref, dead;
 };
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_nvif.c b/drivers/gpu/drm/nouveau/nouveau_nvif.c
index df0fe58ca3ab..1d49ebdfd5dc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_nvif.c
+++ b/drivers/gpu/drm/nouveau/nouveau_nvif.c
@@ -27,12 +27,10 @@
  ******************************************************************************/
 
 #include <core/client.h>
-#include <core/notify.h>
 #include <core/ioctl.h>
 
 #include <nvif/client.h>
 #include <nvif/driver.h>
-#include <nvif/notify.h>
 #include <nvif/event.h>
 #include <nvif/ioctl.h>
 
@@ -72,10 +70,23 @@ nvkm_client_suspend(void *priv)
 }
 
 static int
+nvkm_client_event(u64 token, void *repv, u32 repc)
+{
+	struct nvif_object *object = (void *)(unsigned long)token;
+	struct nvif_event *event = container_of(object, typeof(*event), object);
+
+	if (event->func(event, repv, repc) == NVIF_EVENT_KEEP)
+		return NVKM_EVENT_KEEP;
+
+	return NVKM_EVENT_DROP;
+}
+
+static int
 nvkm_client_driver_init(const char *name, u64 device, const char *cfg,
 			const char *dbg, void **ppriv)
 {
-	return nvkm_client_new(name, device, cfg, dbg, nvif_notify, (struct nvkm_client **)ppriv);
+	return nvkm_client_new(name, device, cfg, dbg, nvkm_client_event,
+			       (struct nvkm_client **)ppriv);
 }
 
 const struct nvif_driver
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 31a5b81ee9fc..a74ba8d84ba7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -24,7 +24,7 @@
 #include "nouveau_chan.h"
 #include "nouveau_dmem.h"
 
-#include <nvif/notify.h>
+#include <nvif/event.h>
 #include <nvif/object.h>
 #include <nvif/vmm.h>
 
@@ -51,7 +51,8 @@ struct nouveau_svm {
 		u32 putaddr;
 		u32 get;
 		u32 put;
-		struct nvif_notify notify;
+		struct nvif_event notify;
+		struct work_struct work;
 
 		struct nouveau_svm_fault {
 			u64 inst;
@@ -711,13 +712,11 @@ out:
 	return ret;
 }
 
-static int
-nouveau_svm_fault(struct nvif_notify *notify)
+static void
+nouveau_svm_fault(struct work_struct *work)
 {
-	struct nouveau_svm_fault_buffer *buffer =
-		container_of(notify, typeof(*buffer), notify);
-	struct nouveau_svm *svm =
-		container_of(buffer, typeof(*svm), buffer[buffer->id]);
+	struct nouveau_svm_fault_buffer *buffer = container_of(work, typeof(*buffer), work);
+	struct nouveau_svm *svm = container_of(buffer, typeof(*svm), buffer[buffer->id]);
 	struct nvif_object *device = &svm->drm->client.device.object;
 	struct nouveau_svmm *svmm;
 	struct {
@@ -737,7 +736,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
 		buffer->put = nvif_rd32(device, buffer->putaddr);
 		buffer->get = nvif_rd32(device, buffer->getaddr);
 		if (buffer->get == buffer->put)
-			return NVIF_NOTIFY_KEEP;
+			return;
 	}
 	buffer->fault_nr = 0;
 
@@ -881,7 +880,15 @@ nouveau_svm_fault(struct nvif_notify *notify)
 	/* Issue fault replay to the GPU. */
 	if (replay)
 		nouveau_svm_fault_replay(svm);
-	return NVIF_NOTIFY_KEEP;
+}
+
+static int
+nouveau_svm_event(struct nvif_event *event, void *argv, u32 argc)
+{
+	struct nouveau_svm_fault_buffer *buffer = container_of(event, typeof(*buffer), notify);
+
+	schedule_work(&buffer->work);
+	return NVIF_EVENT_KEEP;
 }
 
 static struct nouveau_pfnmap_args *
@@ -936,7 +943,9 @@ static void
 nouveau_svm_fault_buffer_fini(struct nouveau_svm *svm, int id)
 {
 	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
-	nvif_notify_put(&buffer->notify);
+
+	nvif_event_block(&buffer->notify);
+	flush_work(&buffer->work);
 }
 
 static int
@@ -944,10 +953,12 @@ nouveau_svm_fault_buffer_init(struct nouveau_svm *svm, int id)
 {
 	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
 	struct nvif_object *device = &svm->drm->client.device.object;
+
 	buffer->get = nvif_rd32(device, buffer->getaddr);
 	buffer->put = nvif_rd32(device, buffer->putaddr);
 	SVM_DBG(svm, "get %08x put %08x (init)", buffer->get, buffer->put);
-	return nvif_notify_get(&buffer->notify);
+
+	return nvif_event_allow(&buffer->notify);
 }
 
 static void
@@ -956,15 +967,18 @@ nouveau_svm_fault_buffer_dtor(struct nouveau_svm *svm, int id)
 	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
 	int i;
 
+	if (!nvif_object_constructed(&buffer->object))
+		return;
+
+	nouveau_svm_fault_buffer_fini(svm, id);
+
 	if (buffer->fault) {
 		for (i = 0; buffer->fault[i] && i < buffer->entries; i++)
 			kfree(buffer->fault[i]);
 		kvfree(buffer->fault);
 	}
 
-	nouveau_svm_fault_buffer_fini(svm, id);
-
-	nvif_notify_dtor(&buffer->notify);
+	nvif_event_dtor(&buffer->notify);
 	nvif_object_dtor(&buffer->object);
 }
 
@@ -990,10 +1004,10 @@ nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id)
 	buffer->entries = args.entries;
 	buffer->getaddr = args.get;
 	buffer->putaddr = args.put;
+	INIT_WORK(&buffer->work, nouveau_svm_fault);
 
-	ret = nvif_notify_ctor(&buffer->object, "svmFault", nouveau_svm_fault,
-			       true, NVB069_V0_NTFY_FAULT, NULL, 0, 0,
-			       &buffer->notify);
+	ret = nvif_event_ctor(&buffer->object, "svmFault", id, nouveau_svm_event, true, NULL, 0,
+			      &buffer->notify);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_usif.c b/drivers/gpu/drm/nouveau/nouveau_usif.c
index 36df6840c099..002d1479ba89 100644
--- a/drivers/gpu/drm/nouveau/nouveau_usif.c
+++ b/drivers/gpu/drm/nouveau/nouveau_usif.c
@@ -151,12 +151,6 @@ usif_ioctl(struct drm_file *filp, void __user *user, u32 argc)
 	case NVIF_IOCTL_V0_NEW:
 		ret = usif_object_new(filp, data, size, argv, argc, abi16);
 		break;
-	case NVIF_IOCTL_V0_NTFY_NEW:
-	case NVIF_IOCTL_V0_NTFY_DEL:
-	case NVIF_IOCTL_V0_NTFY_GET:
-	case NVIF_IOCTL_V0_NTFY_PUT:
-		ret = -ENOSYS;
-		break;
 	default:
 		ret = nvif_client_ioctl(client, argv, argc);
 		break;
diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c
index 60cd8c0463df..789393b94291 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vga.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vga.c
@@ -7,7 +7,6 @@
 
 #include "nouveau_drv.h"
 #include "nouveau_acpi.h"
-#include "nouveau_fbcon.h"
 #include "nouveau_vga.h"
 
 static unsigned int
diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c
deleted file mode 100644
index c30b8dacd86b..000000000000
--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Copyright 2009 Ben Skeggs
- * Copyright 2008 Stuart Bennett
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-#define NVIF_DEBUG_PRINT_DISABLE
-#include "nouveau_drv.h"
-#include "nouveau_dma.h"
-#include "nouveau_fbcon.h"
-
-#include <nvif/push006c.h>
-
-int
-nv04_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region)
-{
-	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
-	struct nouveau_channel *chan = drm->channel;
-	struct nvif_push *push = chan->chan.push;
-	int ret;
-
-	ret = PUSH_WAIT(push, 4);
-	if (ret)
-		return ret;
-
-	PUSH_NVSQ(push, NV05F, 0x0300, (region->sy << 16) | region->sx,
-			       0x0304, (region->dy << 16) | region->dx,
-			       0x0308, (region->height << 16) | region->width);
-	PUSH_KICK(push);
-	return 0;
-}
-
-int
-nv04_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
-{
-	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
-	struct nouveau_channel *chan = drm->channel;
-	struct nvif_push *push = chan->chan.push;
-	int ret;
-
-	ret = PUSH_WAIT(push, 7);
-	if (ret)
-		return ret;
-
-	PUSH_NVSQ(push, NV04A, 0x02fc, (rect->rop != ROP_COPY) ? 1 : 3);
-	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
-	    info->fix.visual == FB_VISUAL_DIRECTCOLOR)
-		PUSH_NVSQ(push, NV04A, 0x03fc, ((uint32_t *)info->pseudo_palette)[rect->color]);
-	else
-		PUSH_NVSQ(push, NV04A, 0x03fc, rect->color);
-	PUSH_NVSQ(push, NV04A, 0x0400, (rect->dx << 16) | rect->dy,
-			       0x0404, (rect->width << 16) | rect->height);
-	PUSH_KICK(push);
-	return 0;
-}
-
-int
-nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
-{
-	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
-	struct nouveau_channel *chan = drm->channel;
-	struct nvif_push *push = chan->chan.push;
-	uint32_t fg;
-	uint32_t bg;
-	uint32_t dsize;
-	uint32_t *data = (uint32_t *)image->data;
-	int ret;
-
-	if (image->depth != 1)
-		return -ENODEV;
-
-	ret = PUSH_WAIT(push, 8);
-	if (ret)
-		return ret;
-
-	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
-	    info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
-		fg = ((uint32_t *) info->pseudo_palette)[image->fg_color];
-		bg = ((uint32_t *) info->pseudo_palette)[image->bg_color];
-	} else {
-		fg = image->fg_color;
-		bg = image->bg_color;
-	}
-
-	PUSH_NVSQ(push, NV04A, 0x0be4, (image->dy << 16) | (image->dx & 0xffff),
-			       0x0be8, ((image->dy + image->height) << 16) |
-				       ((image->dx + image->width) & 0xffff),
-			       0x0bec, bg,
-			       0x0bf0, fg,
-			       0x0bf4, (image->height << 16) | ALIGN(image->width, 8),
-			       0x0bf8, (image->height << 16) | image->width,
-			       0x0bfc, (image->dy << 16) | (image->dx & 0xffff));
-
-	dsize = ALIGN(ALIGN(image->width, 8) * image->height, 32) >> 5;
-	while (dsize) {
-		int iter_len = dsize > 128 ? 128 : dsize;
-
-		ret = PUSH_WAIT(push, iter_len + 1);
-		if (ret)
-			return ret;
-
-		PUSH_NVSQ(push, NV04A, 0x0c00, data, iter_len);
-		data += iter_len;
-		dsize -= iter_len;
-	}
-
-	PUSH_KICK(push);
-	return 0;
-}
-
-int
-nv04_fbcon_accel_init(struct fb_info *info)
-{
-	struct nouveau_fbdev *nfbdev = info->par;
-	struct drm_device *dev = nfbdev->helper.dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_channel *chan = drm->channel;
-	struct nvif_device *device = &drm->client.device;
-	struct nvif_push *push = chan->chan.push;
-	struct nvkm_device *nvkm_device = nvxx_device(&drm->client.device);
-	resource_size_t fb_base = nvkm_device->func->resource_addr(nvkm_device, 1);
-	int surface_fmt, pattern_fmt, rect_fmt;
-	int ret;
-
-	switch (info->var.bits_per_pixel) {
-	case 8:
-		surface_fmt = 1;
-		pattern_fmt = 3;
-		rect_fmt = 3;
-		break;
-	case 16:
-		surface_fmt = 4;
-		pattern_fmt = 1;
-		rect_fmt = 1;
-		break;
-	case 32:
-		switch (info->var.transp.length) {
-		case 0: /* depth 24 */
-		case 8: /* depth 32 */
-			break;
-		default:
-			return -EINVAL;
-		}
-
-		surface_fmt = 6;
-		pattern_fmt = 3;
-		rect_fmt = 3;
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	ret = nvif_object_ctor(&chan->user, "fbconCtxSurf2d", 0x0062,
-			       device->info.family >= NV_DEVICE_INFO_V0_CELSIUS ?
-			       0x0062 : 0x0042, NULL, 0, &nfbdev->surf2d);
-	if (ret)
-		return ret;
-
-	ret = nvif_object_ctor(&chan->user, "fbconCtxClip", 0x0019, 0x0019,
-			       NULL, 0, &nfbdev->clip);
-	if (ret)
-		return ret;
-
-	ret = nvif_object_ctor(&chan->user, "fbconCtxRop", 0x0043, 0x0043,
-			       NULL, 0, &nfbdev->rop);
-	if (ret)
-		return ret;
-
-	ret = nvif_object_ctor(&chan->user, "fbconCtxPatt", 0x0044, 0x0044,
-			       NULL, 0, &nfbdev->patt);
-	if (ret)
-		return ret;
-
-	ret = nvif_object_ctor(&chan->user, "fbconGdiRectText", 0x004a, 0x004a,
-			       NULL, 0, &nfbdev->gdi);
-	if (ret)
-		return ret;
-
-	ret = nvif_object_ctor(&chan->user, "fbconImageBlit", 0x005f,
-			       device->info.chipset >= 0x11 ? 0x009f : 0x005f,
-			       NULL, 0, &nfbdev->blit);
-	if (ret)
-		return ret;
-
-	if (PUSH_WAIT(push, 49 + (device->info.chipset >= 0x11 ? 4 : 0))) {
-		nouveau_fbcon_gpu_lockup(info);
-		return 0;
-	}
-
-	PUSH_NVSQ(push, NV042, 0x0000, nfbdev->surf2d.handle);
-	PUSH_NVSQ(push, NV042, 0x0184, chan->vram.handle,
-			       0x0188, chan->vram.handle);
-	PUSH_NVSQ(push, NV042, 0x0300, surface_fmt,
-			       0x0304, info->fix.line_length | (info->fix.line_length << 16),
-			       0x0308, info->fix.smem_start - fb_base,
-			       0x030c, info->fix.smem_start - fb_base);
-
-	PUSH_NVSQ(push, NV043, 0x0000, nfbdev->rop.handle);
-	PUSH_NVSQ(push, NV043, 0x0300, 0x55);
-
-	PUSH_NVSQ(push, NV044, 0x0000, nfbdev->patt.handle);
-	PUSH_NVSQ(push, NV044, 0x0300, pattern_fmt,
-#ifdef __BIG_ENDIAN
-			       0x0304, 2,
-#else
-			       0x0304, 1,
-#endif
-			       0x0308, 0,
-			       0x030c, 1,
-			       0x0310, ~0,
-			       0x0314, ~0,
-			       0x0318, ~0,
-			       0x031c, ~0);
-
-	PUSH_NVSQ(push, NV019, 0x0000, nfbdev->clip.handle);
-	PUSH_NVSQ(push, NV019, 0x0300, 0,
-			       0x0304, (info->var.yres_virtual << 16) | info->var.xres_virtual);
-
-	PUSH_NVSQ(push, NV05F, 0x0000, nfbdev->blit.handle);
-	PUSH_NVSQ(push, NV05F, 0x019c, nfbdev->surf2d.handle);
-	PUSH_NVSQ(push, NV05F, 0x02fc, 3);
-	if (nfbdev->blit.oclass == 0x009f) {
-		PUSH_NVSQ(push, NV09F, 0x0120, 0,
-				       0x0124, 1,
-				       0x0128, 2);
-	}
-
-	PUSH_NVSQ(push, NV04A, 0x0000, nfbdev->gdi.handle);
-	PUSH_NVSQ(push, NV04A, 0x0198, nfbdev->surf2d.handle);
-	PUSH_NVSQ(push, NV04A, 0x0188, nfbdev->patt.handle,
-			       0x018c, nfbdev->rop.handle);
-	PUSH_NVSQ(push, NV04A, 0x0304, 1);
-	PUSH_NVSQ(push, NV04A, 0x0300, rect_fmt);
-	PUSH_NVSQ(push, NV04A, 0x02fc, 3);
-
-	PUSH_KICK(push);
-	return 0;
-}
-
diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c
deleted file mode 100644
index 71f92e4750f9..000000000000
--- a/drivers/gpu/drm/nouveau/nv50_fbcon.c
+++ /dev/null
@@ -1,299 +0,0 @@
-/*
- * Copyright 2010 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#define NVIF_DEBUG_PRINT_DISABLE
-#include "nouveau_drv.h"
-#include "nouveau_dma.h"
-#include "nouveau_fbcon.h"
-#include "nouveau_vmm.h"
-
-#include <nvif/push206e.h>
-
-#include <nvhw/class/cl502d.h>
-
-int
-nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
-{
-	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
-	struct nouveau_channel *chan = drm->channel;
-	struct nvif_push *push = chan->chan.push;
-	u32 colour;
-	int ret;
-
-	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
-	    info->fix.visual == FB_VISUAL_DIRECTCOLOR)
-		colour = ((uint32_t *)info->pseudo_palette)[rect->color];
-	else
-		colour = rect->color;
-
-	ret = PUSH_WAIT(push, rect->rop == ROP_COPY ? 7 : 11);
-	if (ret)
-		return ret;
-
-	if (rect->rop != ROP_COPY) {
-		PUSH_MTHD(push, NV502D, SET_OPERATION,
-			  NVDEF(NV502D, SET_OPERATION, V, ROP_AND));
-	}
-
-	PUSH_MTHD(push, NV502D, SET_RENDER_SOLID_PRIM_COLOR, colour);
-
-	PUSH_MTHD(push, NV502D, RENDER_SOLID_PRIM_POINT_SET_X(0), rect->dx,
-				RENDER_SOLID_PRIM_POINT_Y(0), rect->dy,
-				RENDER_SOLID_PRIM_POINT_SET_X(1), rect->dx + rect->width,
-				RENDER_SOLID_PRIM_POINT_Y(1), rect->dy + rect->height);
-
-	if (rect->rop != ROP_COPY) {
-		PUSH_MTHD(push, NV502D, SET_OPERATION,
-			  NVDEF(NV502D, SET_OPERATION, V, SRCCOPY));
-	}
-
-	PUSH_KICK(push);
-	return 0;
-}
-
-int
-nv50_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region)
-{
-	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
-	struct nouveau_channel *chan = drm->channel;
-	struct nvif_push *push = chan->chan.push;
-	int ret;
-
-	ret = PUSH_WAIT(push, 12);
-	if (ret)
-		return ret;
-
-	PUSH_MTHD(push, NV502D, WAIT_FOR_IDLE, 0);
-
-	PUSH_MTHD(push, NV502D, SET_PIXELS_FROM_MEMORY_DST_X0, region->dx,
-				SET_PIXELS_FROM_MEMORY_DST_Y0, region->dy,
-				SET_PIXELS_FROM_MEMORY_DST_WIDTH, region->width,
-				SET_PIXELS_FROM_MEMORY_DST_HEIGHT, region->height);
-
-	PUSH_MTHD(push, NV502D, SET_PIXELS_FROM_MEMORY_SRC_X0_FRAC, 0,
-				SET_PIXELS_FROM_MEMORY_SRC_X0_INT, region->sx,
-				SET_PIXELS_FROM_MEMORY_SRC_Y0_FRAC, 0,
-				PIXELS_FROM_MEMORY_SRC_Y0_INT, region->sy);
-	PUSH_KICK(push);
-	return 0;
-}
-
-int
-nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
-{
-	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
-	struct nouveau_channel *chan = drm->channel;
-	struct nvif_push *push = chan->chan.push;
-	uint32_t dwords, *data = (uint32_t *)image->data;
-	uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel));
-	uint32_t *palette = info->pseudo_palette, bg, fg;
-	int ret;
-
-	if (image->depth != 1)
-		return -ENODEV;
-
-	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
-	    info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
-		bg = palette[image->bg_color] | mask;
-		fg = palette[image->fg_color] | mask;
-	} else {
-		bg = image->bg_color;
-		fg = image->fg_color;
-	}
-
-	ret = PUSH_WAIT(push, 11);
-	if (ret)
-		return ret;
-
-	PUSH_MTHD(push, NV502D, SET_PIXELS_FROM_CPU_COLOR0, bg,
-				SET_PIXELS_FROM_CPU_COLOR1, fg);
-
-	PUSH_MTHD(push, NV502D, SET_PIXELS_FROM_CPU_SRC_WIDTH, image->width,
-				SET_PIXELS_FROM_CPU_SRC_HEIGHT, image->height);
-
-	PUSH_MTHD(push, NV502D, SET_PIXELS_FROM_CPU_DST_X0_FRAC, 0,
-				SET_PIXELS_FROM_CPU_DST_X0_INT, image->dx,
-				SET_PIXELS_FROM_CPU_DST_Y0_FRAC, 0,
-				SET_PIXELS_FROM_CPU_DST_Y0_INT, image->dy);
-
-	dwords = ALIGN(ALIGN(image->width, 8) * image->height, 32) >> 5;
-	while (dwords) {
-		int count = dwords > 2047 ? 2047 : dwords;
-
-		ret = PUSH_WAIT(push, count + 1);
-		if (ret)
-			return ret;
-
-		dwords -= count;
-
-		PUSH_NINC(push, NV502D, PIXELS_FROM_CPU_DATA, data, count);
-		data += count;
-	}
-
-	PUSH_KICK(push);
-	return 0;
-}
-
-int
-nv50_fbcon_accel_init(struct fb_info *info)
-{
-	struct nouveau_fbdev *nfbdev = info->par;
-	struct drm_device *dev = nfbdev->helper.dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_channel *chan = drm->channel;
-	struct nvif_push *push = chan->chan.push;
-	int ret, format;
-
-	switch (info->var.bits_per_pixel) {
-	case 8:
-		format = NV502D_SET_DST_FORMAT_V_Y8;
-		break;
-	case 15:
-		format = NV502D_SET_DST_FORMAT_V_X1R5G5B5;
-		break;
-	case 16:
-		format = NV502D_SET_DST_FORMAT_V_R5G6B5;
-		break;
-	case 32:
-		switch (info->var.transp.length) {
-		case 0: /* depth 24 */
-		case 8: /* depth 32, just use 24.. */
-			format = NV502D_SET_DST_FORMAT_V_X8R8G8B8;
-			break;
-		case 2: /* depth 30 */
-			format = NV502D_SET_DST_FORMAT_V_A2B10G10R10;
-			break;
-		default:
-			return -EINVAL;
-		}
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	ret = nvif_object_ctor(&chan->user, "fbconTwoD", 0x502d, 0x502d,
-			       NULL, 0, &nfbdev->twod);
-	if (ret)
-		return ret;
-
-	ret = PUSH_WAIT(push, 56);
-	if (ret) {
-		nouveau_fbcon_gpu_lockup(info);
-		return ret;
-	}
-
-	PUSH_MTHD(push, NV502D, SET_OBJECT, nfbdev->twod.handle);
-	PUSH_MTHD(push, NV502D, SET_DST_CONTEXT_DMA, chan->vram.handle,
-				SET_SRC_CONTEXT_DMA, chan->vram.handle,
-				SET_SEMAPHORE_CONTEXT_DMA, chan->vram.handle);
-
-	PUSH_MTHD(push, NV502D, SET_DST_FORMAT,
-		  NVVAL(NV502D, SET_DST_FORMAT, V, format),
-
-				SET_DST_MEMORY_LAYOUT,
-		  NVDEF(NV502D, SET_DST_MEMORY_LAYOUT, V, PITCH));
-
-	PUSH_MTHD(push, NV502D, SET_DST_PITCH, info->fix.line_length,
-				SET_DST_WIDTH, info->var.xres_virtual,
-				SET_DST_HEIGHT, info->var.yres_virtual,
-
-				SET_DST_OFFSET_UPPER,
-		  NVVAL(NV502D, SET_DST_OFFSET_UPPER, V, upper_32_bits(nfbdev->vma->addr)),
-
-				SET_DST_OFFSET_LOWER,
-		  NVVAL(NV502D, SET_DST_OFFSET_LOWER, V, lower_32_bits(nfbdev->vma->addr)));
-
-	PUSH_MTHD(push, NV502D, SET_SRC_FORMAT,
-		  NVVAL(NV502D, SET_SRC_FORMAT, V, format),
-
-				SET_SRC_MEMORY_LAYOUT,
-		  NVDEF(NV502D, SET_SRC_MEMORY_LAYOUT, V, PITCH));
-
-	PUSH_MTHD(push, NV502D, SET_SRC_PITCH, info->fix.line_length,
-				SET_SRC_WIDTH, info->var.xres_virtual,
-				SET_SRC_HEIGHT, info->var.yres_virtual,
-
-				SET_SRC_OFFSET_UPPER,
-		  NVVAL(NV502D, SET_SRC_OFFSET_UPPER, V, upper_32_bits(nfbdev->vma->addr)),
-
-				SET_SRC_OFFSET_LOWER,
-		  NVVAL(NV502D, SET_SRC_OFFSET_LOWER, V, lower_32_bits(nfbdev->vma->addr)));
-
-	PUSH_MTHD(push, NV502D, SET_CLIP_ENABLE,
-		  NVDEF(NV502D, SET_CLIP_ENABLE, V, FALSE));
-
-	PUSH_MTHD(push, NV502D, SET_ROP,
-		  NVVAL(NV502D, SET_ROP, V, 0x55));
-
-	PUSH_MTHD(push, NV502D, SET_OPERATION,
-		  NVDEF(NV502D, SET_OPERATION, V, SRCCOPY));
-
-	PUSH_MTHD(push, NV502D, SET_MONOCHROME_PATTERN_COLOR_FORMAT,
-		  NVDEF(NV502D, SET_MONOCHROME_PATTERN_COLOR_FORMAT, V, A8R8G8B8),
-
-				SET_MONOCHROME_PATTERN_FORMAT,
-		  NVDEF(NV502D, SET_MONOCHROME_PATTERN_FORMAT, V, LE_M1));
-
-	PUSH_MTHD(push, NV502D, RENDER_SOLID_PRIM_MODE,
-		  NVDEF(NV502D, RENDER_SOLID_PRIM_MODE, V, RECTS),
-
-				SET_RENDER_SOLID_PRIM_COLOR_FORMAT,
-		  NVVAL(NV502D, SET_RENDER_SOLID_PRIM_COLOR_FORMAT, V, format));
-
-	PUSH_MTHD(push, NV502D, SET_PIXELS_FROM_CPU_DATA_TYPE,
-		  NVDEF(NV502D, SET_PIXELS_FROM_CPU_DATA_TYPE, V, INDEX),
-
-				SET_PIXELS_FROM_CPU_COLOR_FORMAT,
-		  NVVAL(NV502D, SET_PIXELS_FROM_CPU_COLOR_FORMAT, V, format),
-
-				SET_PIXELS_FROM_CPU_INDEX_FORMAT,
-		  NVDEF(NV502D, SET_PIXELS_FROM_CPU_INDEX_FORMAT, V, I1),
-
-				SET_PIXELS_FROM_CPU_MONO_FORMAT,
-		  NVDEF(NV502D, SET_PIXELS_FROM_CPU_MONO_FORMAT, V, CGA6_M1),
-
-				SET_PIXELS_FROM_CPU_WRAP,
-		  NVDEF(NV502D, SET_PIXELS_FROM_CPU_WRAP, V, WRAP_BYTE));
-
-	PUSH_MTHD(push, NV502D, SET_PIXELS_FROM_CPU_MONO_OPACITY,
-		  NVDEF(NV502D, SET_PIXELS_FROM_CPU_MONO_OPACITY, V, OPAQUE));
-
-	PUSH_MTHD(push, NV502D, SET_PIXELS_FROM_CPU_DX_DU_FRAC, 0,
-				SET_PIXELS_FROM_CPU_DX_DU_INT, 1,
-				SET_PIXELS_FROM_CPU_DY_DV_FRAC, 0,
-				SET_PIXELS_FROM_CPU_DY_DV_INT, 1);
-
-	PUSH_MTHD(push, NV502D, SET_PIXELS_FROM_MEMORY_SAFE_OVERLAP,
-		  NVDEF(NV502D, SET_PIXELS_FROM_MEMORY_SAFE_OVERLAP, V, TRUE));
-
-	PUSH_MTHD(push, NV502D, SET_PIXELS_FROM_MEMORY_DU_DX_FRAC, 0,
-				SET_PIXELS_FROM_MEMORY_DU_DX_INT, 1,
-				SET_PIXELS_FROM_MEMORY_DV_DY_FRAC, 0,
-				SET_PIXELS_FROM_MEMORY_DV_DY_INT, 1);
-	PUSH_KICK(push);
-	return 0;
-}
-
diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c
index c3526a8622e3..812b8c62eeba 100644
--- a/drivers/gpu/drm/nouveau/nv84_fence.c
+++ b/drivers/gpu/drm/nouveau/nv84_fence.c
@@ -76,12 +76,18 @@ nv84_fence_sync32(struct nouveau_channel *chan, u64 virtual, u32 sequence)
 	return ret;
 }
 
+static inline u32
+nv84_fence_chid(struct nouveau_channel *chan)
+{
+	return chan->drm->runl[chan->runlist].chan_id_base + chan->chid;
+}
+
 static int
 nv84_fence_emit(struct nouveau_fence *fence)
 {
 	struct nouveau_channel *chan = fence->channel;
 	struct nv84_fence_chan *fctx = chan->fence;
-	u64 addr = fctx->vma->addr + chan->chid * 16;
+	u64 addr = fctx->vma->addr + nv84_fence_chid(chan) * 16;
 
 	return fctx->base.emit32(chan, addr, fence->base.seqno);
 }
@@ -91,7 +97,7 @@ nv84_fence_sync(struct nouveau_fence *fence,
 		struct nouveau_channel *prev, struct nouveau_channel *chan)
 {
 	struct nv84_fence_chan *fctx = chan->fence;
-	u64 addr = fctx->vma->addr + prev->chid * 16;
+	u64 addr = fctx->vma->addr + nv84_fence_chid(prev) * 16;
 
 	return fctx->base.sync32(chan, addr, fence->base.seqno);
 }
@@ -100,7 +106,7 @@ static u32
 nv84_fence_read(struct nouveau_channel *chan)
 {
 	struct nv84_fence_priv *priv = chan->drm->fence;
-	return nouveau_bo_rd32(priv->bo, chan->chid * 16/4);
+	return nouveau_bo_rd32(priv->bo, nv84_fence_chid(chan) * 16/4);
 }
 
 static void
@@ -109,7 +115,7 @@ nv84_fence_context_del(struct nouveau_channel *chan)
 	struct nv84_fence_priv *priv = chan->drm->fence;
 	struct nv84_fence_chan *fctx = chan->fence;
 
-	nouveau_bo_wr32(priv->bo, chan->chid * 16 / 4, fctx->base.sequence);
+	nouveau_bo_wr32(priv->bo, nv84_fence_chid(chan) * 16 / 4, fctx->base.sequence);
 	mutex_lock(&priv->mutex);
 	nouveau_vma_del(&fctx->vma);
 	mutex_unlock(&priv->mutex);
@@ -152,9 +158,9 @@ nv84_fence_suspend(struct nouveau_drm *drm)
 	struct nv84_fence_priv *priv = drm->fence;
 	int i;
 
-	priv->suspend = vmalloc(array_size(sizeof(u32), drm->chan.nr));
+	priv->suspend = vmalloc(array_size(sizeof(u32), drm->chan_total));
 	if (priv->suspend) {
-		for (i = 0; i < drm->chan.nr; i++)
+		for (i = 0; i < drm->chan_total; i++)
 			priv->suspend[i] = nouveau_bo_rd32(priv->bo, i*4);
 	}
 
@@ -168,7 +174,7 @@ nv84_fence_resume(struct nouveau_drm *drm)
 	int i;
 
 	if (priv->suspend) {
-		for (i = 0; i < drm->chan.nr; i++)
+		for (i = 0; i < drm->chan_total; i++)
 			nouveau_bo_wr32(priv->bo, i*4, priv->suspend[i]);
 		vfree(priv->suspend);
 		priv->suspend = NULL;
@@ -204,7 +210,7 @@ nv84_fence_create(struct nouveau_drm *drm)
 	priv->base.context_new = nv84_fence_context_new;
 	priv->base.context_del = nv84_fence_context_del;
 
-	priv->base.uevent = drm->client.device.info.family < NV_DEVICE_INFO_V0_AMPERE;
+	priv->base.uevent = true;
 
 	mutex_init(&priv->mutex);
 
@@ -216,7 +222,7 @@ nv84_fence_create(struct nouveau_drm *drm)
 		  * will lose CPU/GPU coherency!
 		  */
 		NOUVEAU_GEM_DOMAIN_GART | NOUVEAU_GEM_DOMAIN_COHERENT;
-	ret = nouveau_bo_new(&drm->client, 16 * drm->chan.nr, 0,
+	ret = nouveau_bo_new(&drm->client, 16 * drm->chan_total, 0,
 			     domain, 0, 0, NULL, NULL, &priv->bo);
 	if (ret == 0) {
 		ret = nouveau_bo_pin(priv->bo, domain, false);
diff --git a/drivers/gpu/drm/nouveau/nvc0_fbcon.c b/drivers/gpu/drm/nouveau/nvc0_fbcon.c
deleted file mode 100644
index 7908a1a3e00f..000000000000
--- a/drivers/gpu/drm/nouveau/nvc0_fbcon.c
+++ /dev/null
@@ -1,297 +0,0 @@
-/*
- * Copyright 2010 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#define NVIF_DEBUG_PRINT_DISABLE
-#include "nouveau_drv.h"
-#include "nouveau_dma.h"
-#include "nouveau_fbcon.h"
-#include "nouveau_vmm.h"
-
-#include <nvif/push906f.h>
-
-#include <nvhw/class/cl902d.h>
-
-int
-nvc0_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
-{
-	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
-	struct nouveau_channel *chan = drm->channel;
-	struct nvif_push *push = chan->chan.push;
-	u32 colour;
-	int ret;
-
-	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
-	    info->fix.visual == FB_VISUAL_DIRECTCOLOR)
-		colour = ((uint32_t *)info->pseudo_palette)[rect->color];
-	else
-		colour = rect->color;
-
-	ret = PUSH_WAIT(push, rect->rop == ROP_COPY ? 7 : 9);
-	if (ret)
-		return ret;
-
-	if (rect->rop != ROP_COPY) {
-		PUSH_IMMD(push, NV902D, SET_OPERATION,
-			  NVDEF(NV902D, SET_OPERATION, V, ROP_AND));
-	}
-
-	PUSH_MTHD(push, NV902D, SET_RENDER_SOLID_PRIM_COLOR, colour);
-
-	PUSH_MTHD(push, NV902D, RENDER_SOLID_PRIM_POINT_SET_X(0), rect->dx,
-				RENDER_SOLID_PRIM_POINT_Y(0), rect->dy,
-				RENDER_SOLID_PRIM_POINT_SET_X(1), rect->dx + rect->width,
-				RENDER_SOLID_PRIM_POINT_Y(1), rect->dy + rect->height);
-
-	if (rect->rop != ROP_COPY) {
-		PUSH_IMMD(push, NV902D, SET_OPERATION,
-			  NVDEF(NV902D, SET_OPERATION, V, SRCCOPY));
-	}
-
-	PUSH_KICK(push);
-	return 0;
-}
-
-int
-nvc0_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region)
-{
-	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
-	struct nouveau_channel *chan = drm->channel;
-	struct nvif_push *push = chan->chan.push;
-	int ret;
-
-	ret = PUSH_WAIT(push, 11);
-	if (ret)
-		return ret;
-
-	PUSH_IMMD(push, NV902D, WAIT_FOR_IDLE, 0);
-
-	PUSH_MTHD(push, NV902D, SET_PIXELS_FROM_MEMORY_DST_X0, region->dx,
-				SET_PIXELS_FROM_MEMORY_DST_Y0, region->dy,
-				SET_PIXELS_FROM_MEMORY_DST_WIDTH, region->width,
-				SET_PIXELS_FROM_MEMORY_DST_HEIGHT, region->height);
-
-	PUSH_MTHD(push, NV902D, SET_PIXELS_FROM_MEMORY_SRC_X0_FRAC, 0,
-				SET_PIXELS_FROM_MEMORY_SRC_X0_INT, region->sx,
-				SET_PIXELS_FROM_MEMORY_SRC_Y0_FRAC, 0,
-				PIXELS_FROM_MEMORY_SRC_Y0_INT, region->sy);
-	PUSH_KICK(push);
-	return 0;
-}
-
-int
-nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
-{
-	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
-	struct nouveau_channel *chan = drm->channel;
-	struct nvif_push *push = chan->chan.push;
-	uint32_t dwords, *data = (uint32_t *)image->data;
-	uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel));
-	uint32_t *palette = info->pseudo_palette, bg, fg;
-	int ret;
-
-	if (image->depth != 1)
-		return -ENODEV;
-
-	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
-	    info->fix.visual == FB_VISUAL_DIRECTCOLOR) {
-		bg = palette[image->bg_color] | mask;
-		fg = palette[image->fg_color] | mask;
-	} else {
-		bg = image->bg_color;
-		fg = image->fg_color;
-	}
-
-	ret = PUSH_WAIT(push, 11);
-	if (ret)
-		return ret;
-
-	PUSH_MTHD(push, NV902D, SET_PIXELS_FROM_CPU_COLOR0, bg,
-				SET_PIXELS_FROM_CPU_COLOR1, fg);
-
-	PUSH_MTHD(push, NV902D, SET_PIXELS_FROM_CPU_SRC_WIDTH, image->width,
-				SET_PIXELS_FROM_CPU_SRC_HEIGHT, image->height);
-
-	PUSH_MTHD(push, NV902D, SET_PIXELS_FROM_CPU_DST_X0_FRAC, 0,
-				SET_PIXELS_FROM_CPU_DST_X0_INT, image->dx,
-				SET_PIXELS_FROM_CPU_DST_Y0_FRAC, 0,
-				SET_PIXELS_FROM_CPU_DST_Y0_INT, image->dy);
-
-	dwords = ALIGN(ALIGN(image->width, 8) * image->height, 32) >> 5;
-	while (dwords) {
-		int count = dwords > 2047 ? 2047 : dwords;
-
-		ret = PUSH_WAIT(push, count + 1);
-		if (ret)
-			return ret;
-
-		dwords -= count;
-
-		PUSH_NINC(push, NV902D, PIXELS_FROM_CPU_DATA, data, count);
-		data += count;
-	}
-
-	PUSH_KICK(push);
-	return 0;
-}
-
-int
-nvc0_fbcon_accel_init(struct fb_info *info)
-{
-	struct nouveau_fbdev *nfbdev = info->par;
-	struct drm_device *dev = nfbdev->helper.dev;
-	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_channel *chan = drm->channel;
-	struct nvif_push *push = chan->chan.push;
-	int ret, format;
-
-	ret = nvif_object_ctor(&chan->user, "fbconTwoD", 0x902d, 0x902d,
-			       NULL, 0, &nfbdev->twod);
-	if (ret)
-		return ret;
-
-	switch (info->var.bits_per_pixel) {
-	case 8:
-		format = NV902D_SET_DST_FORMAT_V_Y8;
-		break;
-	case 15:
-		format = NV902D_SET_DST_FORMAT_V_X1R5G5B5;
-		break;
-	case 16:
-		format = NV902D_SET_DST_FORMAT_V_R5G6B5;
-		break;
-	case 32:
-		switch (info->var.transp.length) {
-		case 0: /* depth 24 */
-		case 8: /* depth 32, just use 24.. */
-			format = NV902D_SET_DST_FORMAT_V_X8R8G8B8;
-			break;
-		case 2: /* depth 30 */
-			format = NV902D_SET_DST_FORMAT_V_A2B10G10R10;
-			break;
-		default:
-			return -EINVAL;
-		}
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	ret = PUSH_WAIT(push, 52);
-	if (ret) {
-		WARN_ON(1);
-		nouveau_fbcon_gpu_lockup(info);
-		return ret;
-	}
-
-	PUSH_MTHD(push, NV902D, SET_OBJECT, nfbdev->twod.handle);
-
-	PUSH_MTHD(push, NV902D, SET_DST_FORMAT,
-		  NVVAL(NV902D, SET_DST_FORMAT, V, format),
-
-				SET_DST_MEMORY_LAYOUT,
-		  NVDEF(NV902D, SET_DST_MEMORY_LAYOUT, V, PITCH));
-
-	PUSH_MTHD(push, NV902D, SET_DST_PITCH, info->fix.line_length,
-				SET_DST_WIDTH, info->var.xres_virtual,
-				SET_DST_HEIGHT, info->var.yres_virtual,
-
-				SET_DST_OFFSET_UPPER,
-		  NVVAL(NV902D, SET_DST_OFFSET_UPPER, V, upper_32_bits(nfbdev->vma->addr)),
-
-				SET_DST_OFFSET_LOWER,
-		  NVVAL(NV902D, SET_DST_OFFSET_LOWER, V, lower_32_bits(nfbdev->vma->addr)));
-
-	PUSH_MTHD(push, NV902D, SET_SRC_FORMAT,
-		  NVVAL(NV902D, SET_SRC_FORMAT, V, format),
-
-				SET_SRC_MEMORY_LAYOUT,
-		  NVDEF(NV902D, SET_SRC_MEMORY_LAYOUT, V, PITCH));
-
-	PUSH_MTHD(push, NV902D, SET_SRC_PITCH, info->fix.line_length,
-				SET_SRC_WIDTH, info->var.xres_virtual,
-				SET_SRC_HEIGHT, info->var.yres_virtual,
-
-				SET_SRC_OFFSET_UPPER,
-		  NVVAL(NV902D, SET_SRC_OFFSET_UPPER, V, upper_32_bits(nfbdev->vma->addr)),
-
-				SET_SRC_OFFSET_LOWER,
-		  NVVAL(NV902D, SET_SRC_OFFSET_LOWER, V, lower_32_bits(nfbdev->vma->addr)));
-
-	PUSH_IMMD(push, NV902D, SET_CLIP_ENABLE,
-		  NVDEF(NV902D, SET_CLIP_ENABLE, V, FALSE));
-
-	PUSH_IMMD(push, NV902D, SET_ROP,
-		  NVVAL(NV902D, SET_ROP, V, 0x55));
-
-	PUSH_IMMD(push, NV902D, SET_OPERATION,
-		  NVDEF(NV902D, SET_OPERATION, V, SRCCOPY));
-
-	PUSH_MTHD(push, NV902D, SET_MONOCHROME_PATTERN_COLOR_FORMAT,
-		  NVDEF(NV902D, SET_MONOCHROME_PATTERN_COLOR_FORMAT, V, A8R8G8B8),
-
-				SET_MONOCHROME_PATTERN_FORMAT,
-		  NVDEF(NV902D, SET_MONOCHROME_PATTERN_FORMAT, V, LE_M1));
-
-	PUSH_MTHD(push, NV902D, RENDER_SOLID_PRIM_MODE,
-		  NVDEF(NV902D, RENDER_SOLID_PRIM_MODE, V, RECTS),
-
-				SET_RENDER_SOLID_PRIM_COLOR_FORMAT,
-		  NVVAL(NV902D, SET_RENDER_SOLID_PRIM_COLOR_FORMAT, V, format));
-
-	PUSH_MTHD(push, NV902D, SET_PIXELS_FROM_CPU_DATA_TYPE,
-		  NVDEF(NV902D, SET_PIXELS_FROM_CPU_DATA_TYPE, V, INDEX),
-
-				SET_PIXELS_FROM_CPU_COLOR_FORMAT,
-		  NVVAL(NV902D, SET_PIXELS_FROM_CPU_COLOR_FORMAT, V, format),
-
-				SET_PIXELS_FROM_CPU_INDEX_FORMAT,
-		  NVDEF(NV902D, SET_PIXELS_FROM_CPU_INDEX_FORMAT, V, I1),
-
-				SET_PIXELS_FROM_CPU_MONO_FORMAT,
-		  NVDEF(NV902D, SET_PIXELS_FROM_CPU_MONO_FORMAT, V, CGA6_M1),
-
-				SET_PIXELS_FROM_CPU_WRAP,
-		  NVDEF(NV902D, SET_PIXELS_FROM_CPU_WRAP, V, WRAP_BYTE));
-
-	PUSH_IMMD(push, NV902D, SET_PIXELS_FROM_CPU_MONO_OPACITY,
-		  NVDEF(NV902D, SET_PIXELS_FROM_CPU_MONO_OPACITY, V, OPAQUE));
-
-	PUSH_MTHD(push, NV902D, SET_PIXELS_FROM_CPU_DX_DU_FRAC, 0,
-				SET_PIXELS_FROM_CPU_DX_DU_INT, 1,
-				SET_PIXELS_FROM_CPU_DY_DV_FRAC, 0,
-				SET_PIXELS_FROM_CPU_DY_DV_INT, 1);
-
-	PUSH_IMMD(push, NV902D, SET_PIXELS_FROM_MEMORY_SAFE_OVERLAP,
-		  NVDEF(NV902D, SET_PIXELS_FROM_MEMORY_SAFE_OVERLAP, V, TRUE));
-
-	PUSH_MTHD(push, NV902D, SET_PIXELS_FROM_MEMORY_DU_DX_FRAC, 0,
-				SET_PIXELS_FROM_MEMORY_DU_DX_INT, 1,
-				SET_PIXELS_FROM_MEMORY_DV_DY_FRAC, 0,
-				SET_PIXELS_FROM_MEMORY_DV_DY_INT, 1);
-	PUSH_KICK(push);
-	return 0;
-}
-
diff --git a/drivers/gpu/drm/nouveau/nvif/Kbuild b/drivers/gpu/drm/nouveau/nvif/Kbuild
index 6abc4bc42e35..b7963a39dd91 100644
--- a/drivers/gpu/drm/nouveau/nvif/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvif/Kbuild
@@ -5,10 +5,11 @@ nvif-y += nvif/conn.o
 nvif-y += nvif/device.o
 nvif-y += nvif/disp.o
 nvif-y += nvif/driver.o
+nvif-y += nvif/event.o
 nvif-y += nvif/fifo.o
+nvif-y += nvif/head.o
 nvif-y += nvif/mem.o
 nvif-y += nvif/mmu.o
-nvif-y += nvif/notify.o
 nvif-y += nvif/outp.o
 nvif-y += nvif/timer.o
 nvif-y += nvif/vmm.o
diff --git a/drivers/gpu/drm/nouveau/nvif/conn.c b/drivers/gpu/drm/nouveau/nvif/conn.c
index 4ce935d58c90..a3cf91aeae2d 100644
--- a/drivers/gpu/drm/nouveau/nvif/conn.c
+++ b/drivers/gpu/drm/nouveau/nvif/conn.c
@@ -27,6 +27,25 @@
 #include <nvif/if0011.h>
 
 int
+nvif_conn_event_ctor(struct nvif_conn *conn, const char *name, nvif_event_func func, u8 types,
+		     struct nvif_event *event)
+{
+	struct {
+		struct nvif_event_v0 base;
+		struct nvif_conn_event_v0 conn;
+	} args;
+	int ret;
+
+	args.conn.version = 0;
+	args.conn.types = types;
+
+	ret = nvif_event_ctor_(&conn->object, name ?: "nvifConnHpd", nvif_conn_id(conn),
+			       func, true, &args.base, sizeof(args), false, event);
+	NVIF_DEBUG(&conn->object, "[NEW EVENT:HPD types:%02x]", types);
+	return ret;
+}
+
+int
 nvif_conn_hpd_status(struct nvif_conn *conn)
 {
 	struct nvif_conn_hpd_status_v0 args;
diff --git a/drivers/gpu/drm/nouveau/nvif/disp.c b/drivers/gpu/drm/nouveau/nvif/disp.c
index 926b0c04b1e8..09915f2715af 100644
--- a/drivers/gpu/drm/nouveau/nvif/disp.c
+++ b/drivers/gpu/drm/nouveau/nvif/disp.c
@@ -72,9 +72,10 @@ nvif_disp_ctor(struct nvif_device *device, const char *name, s32 oclass, struct
 	if (ret)
 		return ret;
 
-	NVIF_DEBUG(&disp->object, "[NEW] conn_mask:%08x outp_mask:%08x",
-		   args.conn_mask, args.outp_mask);
+	NVIF_DEBUG(&disp->object, "[NEW] conn_mask:%08x outp_mask:%08x head_mask:%08x",
+		   args.conn_mask, args.outp_mask, args.head_mask);
 	disp->conn_mask = args.conn_mask;
 	disp->outp_mask = args.outp_mask;
+	disp->head_mask = args.head_mask;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvif/event.c b/drivers/gpu/drm/nouveau/nvif/event.c
new file mode 100644
index 000000000000..61ff4d6eba9f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvif/event.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <nvif/event.h>
+#include <nvif/printf.h>
+
+#include <nvif/class.h>
+#include <nvif/if000e.h>
+
+int
+nvif_event_block(struct nvif_event *event)
+{
+	if (nvif_event_constructed(event)) {
+		int ret = nvif_mthd(&event->object, NVIF_EVENT_V0_BLOCK, NULL, 0);
+		NVIF_ERRON(ret, &event->object, "[BLOCK]");
+		return ret;
+	}
+	return 0;
+}
+
+int
+nvif_event_allow(struct nvif_event *event)
+{
+	if (nvif_event_constructed(event)) {
+		int ret = nvif_mthd(&event->object, NVIF_EVENT_V0_ALLOW, NULL, 0);
+		NVIF_ERRON(ret, &event->object, "[ALLOW]");
+		return ret;
+	}
+	return 0;
+}
+
+void
+nvif_event_dtor(struct nvif_event *event)
+{
+	nvif_object_dtor(&event->object);
+}
+
+int
+nvif_event_ctor_(struct nvif_object *parent, const char *name, u32 handle, nvif_event_func func,
+		 bool wait, struct nvif_event_v0 *args, u32 argc, bool warn,
+		 struct nvif_event *event)
+{
+	struct nvif_event_v0 _args;
+	int ret;
+
+	if (!args) {
+		args = &_args;
+		argc = sizeof(_args);
+	}
+
+	args->version = 0;
+	args->wait = wait;
+
+	ret = nvif_object_ctor(parent, name ?: "nvifEvent", handle,
+			       NVIF_CLASS_EVENT, args, argc, &event->object);
+	NVIF_ERRON(ret && warn, parent, "[NEW EVENT wait:%d size:%zd]",
+		   args->wait, argc - sizeof(*args));
+	if (ret)
+		return ret;
+
+	event->func = func;
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvif/head.c b/drivers/gpu/drm/nouveau/nvif/head.c
new file mode 100644
index 000000000000..f00e01d232db
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvif/head.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <nvif/head.h>
+#include <nvif/disp.h>
+#include <nvif/printf.h>
+
+#include <nvif/class.h>
+#include <nvif/if0013.h>
+
+int
+nvif_head_vblank_event_ctor(struct nvif_head *head, const char *name, nvif_event_func func,
+			    bool wait, struct nvif_event *event)
+{
+	int ret = nvif_event_ctor(&head->object, name ?: "nvifHeadVBlank", nvif_head_id(head),
+				  func, wait, NULL, 0, event);
+	NVIF_ERRON(ret, &head->object, "[NEW EVENT:VBLANK]");
+	return ret;
+}
+
+void
+nvif_head_dtor(struct nvif_head *head)
+{
+	nvif_object_dtor(&head->object);
+}
+
+int
+nvif_head_ctor(struct nvif_disp *disp, const char *name, int id, struct nvif_head *head)
+{
+	struct nvif_head_v0 args;
+	int ret;
+
+	args.version = 0;
+	args.id = id;
+
+	ret = nvif_object_ctor(&disp->object, name ? name : "nvifHead", id, NVIF_CLASS_HEAD,
+			       &args, sizeof(args), &head->object);
+	NVIF_ERRON(ret, &disp->object, "[NEW head id:%d]", args.id);
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nvif/notify.c b/drivers/gpu/drm/nouveau/nvif/notify.c
deleted file mode 100644
index 143c8dc6889e..000000000000
--- a/drivers/gpu/drm/nouveau/nvif/notify.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * Copyright 2014 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-
-#include <nvif/client.h>
-#include <nvif/driver.h>
-#include <nvif/notify.h>
-#include <nvif/object.h>
-#include <nvif/ioctl.h>
-#include <nvif/event.h>
-
-static inline int
-nvif_notify_put_(struct nvif_notify *notify)
-{
-	struct nvif_object *object = notify->object;
-	struct {
-		struct nvif_ioctl_v0 ioctl;
-		struct nvif_ioctl_ntfy_put_v0 ntfy;
-	} args = {
-		.ioctl.type = NVIF_IOCTL_V0_NTFY_PUT,
-		.ntfy.index = notify->index,
-	};
-
-	if (atomic_inc_return(&notify->putcnt) != 1)
-		return 0;
-
-	return nvif_object_ioctl(object, &args, sizeof(args), NULL);
-}
-
-int
-nvif_notify_put(struct nvif_notify *notify)
-{
-	if (likely(notify->object) &&
-	    test_and_clear_bit(NVIF_NOTIFY_USER, &notify->flags)) {
-		int ret = nvif_notify_put_(notify);
-		if (test_bit(NVIF_NOTIFY_WORK, &notify->flags))
-			flush_work(&notify->work);
-		return ret;
-	}
-	return 0;
-}
-
-static inline int
-nvif_notify_get_(struct nvif_notify *notify)
-{
-	struct nvif_object *object = notify->object;
-	struct {
-		struct nvif_ioctl_v0 ioctl;
-		struct nvif_ioctl_ntfy_get_v0 ntfy;
-	} args = {
-		.ioctl.type = NVIF_IOCTL_V0_NTFY_GET,
-		.ntfy.index = notify->index,
-	};
-
-	if (atomic_dec_return(&notify->putcnt) != 0)
-		return 0;
-
-	return nvif_object_ioctl(object, &args, sizeof(args), NULL);
-}
-
-int
-nvif_notify_get(struct nvif_notify *notify)
-{
-	if (likely(notify->object) &&
-	    !test_and_set_bit(NVIF_NOTIFY_USER, &notify->flags))
-		return nvif_notify_get_(notify);
-	return 0;
-}
-
-static inline int
-nvif_notify_func(struct nvif_notify *notify, bool keep)
-{
-	int ret = notify->func(notify);
-	if (ret == NVIF_NOTIFY_KEEP ||
-	    !test_and_clear_bit(NVIF_NOTIFY_USER, &notify->flags)) {
-		if (!keep)
-			atomic_dec(&notify->putcnt);
-		else
-			nvif_notify_get_(notify);
-	}
-	return ret;
-}
-
-static void
-nvif_notify_work(struct work_struct *work)
-{
-	struct nvif_notify *notify = container_of(work, typeof(*notify), work);
-	nvif_notify_func(notify, true);
-}
-
-int
-nvif_notify(const void *header, u32 length, const void *data, u32 size)
-{
-	struct nvif_notify *notify = NULL;
-	const union {
-		struct nvif_notify_rep_v0 v0;
-	} *args = header;
-	int ret = NVIF_NOTIFY_DROP;
-
-	if (length == sizeof(args->v0) && args->v0.version == 0) {
-		if (WARN_ON(args->v0.route))
-			return NVIF_NOTIFY_DROP;
-		notify = (void *)(unsigned long)args->v0.token;
-	}
-
-	if (!WARN_ON(notify == NULL)) {
-		struct nvif_client *client = notify->object->client;
-		if (!WARN_ON(notify->size != size)) {
-			atomic_inc(&notify->putcnt);
-			if (test_bit(NVIF_NOTIFY_WORK, &notify->flags)) {
-				memcpy((void *)notify->data, data, size);
-				schedule_work(&notify->work);
-				return NVIF_NOTIFY_DROP;
-			}
-			notify->data = data;
-			ret = nvif_notify_func(notify, client->driver->keep);
-			notify->data = NULL;
-		}
-	}
-
-	return ret;
-}
-
-int
-nvif_notify_dtor(struct nvif_notify *notify)
-{
-	struct nvif_object *object = notify->object;
-	struct {
-		struct nvif_ioctl_v0 ioctl;
-		struct nvif_ioctl_ntfy_del_v0 ntfy;
-	} args = {
-		.ioctl.type = NVIF_IOCTL_V0_NTFY_DEL,
-		.ntfy.index = notify->index,
-	};
-	int ret = nvif_notify_put(notify);
-	if (ret >= 0 && object) {
-		ret = nvif_object_ioctl(object, &args, sizeof(args), NULL);
-		notify->object = NULL;
-		kfree((void *)notify->data);
-	}
-	return ret;
-}
-
-int
-nvif_notify_ctor(struct nvif_object *object, const char *name,
-		 int (*func)(struct nvif_notify *), bool work, u8 event,
-		 void *data, u32 size, u32 reply, struct nvif_notify *notify)
-{
-	struct {
-		struct nvif_ioctl_v0 ioctl;
-		struct nvif_ioctl_ntfy_new_v0 ntfy;
-		struct nvif_notify_req_v0 req;
-	} *args;
-	int ret = -ENOMEM;
-
-	notify->object = object;
-	notify->name = name ? name : "nvifNotify";
-	notify->flags = 0;
-	atomic_set(&notify->putcnt, 1);
-	notify->func = func;
-	notify->data = NULL;
-	notify->size = reply;
-	if (work) {
-		INIT_WORK(&notify->work, nvif_notify_work);
-		set_bit(NVIF_NOTIFY_WORK, &notify->flags);
-		notify->data = kmalloc(notify->size, GFP_KERNEL);
-		if (!notify->data)
-			goto done;
-	}
-
-	if (!(args = kmalloc(sizeof(*args) + size, GFP_KERNEL)))
-		goto done;
-	args->ioctl.version = 0;
-	args->ioctl.type = NVIF_IOCTL_V0_NTFY_NEW;
-	args->ntfy.version = 0;
-	args->ntfy.event = event;
-	args->req.version = 0;
-	args->req.reply = notify->size;
-	args->req.route = 0;
-	args->req.token = (unsigned long)(void *)notify;
-
-	memcpy(args->req.data, data, size);
-	ret = nvif_object_ioctl(object, args, sizeof(*args) + size, NULL);
-	notify->index = args->ntfy.index;
-	kfree(args);
-done:
-	if (ret)
-		nvif_notify_dtor(notify);
-	return ret;
-}
diff --git a/drivers/gpu/drm/nouveau/nvif/outp.c b/drivers/gpu/drm/nouveau/nvif/outp.c
index 7bfe91a8d6f9..7da39f1eae9f 100644
--- a/drivers/gpu/drm/nouveau/nvif/outp.c
+++ b/drivers/gpu/drm/nouveau/nvif/outp.c
@@ -24,7 +24,177 @@
 #include <nvif/printf.h>
 
 #include <nvif/class.h>
-#include <nvif/if0012.h>
+
+int
+nvif_outp_dp_mst_vcpi(struct nvif_outp *outp, int head,
+		      u8 start_slot, u8 num_slots, u16 pbn, u16 aligned_pbn)
+{
+	struct nvif_outp_dp_mst_vcpi_v0 args;
+	int ret;
+
+	args.version = 0;
+	args.head = head;
+	args.start_slot = start_slot;
+	args.num_slots = num_slots;
+	args.pbn = pbn;
+	args.aligned_pbn = aligned_pbn;
+
+	ret = nvif_object_mthd(&outp->object, NVIF_OUTP_V0_DP_MST_VCPI, &args, sizeof(args));
+	NVIF_ERRON(ret, &outp->object,
+		   "[DP_MST_VCPI head:%d start_slot:%02x num_slots:%02x pbn:%04x aligned_pbn:%04x]",
+		   args.head, args.start_slot, args.num_slots, args.pbn, args.aligned_pbn);
+	return ret;
+}
+
+int
+nvif_outp_dp_retrain(struct nvif_outp *outp)
+{
+	int ret = nvif_object_mthd(&outp->object, NVIF_OUTP_V0_DP_RETRAIN, NULL, 0);
+	NVIF_ERRON(ret, &outp->object, "[DP_RETRAIN]");
+	return ret;
+}
+
+int
+nvif_outp_dp_aux_pwr(struct nvif_outp *outp, bool enable)
+{
+	struct nvif_outp_dp_aux_pwr_v0 args;
+	int ret;
+
+	args.version = 0;
+	args.state = enable;
+
+	ret = nvif_object_mthd(&outp->object, NVIF_OUTP_V0_DP_AUX_PWR, &args, sizeof(args));
+	NVIF_ERRON(ret, &outp->object, "[DP_AUX_PWR state:%d]", args.state);
+	return ret;
+}
+
+int
+nvif_outp_hda_eld(struct nvif_outp *outp, int head, void *data, u32 size)
+{
+	struct {
+		struct nvif_outp_hda_eld_v0 mthd;
+		u8 data[128];
+	} args;
+	int ret;
+
+	if (WARN_ON(size > ARRAY_SIZE(args.data)))
+		return -EINVAL;
+
+	args.mthd.version = 0;
+	args.mthd.head = head;
+
+	memcpy(args.data, data, size);
+	ret = nvif_mthd(&outp->object, NVIF_OUTP_V0_HDA_ELD, &args, sizeof(args.mthd) + size);
+	NVIF_ERRON(ret, &outp->object, "[HDA_ELD head:%d size:%d]", head, size);
+	return ret;
+}
+
+int
+nvif_outp_infoframe(struct nvif_outp *outp, u8 type, struct nvif_outp_infoframe_v0 *args, u32 size)
+{
+	int ret;
+
+	args->type = type;
+
+	ret = nvif_mthd(&outp->object, NVIF_OUTP_V0_INFOFRAME, args, sizeof(*args) + size);
+	NVIF_ERRON(ret, &outp->object, "[INFOFRAME type:%d size:%d]", type, size);
+	return ret;
+}
+
+void
+nvif_outp_release(struct nvif_outp *outp)
+{
+	int ret = nvif_mthd(&outp->object, NVIF_OUTP_V0_RELEASE, NULL, 0);
+	NVIF_ERRON(ret, &outp->object, "[RELEASE]");
+	outp->or.id = -1;
+}
+
+static inline int
+nvif_outp_acquire(struct nvif_outp *outp, u8 proto, struct nvif_outp_acquire_v0 *args)
+{
+	int ret;
+
+	args->version = 0;
+	args->proto = proto;
+
+	ret = nvif_mthd(&outp->object, NVIF_OUTP_V0_ACQUIRE, args, sizeof(*args));
+	if (ret)
+		return ret;
+
+	outp->or.id = args->or;
+	outp->or.link = args->link;
+	return 0;
+}
+
+int
+nvif_outp_acquire_dp(struct nvif_outp *outp, u8 dpcd[16],
+		     int link_nr, int link_bw, bool hda, bool mst)
+{
+	struct nvif_outp_acquire_v0 args;
+	int ret;
+
+	args.dp.link_nr = link_nr;
+	args.dp.link_bw = link_bw;
+	args.dp.hda = hda;
+	args.dp.mst = mst;
+	memcpy(args.dp.dpcd, dpcd, sizeof(args.dp.dpcd));
+
+	ret = nvif_outp_acquire(outp, NVIF_OUTP_ACQUIRE_V0_DP, &args);
+	NVIF_ERRON(ret, &outp->object,
+		   "[ACQUIRE proto:DP link_nr:%d link_bw:%02x hda:%d mst:%d] or:%d link:%d",
+		   args.dp.link_nr, args.dp.link_bw, args.dp.hda, args.dp.mst, args.or, args.link);
+	return ret;
+}
+
+int
+nvif_outp_acquire_lvds(struct nvif_outp *outp, bool dual, bool bpc8)
+{
+	struct nvif_outp_acquire_v0 args;
+	int ret;
+
+	args.lvds.dual = dual;
+	args.lvds.bpc8 = bpc8;
+
+	ret = nvif_outp_acquire(outp, NVIF_OUTP_ACQUIRE_V0_LVDS, &args);
+	NVIF_ERRON(ret, &outp->object,
+		   "[ACQUIRE proto:LVDS dual:%d 8bpc:%d] or:%d link:%d",
+		   args.lvds.dual, args.lvds.bpc8, args.or, args.link);
+	return ret;
+}
+
+int
+nvif_outp_acquire_tmds(struct nvif_outp *outp, int head,
+		       bool hdmi, u8 max_ac_packet, u8 rekey, u8 scdc, bool hda)
+{
+	struct nvif_outp_acquire_v0 args;
+	int ret;
+
+	args.tmds.head = head;
+	args.tmds.hdmi = hdmi;
+	args.tmds.hdmi_max_ac_packet = max_ac_packet;
+	args.tmds.hdmi_rekey = rekey;
+	args.tmds.hdmi_scdc = scdc;
+	args.tmds.hdmi_hda = hda;
+
+	ret = nvif_outp_acquire(outp, NVIF_OUTP_ACQUIRE_V0_TMDS, &args);
+	NVIF_ERRON(ret, &outp->object,
+		   "[ACQUIRE proto:TMDS head:%d hdmi:%d max_ac_packet:%d rekey:%d scdc:%d hda:%d]"
+		   " or:%d link:%d", args.tmds.head, args.tmds.hdmi, args.tmds.hdmi_max_ac_packet,
+		   args.tmds.hdmi_rekey, args.tmds.hdmi_scdc, args.tmds.hdmi_hda,
+		   args.or, args.link);
+	return ret;
+}
+
+int
+nvif_outp_acquire_rgb_crt(struct nvif_outp *outp)
+{
+	struct nvif_outp_acquire_v0 args;
+	int ret;
+
+	ret = nvif_outp_acquire(outp, NVIF_OUTP_ACQUIRE_V0_RGB_CRT, &args);
+	NVIF_ERRON(ret, &outp->object, "[ACQUIRE proto:RGB_CRT] or:%d", args.or);
+	return ret;
+}
 
 int
 nvif_outp_load_detect(struct nvif_outp *outp, u32 loadval)
@@ -58,5 +228,9 @@ nvif_outp_ctor(struct nvif_disp *disp, const char *name, int id, struct nvif_out
 	ret = nvif_object_ctor(&disp->object, name ?: "nvifOutp", id, NVIF_CLASS_OUTP,
 			       &args, sizeof(args), &outp->object);
 	NVIF_ERRON(ret, &disp->object, "[NEW outp id:%d]", id);
-	return ret;
+	if (ret)
+		return ret;
+
+	outp->or.id = -1;
+	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvif/user.c b/drivers/gpu/drm/nouveau/nvif/user.c
index d89f5b67b304..b648a5e036af 100644
--- a/drivers/gpu/drm/nouveau/nvif/user.c
+++ b/drivers/gpu/drm/nouveau/nvif/user.c
@@ -41,7 +41,9 @@ nvif_user_ctor(struct nvif_device *device, const char *name)
 		int version;
 		const struct nvif_user_func *func;
 	} users[] = {
-		{ VOLTA_USERMODE_A, -1, &nvif_userc361 },
+		{ AMPERE_USERMODE_A, -1, &nvif_userc361 },
+		{ TURING_USERMODE_A, -1, &nvif_userc361 },
+		{  VOLTA_USERMODE_A, -1, &nvif_userc361 },
 		{}
 	};
 	int cid, ret;
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/Kbuild b/drivers/gpu/drm/nouveau/nvkm/core/Kbuild
index 2b471ab585b4..e40712023c73 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/core/Kbuild
@@ -5,12 +5,13 @@ nvkm-y += nvkm/core/enum.o
 nvkm-y += nvkm/core/event.o
 nvkm-y += nvkm/core/firmware.o
 nvkm-y += nvkm/core/gpuobj.o
+nvkm-y += nvkm/core/intr.o
 nvkm-y += nvkm/core/ioctl.o
 nvkm-y += nvkm/core/memory.o
 nvkm-y += nvkm/core/mm.o
-nvkm-y += nvkm/core/notify.o
 nvkm-y += nvkm/core/object.o
 nvkm-y += nvkm/core/oproxy.o
 nvkm-y += nvkm/core/option.o
 nvkm-y += nvkm/core/ramht.o
 nvkm-y += nvkm/core/subdev.o
+nvkm-y += nvkm/core/uevent.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/client.c b/drivers/gpu/drm/nouveau/nvkm/core/client.c
index 0c8c55c73b12..ebdeb8eb9e77 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/client.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/client.c
@@ -23,7 +23,6 @@
  */
 #include <core/client.h>
 #include <core/device.h>
-#include <core/notify.h>
 #include <core/option.h>
 
 #include <nvif/class.h>
@@ -44,7 +43,7 @@ nvkm_uclient_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
 	if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))){
 		args->v0.name[sizeof(args->v0.name) - 1] = 0;
 		ret = nvkm_client_new(args->v0.name, args->v0.device, NULL,
-				      NULL, oclass->client->ntfy, &client);
+				      NULL, oclass->client->event, &client);
 		if (ret)
 			return ret;
 	} else
@@ -68,113 +67,6 @@ nvkm_uclient_sclass = {
 	.ctor = nvkm_uclient_new,
 };
 
-struct nvkm_client_notify {
-	struct nvkm_client *client;
-	struct nvkm_notify n;
-	u8 version;
-	u8 size;
-	union {
-		struct nvif_notify_rep_v0 v0;
-	} rep;
-};
-
-static int
-nvkm_client_notify(struct nvkm_notify *n)
-{
-	struct nvkm_client_notify *notify = container_of(n, typeof(*notify), n);
-	struct nvkm_client *client = notify->client;
-	return client->ntfy(&notify->rep, notify->size, n->data, n->size);
-}
-
-int
-nvkm_client_notify_put(struct nvkm_client *client, int index)
-{
-	if (index < ARRAY_SIZE(client->notify)) {
-		if (client->notify[index]) {
-			nvkm_notify_put(&client->notify[index]->n);
-			return 0;
-		}
-	}
-	return -ENOENT;
-}
-
-int
-nvkm_client_notify_get(struct nvkm_client *client, int index)
-{
-	if (index < ARRAY_SIZE(client->notify)) {
-		if (client->notify[index]) {
-			nvkm_notify_get(&client->notify[index]->n);
-			return 0;
-		}
-	}
-	return -ENOENT;
-}
-
-int
-nvkm_client_notify_del(struct nvkm_client *client, int index)
-{
-	if (index < ARRAY_SIZE(client->notify)) {
-		if (client->notify[index]) {
-			nvkm_notify_fini(&client->notify[index]->n);
-			kfree(client->notify[index]);
-			client->notify[index] = NULL;
-			return 0;
-		}
-	}
-	return -ENOENT;
-}
-
-int
-nvkm_client_notify_new(struct nvkm_object *object,
-		       struct nvkm_event *event, void *data, u32 size)
-{
-	struct nvkm_client *client = object->client;
-	struct nvkm_client_notify *notify;
-	union {
-		struct nvif_notify_req_v0 v0;
-	} *req = data;
-	u8  index, reply;
-	int ret = -ENOSYS;
-
-	for (index = 0; index < ARRAY_SIZE(client->notify); index++) {
-		if (!client->notify[index])
-			break;
-	}
-
-	if (index == ARRAY_SIZE(client->notify))
-		return -ENOSPC;
-
-	notify = kzalloc(sizeof(*notify), GFP_KERNEL);
-	if (!notify)
-		return -ENOMEM;
-
-	nvif_ioctl(object, "notify new size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, req->v0, 0, 0, true))) {
-		nvif_ioctl(object, "notify new vers %d reply %d route %02x "
-				   "token %llx\n", req->v0.version,
-			   req->v0.reply, req->v0.route, req->v0.token);
-		notify->version = req->v0.version;
-		notify->size = sizeof(notify->rep.v0);
-		notify->rep.v0.version = req->v0.version;
-		notify->rep.v0.route = req->v0.route;
-		notify->rep.v0.token = req->v0.token;
-		reply = req->v0.reply;
-	}
-
-	if (ret == 0) {
-		ret = nvkm_notify_init(object, event, nvkm_client_notify,
-				       false, data, size, reply, &notify->n);
-		if (ret == 0) {
-			client->notify[index] = notify;
-			notify->client = client;
-			return index;
-		}
-	}
-
-	kfree(notify);
-	return ret;
-}
-
 static const struct nvkm_object_func nvkm_client;
 struct nvkm_client *
 nvkm_client_search(struct nvkm_client *client, u64 handle)
@@ -255,23 +147,13 @@ nvkm_client_child_get(struct nvkm_object *object, int index,
 static int
 nvkm_client_fini(struct nvkm_object *object, bool suspend)
 {
-	struct nvkm_client *client = nvkm_client(object);
-	const char *name[2] = { "fini", "suspend" };
-	int i;
-	nvif_debug(object, "%s notify\n", name[suspend]);
-	for (i = 0; i < ARRAY_SIZE(client->notify); i++)
-		nvkm_client_notify_put(client, i);
 	return 0;
 }
 
 static void *
 nvkm_client_dtor(struct nvkm_object *object)
 {
-	struct nvkm_client *client = nvkm_client(object);
-	int i;
-	for (i = 0; i < ARRAY_SIZE(client->notify); i++)
-		nvkm_client_notify_del(client, i);
-	return client;
+	return nvkm_client(object);
 }
 
 static const struct nvkm_object_func
@@ -283,10 +165,8 @@ nvkm_client = {
 };
 
 int
-nvkm_client_new(const char *name, u64 device, const char *cfg,
-		const char *dbg,
-		int (*ntfy)(const void *, u32, const void *, u32),
-		struct nvkm_client **pclient)
+nvkm_client_new(const char *name, u64 device, const char *cfg, const char *dbg,
+		int (*event)(u64, void *, u32), struct nvkm_client **pclient)
 {
 	struct nvkm_oclass oclass = { .base = nvkm_uclient_sclass };
 	struct nvkm_client *client;
@@ -300,7 +180,7 @@ nvkm_client_new(const char *name, u64 device, const char *cfg,
 	client->device = device;
 	client->debug = nvkm_dbgopt(dbg, "CLIENT");
 	client->objroot = RB_ROOT;
-	client->ntfy = ntfy;
+	client->event = event;
 	INIT_LIST_HEAD(&client->umem);
 	spin_lock_init(&client->lock);
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
index e41a39ae1597..36a31e9eea22 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
@@ -35,16 +35,23 @@ nvkm_engine_chsw_load(struct nvkm_engine *engine)
 	return false;
 }
 
+int
+nvkm_engine_reset(struct nvkm_engine *engine)
+{
+	if (engine->func->reset)
+		return engine->func->reset(engine);
+
+	nvkm_subdev_fini(&engine->subdev, false);
+	return nvkm_subdev_init(&engine->subdev);
+}
+
 void
 nvkm_engine_unref(struct nvkm_engine **pengine)
 {
 	struct nvkm_engine *engine = *pengine;
+
 	if (engine) {
-		if (refcount_dec_and_mutex_lock(&engine->use.refcount, &engine->use.mutex)) {
-			nvkm_subdev_fini(&engine->subdev, false);
-			engine->use.enabled = false;
-			mutex_unlock(&engine->use.mutex);
-		}
+		nvkm_subdev_unref(&engine->subdev);
 		*pengine = NULL;
 	}
 }
@@ -53,21 +60,13 @@ struct nvkm_engine *
 nvkm_engine_ref(struct nvkm_engine *engine)
 {
 	int ret;
+
 	if (engine) {
-		if (!refcount_inc_not_zero(&engine->use.refcount)) {
-			mutex_lock(&engine->use.mutex);
-			if (!refcount_inc_not_zero(&engine->use.refcount)) {
-				engine->use.enabled = true;
-				if ((ret = nvkm_subdev_init(&engine->subdev))) {
-					engine->use.enabled = false;
-					mutex_unlock(&engine->use.mutex);
-					return ERR_PTR(ret);
-				}
-				refcount_set(&engine->use.refcount, 1);
-			}
-			mutex_unlock(&engine->use.mutex);
-		}
+		ret = nvkm_subdev_ref(&engine->subdev);
+		if (ret)
+			return ERR_PTR(ret);
 	}
+
 	return engine;
 }
 
@@ -91,14 +90,10 @@ static int
 nvkm_engine_info(struct nvkm_subdev *subdev, u64 mthd, u64 *data)
 {
 	struct nvkm_engine *engine = nvkm_engine(subdev);
-	if (engine->func->info) {
-		if (!IS_ERR((engine = nvkm_engine_ref(engine)))) {
-			int ret = engine->func->info(engine, mthd, data);
-			nvkm_engine_unref(&engine);
-			return ret;
-		}
-		return PTR_ERR(engine);
-	}
+
+	if (engine->func->info)
+		return engine->func->info(engine, mthd, data);
+
 	return -ENOSYS;
 }
 
@@ -117,26 +112,6 @@ nvkm_engine_init(struct nvkm_subdev *subdev)
 	struct nvkm_engine *engine = nvkm_engine(subdev);
 	struct nvkm_fb *fb = subdev->device->fb;
 	int ret = 0, i;
-	s64 time;
-
-	if (!engine->use.enabled) {
-		nvkm_trace(subdev, "init skipped, engine has no users\n");
-		return ret;
-	}
-
-	if (engine->func->oneinit && !engine->subdev.oneinit) {
-		nvkm_trace(subdev, "one-time init running...\n");
-		time = ktime_to_us(ktime_get());
-		ret = engine->func->oneinit(engine);
-		if (ret) {
-			nvkm_trace(subdev, "one-time init failed, %d\n", ret);
-			return ret;
-		}
-
-		engine->subdev.oneinit = true;
-		time = ktime_to_us(ktime_get()) - time;
-		nvkm_trace(subdev, "one-time init completed in %lldus\n", time);
-	}
 
 	if (engine->func->init)
 		ret = engine->func->init(engine);
@@ -147,6 +122,17 @@ nvkm_engine_init(struct nvkm_subdev *subdev)
 }
 
 static int
+nvkm_engine_oneinit(struct nvkm_subdev *subdev)
+{
+	struct nvkm_engine *engine = nvkm_engine(subdev);
+
+	if (engine->func->oneinit)
+		return engine->func->oneinit(engine);
+
+	return 0;
+}
+
+static int
 nvkm_engine_preinit(struct nvkm_subdev *subdev)
 {
 	struct nvkm_engine *engine = nvkm_engine(subdev);
@@ -161,7 +147,6 @@ nvkm_engine_dtor(struct nvkm_subdev *subdev)
 	struct nvkm_engine *engine = nvkm_engine(subdev);
 	if (engine->func->dtor)
 		return engine->func->dtor(engine);
-	mutex_destroy(&engine->use.mutex);
 	return engine;
 }
 
@@ -169,6 +154,7 @@ const struct nvkm_subdev_func
 nvkm_engine = {
 	.dtor = nvkm_engine_dtor,
 	.preinit = nvkm_engine_preinit,
+	.oneinit = nvkm_engine_oneinit,
 	.init = nvkm_engine_init,
 	.fini = nvkm_engine_fini,
 	.info = nvkm_engine_info,
@@ -179,10 +165,9 @@ int
 nvkm_engine_ctor(const struct nvkm_engine_func *func, struct nvkm_device *device,
 		 enum nvkm_subdev_type type, int inst, bool enable, struct nvkm_engine *engine)
 {
-	nvkm_subdev_ctor(&nvkm_engine, device, type, inst, &engine->subdev);
 	engine->func = func;
-	refcount_set(&engine->use.refcount, 0);
-	mutex_init(&engine->use.mutex);
+	nvkm_subdev_ctor(&nvkm_engine, device, type, inst, &engine->subdev);
+	refcount_set(&engine->subdev.use.refcount, 0);
 
 	if (!nvkm_boolopt(device->cfgopt, engine->subdev.name, enable)) {
 		nvkm_debug(&engine->subdev, "disabled\n");
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/event.c b/drivers/gpu/drm/nouveau/nvkm/core/event.c
index 006618d77aa4..a6c877135598 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/event.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/event.c
@@ -20,54 +20,171 @@
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 #include <core/event.h>
-#include <core/notify.h>
+#include <core/subdev.h>
 
-void
+static void
 nvkm_event_put(struct nvkm_event *event, u32 types, int index)
 {
 	assert_spin_locked(&event->refs_lock);
+
+	nvkm_trace(event->subdev, "event: decr %08x on %d\n", types, index);
+
 	while (types) {
 		int type = __ffs(types); types &= ~(1 << type);
 		if (--event->refs[index * event->types_nr + type] == 0) {
+			nvkm_trace(event->subdev, "event: blocking %d on %d\n", type, index);
 			if (event->func->fini)
 				event->func->fini(event, 1 << type, index);
 		}
 	}
 }
 
-void
+static void
 nvkm_event_get(struct nvkm_event *event, u32 types, int index)
 {
 	assert_spin_locked(&event->refs_lock);
+
+	nvkm_trace(event->subdev, "event: incr %08x on %d\n", types, index);
+
 	while (types) {
 		int type = __ffs(types); types &= ~(1 << type);
 		if (++event->refs[index * event->types_nr + type] == 1) {
+			nvkm_trace(event->subdev, "event: allowing %d on %d\n", type, index);
 			if (event->func->init)
 				event->func->init(event, 1 << type, index);
 		}
 	}
 }
 
+static void
+nvkm_event_ntfy_state(struct nvkm_event_ntfy *ntfy)
+{
+	struct nvkm_event *event = ntfy->event;
+	unsigned long flags;
+
+	nvkm_trace(event->subdev, "event: ntfy state changed\n");
+	spin_lock_irqsave(&event->refs_lock, flags);
+
+	if (atomic_read(&ntfy->allowed) != ntfy->running) {
+		if (ntfy->running) {
+			nvkm_event_put(ntfy->event, ntfy->bits, ntfy->id);
+			ntfy->running = false;
+		} else {
+			nvkm_event_get(ntfy->event, ntfy->bits, ntfy->id);
+			ntfy->running = true;
+		}
+	}
+
+	spin_unlock_irqrestore(&event->refs_lock, flags);
+}
+
+static void
+nvkm_event_ntfy_remove(struct nvkm_event_ntfy *ntfy)
+{
+	spin_lock_irq(&ntfy->event->list_lock);
+	list_del_init(&ntfy->head);
+	spin_unlock_irq(&ntfy->event->list_lock);
+}
+
+static void
+nvkm_event_ntfy_insert(struct nvkm_event_ntfy *ntfy)
+{
+	spin_lock_irq(&ntfy->event->list_lock);
+	list_add_tail(&ntfy->head, &ntfy->event->ntfy);
+	spin_unlock_irq(&ntfy->event->list_lock);
+}
+
+static void
+nvkm_event_ntfy_block_(struct nvkm_event_ntfy *ntfy, bool wait)
+{
+	struct nvkm_subdev *subdev = ntfy->event->subdev;
+
+	nvkm_trace(subdev, "event: ntfy block %08x on %d wait:%d\n", ntfy->bits, ntfy->id, wait);
+
+	if (atomic_xchg(&ntfy->allowed, 0) == 1) {
+		nvkm_event_ntfy_state(ntfy);
+		if (wait)
+			nvkm_event_ntfy_remove(ntfy);
+	}
+}
+
 void
-nvkm_event_send(struct nvkm_event *event, u32 types, int index,
-		void *data, u32 size)
+nvkm_event_ntfy_block(struct nvkm_event_ntfy *ntfy)
 {
-	struct nvkm_notify *notify;
+	if (ntfy->event)
+		nvkm_event_ntfy_block_(ntfy, ntfy->wait);
+}
+
+void
+nvkm_event_ntfy_allow(struct nvkm_event_ntfy *ntfy)
+{
+	nvkm_trace(ntfy->event->subdev, "event: ntfy allow %08x on %d\n", ntfy->bits, ntfy->id);
+
+	if (atomic_xchg(&ntfy->allowed, 1) == 0) {
+		nvkm_event_ntfy_state(ntfy);
+		if (ntfy->wait)
+			nvkm_event_ntfy_insert(ntfy);
+	}
+}
+
+void
+nvkm_event_ntfy_del(struct nvkm_event_ntfy *ntfy)
+{
+	struct nvkm_event *event = ntfy->event;
+
+	if (!event)
+		return;
+
+	nvkm_trace(event->subdev, "event: ntfy del %08x on %d\n", ntfy->bits, ntfy->id);
+
+	nvkm_event_ntfy_block_(ntfy, false);
+	nvkm_event_ntfy_remove(ntfy);
+	ntfy->event = NULL;
+}
+
+void
+nvkm_event_ntfy_add(struct nvkm_event *event, int id, u32 bits, bool wait, nvkm_event_func func,
+		    struct nvkm_event_ntfy *ntfy)
+{
+	nvkm_trace(event->subdev, "event: ntfy add %08x on %d wait:%d\n", id, bits, wait);
+
+	ntfy->event = event;
+	ntfy->id = id;
+	ntfy->bits = bits;
+	ntfy->wait = wait;
+	ntfy->func = func;
+	atomic_set(&ntfy->allowed, 0);
+	ntfy->running = false;
+	INIT_LIST_HEAD(&ntfy->head);
+	if (!ntfy->wait)
+		nvkm_event_ntfy_insert(ntfy);
+}
+
+bool
+nvkm_event_ntfy_valid(struct nvkm_event *event, int id, u32 bits)
+{
+	return true;
+}
+
+void
+nvkm_event_ntfy(struct nvkm_event *event, int id, u32 bits)
+{
+	struct nvkm_event_ntfy *ntfy, *ntmp;
 	unsigned long flags;
 
-	if (!event->refs || WARN_ON(index >= event->index_nr))
+	if (!event->refs || WARN_ON(id >= event->index_nr))
 		return;
 
+	nvkm_trace(event->subdev, "event: ntfy %08x on %d\n", bits, id);
 	spin_lock_irqsave(&event->list_lock, flags);
-	list_for_each_entry(notify, &event->list, head) {
-		if (notify->index == index && (notify->types & types)) {
-			if (event->func->send) {
-				event->func->send(data, size, notify);
-				continue;
-			}
-			nvkm_notify_send(notify, data, size);
+
+	list_for_each_entry_safe(ntfy, ntmp, &event->ntfy, head) {
+		if (ntfy->id == id && ntfy->bits & bits) {
+			if (atomic_read(&ntfy->allowed))
+				ntfy->func(ntfy, ntfy->bits & bits);
 		}
 	}
+
 	spin_unlock_irqrestore(&event->list_lock, flags);
 }
 
@@ -81,20 +198,17 @@ nvkm_event_fini(struct nvkm_event *event)
 }
 
 int
-nvkm_event_init(const struct nvkm_event_func *func, int types_nr, int index_nr,
-		struct nvkm_event *event)
+__nvkm_event_init(const struct nvkm_event_func *func, struct nvkm_subdev *subdev,
+		  int types_nr, int index_nr, struct nvkm_event *event)
 {
-	event->refs = kzalloc(array3_size(index_nr, types_nr,
-					  sizeof(*event->refs)),
-			      GFP_KERNEL);
+	event->refs = kzalloc(array3_size(index_nr, types_nr, sizeof(*event->refs)), GFP_KERNEL);
 	if (!event->refs)
 		return -ENOMEM;
 
 	event->func = func;
+	event->subdev = subdev;
 	event->types_nr = types_nr;
 	event->index_nr = index_nr;
-	spin_lock_init(&event->refs_lock);
-	spin_lock_init(&event->list_lock);
-	INIT_LIST_HEAD(&event->list);
+	INIT_LIST_HEAD(&event->ntfy);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c
index ca1f8463cff5..fcf2a002f6cb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c
@@ -22,6 +22,9 @@
 #include <core/device.h>
 #include <core/firmware.h>
 
+#include <subdev/fb.h>
+#include <subdev/mmu.h>
+
 int
 nvkm_firmware_load_name(const struct nvkm_subdev *subdev, const char *base,
 			const char *name, int ver, const struct firmware **pfw)
@@ -107,3 +110,127 @@ nvkm_firmware_put(const struct firmware *fw)
 {
 	release_firmware(fw);
 }
+
+#define nvkm_firmware_mem(p) container_of((p), struct nvkm_firmware, mem.memory)
+
+static int
+nvkm_firmware_mem_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm,
+		      struct nvkm_vma *vma, void *argv, u32 argc)
+{
+	struct nvkm_firmware *fw = nvkm_firmware_mem(memory);
+	struct nvkm_vmm_map map = {
+		.memory = &fw->mem.memory,
+		.offset = offset,
+		.sgl = &fw->mem.sgl,
+	};
+
+	if (WARN_ON(fw->func->type != NVKM_FIRMWARE_IMG_DMA))
+		return -ENOSYS;
+
+	return nvkm_vmm_map(vmm, vma, argv, argc, &map);
+}
+
+static u64
+nvkm_firmware_mem_size(struct nvkm_memory *memory)
+{
+	return sg_dma_len(&nvkm_firmware_mem(memory)->mem.sgl);
+}
+
+static u64
+nvkm_firmware_mem_addr(struct nvkm_memory *memory)
+{
+	return nvkm_firmware_mem(memory)->phys;
+}
+
+static u8
+nvkm_firmware_mem_page(struct nvkm_memory *memory)
+{
+	return PAGE_SHIFT;
+}
+
+static enum nvkm_memory_target
+nvkm_firmware_mem_target(struct nvkm_memory *memory)
+{
+	return NVKM_MEM_TARGET_HOST;
+}
+
+static void *
+nvkm_firmware_mem_dtor(struct nvkm_memory *memory)
+{
+	return NULL;
+}
+
+static const struct nvkm_memory_func
+nvkm_firmware_mem = {
+	.dtor = nvkm_firmware_mem_dtor,
+	.target = nvkm_firmware_mem_target,
+	.page = nvkm_firmware_mem_page,
+	.addr = nvkm_firmware_mem_addr,
+	.size = nvkm_firmware_mem_size,
+	.map = nvkm_firmware_mem_map,
+};
+
+void
+nvkm_firmware_dtor(struct nvkm_firmware *fw)
+{
+	struct nvkm_memory *memory = &fw->mem.memory;
+
+	if (!fw->img)
+		return;
+
+	switch (fw->func->type) {
+	case NVKM_FIRMWARE_IMG_RAM:
+		kfree(fw->img);
+		break;
+	case NVKM_FIRMWARE_IMG_DMA:
+		nvkm_memory_unref(&memory);
+		dma_free_coherent(fw->device->dev, sg_dma_len(&fw->mem.sgl), fw->img, fw->phys);
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+
+	fw->img = NULL;
+}
+
+int
+nvkm_firmware_ctor(const struct nvkm_firmware_func *func, const char *name,
+		   struct nvkm_device *device, const void *src, int len, struct nvkm_firmware *fw)
+{
+	fw->func = func;
+	fw->name = name;
+	fw->device = device;
+	fw->len = len;
+
+	switch (fw->func->type) {
+	case NVKM_FIRMWARE_IMG_RAM:
+		fw->img = kmemdup(src, fw->len, GFP_KERNEL);
+		break;
+	case NVKM_FIRMWARE_IMG_DMA: {
+		dma_addr_t addr;
+
+		len = ALIGN(fw->len, PAGE_SIZE);
+
+		fw->img = dma_alloc_coherent(fw->device->dev, len, &addr, GFP_KERNEL);
+		if (fw->img) {
+			memcpy(fw->img, src, fw->len);
+			fw->phys = addr;
+		}
+
+		sg_init_one(&fw->mem.sgl, fw->img, len);
+		sg_dma_address(&fw->mem.sgl) = fw->phys;
+		sg_dma_len(&fw->mem.sgl) = len;
+	}
+		break;
+	default:
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	if (!fw->img)
+		return -ENOMEM;
+
+	nvkm_memory_ctor(&nvkm_firmware_mem, &fw->mem.memory);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/intr.c b/drivers/gpu/drm/nouveau/nvkm/core/intr.c
new file mode 100644
index 000000000000..e20b7ca218c3
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/core/intr.c
@@ -0,0 +1,442 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include <core/intr.h>
+#include <core/device.h>
+#include <core/subdev.h>
+#include <subdev/pci.h>
+#include <subdev/top.h>
+
+static int
+nvkm_intr_xlat(struct nvkm_subdev *subdev, struct nvkm_intr *intr,
+	       enum nvkm_intr_type type, int *leaf, u32 *mask)
+{
+	struct nvkm_device *device = subdev->device;
+
+	if (type < NVKM_INTR_VECTOR_0) {
+		if (type == NVKM_INTR_SUBDEV) {
+			const struct nvkm_intr_data *data = intr->data;
+			struct nvkm_top_device *tdev;
+
+			while (data && data->mask) {
+				if (data->type == NVKM_SUBDEV_TOP) {
+					list_for_each_entry(tdev, &device->top->device, head) {
+						if (tdev->intr >= 0 &&
+						    tdev->type == subdev->type &&
+						    tdev->inst == subdev->inst) {
+							if (data->mask & BIT(tdev->intr)) {
+								*leaf = data->leaf;
+								*mask = BIT(tdev->intr);
+								return 0;
+							}
+						}
+					}
+				} else
+				if (data->type == subdev->type && data->inst == subdev->inst) {
+					*leaf = data->leaf;
+					*mask = data->mask;
+					return 0;
+				}
+
+				data++;
+			}
+		} else {
+			return -ENOSYS;
+		}
+	} else {
+		if (type < intr->leaves * sizeof(*intr->stat) * 8) {
+			*leaf = type / 32;
+			*mask = BIT(type % 32);
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+static struct nvkm_intr *
+nvkm_intr_find(struct nvkm_subdev *subdev, enum nvkm_intr_type type, int *leaf, u32 *mask)
+{
+	struct nvkm_intr *intr;
+	int ret;
+
+	list_for_each_entry(intr, &subdev->device->intr.intr, head) {
+		ret = nvkm_intr_xlat(subdev, intr, type, leaf, mask);
+		if (ret == 0)
+			return intr;
+	}
+
+	return NULL;
+}
+
+static void
+nvkm_intr_allow_locked(struct nvkm_intr *intr, int leaf, u32 mask)
+{
+	intr->mask[leaf] |= mask;
+	if (intr->func->allow) {
+		if (intr->func->reset)
+			intr->func->reset(intr, leaf, mask);
+		intr->func->allow(intr, leaf, mask);
+	}
+}
+
+void
+nvkm_intr_allow(struct nvkm_subdev *subdev, enum nvkm_intr_type type)
+{
+	struct nvkm_device *device = subdev->device;
+	struct nvkm_intr *intr;
+	unsigned long flags;
+	int leaf;
+	u32 mask;
+
+	intr = nvkm_intr_find(subdev, type, &leaf, &mask);
+	if (intr) {
+		nvkm_debug(intr->subdev, "intr %d/%08x allowed by %s\n", leaf, mask, subdev->name);
+		spin_lock_irqsave(&device->intr.lock, flags);
+		nvkm_intr_allow_locked(intr, leaf, mask);
+		spin_unlock_irqrestore(&device->intr.lock, flags);
+	}
+}
+
+static void
+nvkm_intr_block_locked(struct nvkm_intr *intr, int leaf, u32 mask)
+{
+	intr->mask[leaf] &= ~mask;
+	if (intr->func->block)
+		intr->func->block(intr, leaf, mask);
+}
+
+void
+nvkm_intr_block(struct nvkm_subdev *subdev, enum nvkm_intr_type type)
+{
+	struct nvkm_device *device = subdev->device;
+	struct nvkm_intr *intr;
+	unsigned long flags;
+	int leaf;
+	u32 mask;
+
+	intr = nvkm_intr_find(subdev, type, &leaf, &mask);
+	if (intr) {
+		nvkm_debug(intr->subdev, "intr %d/%08x blocked by %s\n", leaf, mask, subdev->name);
+		spin_lock_irqsave(&device->intr.lock, flags);
+		nvkm_intr_block_locked(intr, leaf, mask);
+		spin_unlock_irqrestore(&device->intr.lock, flags);
+	}
+}
+
+static void
+nvkm_intr_rearm_locked(struct nvkm_device *device)
+{
+	struct nvkm_intr *intr;
+
+	list_for_each_entry(intr, &device->intr.intr, head)
+		intr->func->rearm(intr);
+}
+
+static void
+nvkm_intr_unarm_locked(struct nvkm_device *device)
+{
+	struct nvkm_intr *intr;
+
+	list_for_each_entry(intr, &device->intr.intr, head)
+		intr->func->unarm(intr);
+}
+
+static irqreturn_t
+nvkm_intr(int irq, void *arg)
+{
+	struct nvkm_device *device = arg;
+	struct nvkm_intr *intr;
+	struct nvkm_inth *inth;
+	irqreturn_t ret = IRQ_NONE;
+	bool pending = false;
+	int prio, leaf;
+
+	/* Disable all top-level interrupt sources, and re-arm MSI interrupts. */
+	spin_lock(&device->intr.lock);
+	if (!device->intr.armed)
+		goto done_unlock;
+
+	nvkm_intr_unarm_locked(device);
+	nvkm_pci_msi_rearm(device);
+
+	/* Fetch pending interrupt masks. */
+	list_for_each_entry(intr, &device->intr.intr, head) {
+		if (intr->func->pending(intr))
+			pending = true;
+	}
+
+	if (!pending)
+		goto done;
+
+	/* Check that GPU is still on the bus by reading NV_PMC_BOOT_0. */
+	if (WARN_ON(nvkm_rd32(device, 0x000000) == 0xffffffff))
+		goto done;
+
+	/* Execute handlers. */
+	for (prio = 0; prio < ARRAY_SIZE(device->intr.prio); prio++) {
+		list_for_each_entry(inth, &device->intr.prio[prio], head) {
+			struct nvkm_intr *intr = inth->intr;
+
+			if (intr->stat[inth->leaf] & inth->mask) {
+				if (atomic_read(&inth->allowed)) {
+					if (intr->func->reset)
+						intr->func->reset(intr, inth->leaf, inth->mask);
+					if (inth->func(inth) == IRQ_HANDLED)
+						ret = IRQ_HANDLED;
+				}
+			}
+		}
+	}
+
+	/* Nothing handled?  Some debugging/protection from IRQ storms is in order... */
+	if (ret == IRQ_NONE) {
+		list_for_each_entry(intr, &device->intr.intr, head) {
+			for (leaf = 0; leaf < intr->leaves; leaf++) {
+				if (intr->stat[leaf]) {
+					nvkm_warn(intr->subdev, "intr%d: %08x\n",
+						  leaf, intr->stat[leaf]);
+					nvkm_intr_block_locked(intr, leaf, intr->stat[leaf]);
+				}
+			}
+		}
+	}
+
+done:
+	/* Re-enable all top-level interrupt sources. */
+	nvkm_intr_rearm_locked(device);
+done_unlock:
+	spin_unlock(&device->intr.lock);
+	return ret;
+}
+
+int
+nvkm_intr_add(const struct nvkm_intr_func *func, const struct nvkm_intr_data *data,
+	      struct nvkm_subdev *subdev, int leaves, struct nvkm_intr *intr)
+{
+	struct nvkm_device *device = subdev->device;
+	int i;
+
+	intr->func = func;
+	intr->data = data;
+	intr->subdev = subdev;
+	intr->leaves = leaves;
+	intr->stat = kcalloc(leaves, sizeof(*intr->stat), GFP_KERNEL);
+	intr->mask = kcalloc(leaves, sizeof(*intr->mask), GFP_KERNEL);
+	if (!intr->stat || !intr->mask) {
+		kfree(intr->stat);
+		return -ENOMEM;
+	}
+
+	if (intr->subdev->debug >= NV_DBG_DEBUG) {
+		for (i = 0; i < intr->leaves; i++)
+			intr->mask[i] = ~0;
+	}
+
+	spin_lock_irq(&device->intr.lock);
+	list_add_tail(&intr->head, &device->intr.intr);
+	spin_unlock_irq(&device->intr.lock);
+	return 0;
+}
+
+static irqreturn_t
+nvkm_intr_subdev(struct nvkm_inth *inth)
+{
+	struct nvkm_subdev *subdev = container_of(inth, typeof(*subdev), inth);
+
+	nvkm_subdev_intr(subdev);
+	return IRQ_HANDLED;
+}
+
+static void
+nvkm_intr_subdev_add_dev(struct nvkm_intr *intr, enum nvkm_subdev_type type, int inst)
+{
+	struct nvkm_subdev *subdev;
+	enum nvkm_intr_prio prio;
+	int ret;
+
+	subdev = nvkm_device_subdev(intr->subdev->device, type, inst);
+	if (!subdev || !subdev->func->intr)
+		return;
+
+	if (type == NVKM_ENGINE_DISP)
+		prio = NVKM_INTR_PRIO_VBLANK;
+	else
+		prio = NVKM_INTR_PRIO_NORMAL;
+
+	ret = nvkm_inth_add(intr, NVKM_INTR_SUBDEV, prio, subdev, nvkm_intr_subdev, &subdev->inth);
+	if (WARN_ON(ret))
+		return;
+
+	nvkm_inth_allow(&subdev->inth);
+}
+
+static void
+nvkm_intr_subdev_add(struct nvkm_intr *intr)
+{
+	const struct nvkm_intr_data *data;
+	struct nvkm_device *device = intr->subdev->device;
+	struct nvkm_top_device *tdev;
+
+	for (data = intr->data; data && data->mask; data++) {
+		if (data->legacy) {
+			if (data->type == NVKM_SUBDEV_TOP) {
+				list_for_each_entry(tdev, &device->top->device, head) {
+					if (tdev->intr < 0 || !(data->mask & BIT(tdev->intr)))
+						continue;
+
+					nvkm_intr_subdev_add_dev(intr, tdev->type, tdev->inst);
+				}
+			} else {
+				nvkm_intr_subdev_add_dev(intr, data->type, data->inst);
+			}
+		}
+	}
+}
+
+void
+nvkm_intr_rearm(struct nvkm_device *device)
+{
+	struct nvkm_intr *intr;
+	int i;
+
+	if (unlikely(!device->intr.legacy_done)) {
+		list_for_each_entry(intr, &device->intr.intr, head)
+			nvkm_intr_subdev_add(intr);
+		device->intr.legacy_done = true;
+	}
+
+	spin_lock_irq(&device->intr.lock);
+	list_for_each_entry(intr, &device->intr.intr, head) {
+		for (i = 0; intr->func->block && i < intr->leaves; i++) {
+			intr->func->block(intr, i, ~0);
+			intr->func->allow(intr, i, intr->mask[i]);
+		}
+	}
+
+	nvkm_intr_rearm_locked(device);
+	device->intr.armed = true;
+	spin_unlock_irq(&device->intr.lock);
+}
+
+void
+nvkm_intr_unarm(struct nvkm_device *device)
+{
+	spin_lock_irq(&device->intr.lock);
+	nvkm_intr_unarm_locked(device);
+	device->intr.armed = false;
+	spin_unlock_irq(&device->intr.lock);
+}
+
+int
+nvkm_intr_install(struct nvkm_device *device)
+{
+	int ret;
+
+	device->intr.irq = device->func->irq(device);
+	if (device->intr.irq < 0)
+		return device->intr.irq;
+
+	ret = request_irq(device->intr.irq, nvkm_intr, IRQF_SHARED, "nvkm", device);
+	if (ret)
+		return ret;
+
+	device->intr.alloc = true;
+	return 0;
+}
+
+void
+nvkm_intr_dtor(struct nvkm_device *device)
+{
+	struct nvkm_intr *intr, *intt;
+
+	list_for_each_entry_safe(intr, intt, &device->intr.intr, head) {
+		list_del(&intr->head);
+		kfree(intr->mask);
+		kfree(intr->stat);
+	}
+
+	if (device->intr.alloc)
+		free_irq(device->intr.irq, device);
+}
+
+void
+nvkm_intr_ctor(struct nvkm_device *device)
+{
+	int i;
+
+	INIT_LIST_HEAD(&device->intr.intr);
+	for (i = 0; i < ARRAY_SIZE(device->intr.prio); i++)
+		INIT_LIST_HEAD(&device->intr.prio[i]);
+
+	spin_lock_init(&device->intr.lock);
+	device->intr.armed = false;
+}
+
+void
+nvkm_inth_block(struct nvkm_inth *inth)
+{
+	if (unlikely(!inth->intr))
+		return;
+
+	atomic_set(&inth->allowed, 0);
+}
+
+void
+nvkm_inth_allow(struct nvkm_inth *inth)
+{
+	struct nvkm_intr *intr = inth->intr;
+	unsigned long flags;
+
+	if (unlikely(!inth->intr))
+		return;
+
+	spin_lock_irqsave(&intr->subdev->device->intr.lock, flags);
+	if (!atomic_xchg(&inth->allowed, 1)) {
+		if ((intr->mask[inth->leaf] & inth->mask) != inth->mask)
+			nvkm_intr_allow_locked(intr, inth->leaf, inth->mask);
+	}
+	spin_unlock_irqrestore(&intr->subdev->device->intr.lock, flags);
+}
+
+int
+nvkm_inth_add(struct nvkm_intr *intr, enum nvkm_intr_type type, enum nvkm_intr_prio prio,
+	      struct nvkm_subdev *subdev, nvkm_inth_func func, struct nvkm_inth *inth)
+{
+	struct nvkm_device *device = subdev->device;
+	int ret;
+
+	if (WARN_ON(inth->mask))
+		return -EBUSY;
+
+	ret = nvkm_intr_xlat(subdev, intr, type, &inth->leaf, &inth->mask);
+	if (ret)
+		return ret;
+
+	nvkm_debug(intr->subdev, "intr %d/%08x requested by %s\n",
+		   inth->leaf, inth->mask, subdev->name);
+
+	inth->intr = intr;
+	inth->func = func;
+	atomic_set(&inth->allowed, 0);
+	list_add_tail(&inth->head, &device->intr.prio[prio]);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
index 45f920da89af..0b33287e43a7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
@@ -47,6 +47,26 @@ nvkm_ioctl_nop(struct nvkm_client *client,
 	return ret;
 }
 
+#include <nvif/class.h>
+
+static int
+nvkm_ioctl_sclass_(struct nvkm_object *object, int index, struct nvkm_oclass *oclass)
+{
+	if ( object->func->uevent &&
+	    !object->func->uevent(object, NULL, 0, NULL) && index-- == 0) {
+		oclass->ctor = nvkm_uevent_new;
+		oclass->base.minver = 0;
+		oclass->base.maxver = 0;
+		oclass->base.oclass = NVIF_CLASS_EVENT;
+		return 0;
+	}
+
+	if (object->func->sclass)
+		return object->func->sclass(object, index, oclass);
+
+	return -ENOSYS;
+}
+
 static int
 nvkm_ioctl_sclass(struct nvkm_client *client,
 		  struct nvkm_object *object, void *data, u32 size)
@@ -64,8 +84,7 @@ nvkm_ioctl_sclass(struct nvkm_client *client,
 		if (size != args->v0.count * sizeof(args->v0.oclass[0]))
 			return -EINVAL;
 
-		while (object->func->sclass &&
-		       object->func->sclass(object, i, &oclass) >= 0) {
+		while (nvkm_ioctl_sclass_(object, i, &oclass) >= 0) {
 			if (i < args->v0.count) {
 				args->v0.oclass[i].oclass = oclass.base.oclass;
 				args->v0.oclass[i].minver = oclass.base.minver;
@@ -100,7 +119,7 @@ nvkm_ioctl_new(struct nvkm_client *client,
 	} else
 		return ret;
 
-	if (!parent->func->sclass) {
+	if (!parent->func->sclass && !parent->func->uevent) {
 		nvif_ioctl(parent, "cannot have children\n");
 		return -EINVAL;
 	}
@@ -113,7 +132,7 @@ nvkm_ioctl_new(struct nvkm_client *client,
 		oclass.object = args->v0.object;
 		oclass.client = client;
 		oclass.parent = parent;
-		ret = parent->func->sclass(parent, i++, &oclass);
+		ret = nvkm_ioctl_sclass_(parent, i++, &oclass);
 		if (ret)
 			return ret;
 	} while (oclass.base.oclass != args->v0.oclass);
@@ -294,90 +313,6 @@ nvkm_ioctl_unmap(struct nvkm_client *client,
 	return ret;
 }
 
-static int
-nvkm_ioctl_ntfy_new(struct nvkm_client *client,
-		    struct nvkm_object *object, void *data, u32 size)
-{
-	union {
-		struct nvif_ioctl_ntfy_new_v0 v0;
-	} *args = data;
-	struct nvkm_event *event;
-	int ret = -ENOSYS;
-
-	nvif_ioctl(object, "ntfy new size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) {
-		nvif_ioctl(object, "ntfy new vers %d event %02x\n",
-			   args->v0.version, args->v0.event);
-		ret = nvkm_object_ntfy(object, args->v0.event, &event);
-		if (ret == 0) {
-			ret = nvkm_client_notify_new(object, event, data, size);
-			if (ret >= 0) {
-				args->v0.index = ret;
-				ret = 0;
-			}
-		}
-	}
-
-	return ret;
-}
-
-static int
-nvkm_ioctl_ntfy_del(struct nvkm_client *client,
-		    struct nvkm_object *object, void *data, u32 size)
-{
-	union {
-		struct nvif_ioctl_ntfy_del_v0 v0;
-	} *args = data;
-	int ret = -ENOSYS;
-
-	nvif_ioctl(object, "ntfy del size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-		nvif_ioctl(object, "ntfy del vers %d index %d\n",
-			   args->v0.version, args->v0.index);
-		ret = nvkm_client_notify_del(client, args->v0.index);
-	}
-
-	return ret;
-}
-
-static int
-nvkm_ioctl_ntfy_get(struct nvkm_client *client,
-		    struct nvkm_object *object, void *data, u32 size)
-{
-	union {
-		struct nvif_ioctl_ntfy_get_v0 v0;
-	} *args = data;
-	int ret = -ENOSYS;
-
-	nvif_ioctl(object, "ntfy get size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-		nvif_ioctl(object, "ntfy get vers %d index %d\n",
-			   args->v0.version, args->v0.index);
-		ret = nvkm_client_notify_get(client, args->v0.index);
-	}
-
-	return ret;
-}
-
-static int
-nvkm_ioctl_ntfy_put(struct nvkm_client *client,
-		    struct nvkm_object *object, void *data, u32 size)
-{
-	union {
-		struct nvif_ioctl_ntfy_put_v0 v0;
-	} *args = data;
-	int ret = -ENOSYS;
-
-	nvif_ioctl(object, "ntfy put size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-		nvif_ioctl(object, "ntfy put vers %d index %d\n",
-			   args->v0.version, args->v0.index);
-		ret = nvkm_client_notify_put(client, args->v0.index);
-	}
-
-	return ret;
-}
-
 static struct {
 	int version;
 	int (*func)(struct nvkm_client *, struct nvkm_object *, void *, u32);
@@ -392,10 +327,6 @@ nvkm_ioctl_v0[] = {
 	{ 0x00, nvkm_ioctl_wr },
 	{ 0x00, nvkm_ioctl_map },
 	{ 0x00, nvkm_ioctl_unmap },
-	{ 0x00, nvkm_ioctl_ntfy_new },
-	{ 0x00, nvkm_ioctl_ntfy_del },
-	{ 0x00, nvkm_ioctl_ntfy_get },
-	{ 0x00, nvkm_ioctl_ntfy_put },
 };
 
 static int
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/notify.c b/drivers/gpu/drm/nouveau/nvkm/core/notify.c
deleted file mode 100644
index 023610d01458..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/core/notify.c
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright 2014 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs <bskeggs@redhat.com>
- */
-#include <core/notify.h>
-#include <core/event.h>
-
-static inline void
-nvkm_notify_put_locked(struct nvkm_notify *notify)
-{
-	if (notify->block++ == 0)
-		nvkm_event_put(notify->event, notify->types, notify->index);
-}
-
-void
-nvkm_notify_put(struct nvkm_notify *notify)
-{
-	struct nvkm_event *event = notify->event;
-	unsigned long flags;
-	if (likely(event) &&
-	    test_and_clear_bit(NVKM_NOTIFY_USER, &notify->flags)) {
-		spin_lock_irqsave(&event->refs_lock, flags);
-		nvkm_notify_put_locked(notify);
-		spin_unlock_irqrestore(&event->refs_lock, flags);
-		if (test_bit(NVKM_NOTIFY_WORK, &notify->flags))
-			flush_work(&notify->work);
-	}
-}
-
-static inline void
-nvkm_notify_get_locked(struct nvkm_notify *notify)
-{
-	if (--notify->block == 0)
-		nvkm_event_get(notify->event, notify->types, notify->index);
-}
-
-void
-nvkm_notify_get(struct nvkm_notify *notify)
-{
-	struct nvkm_event *event = notify->event;
-	unsigned long flags;
-	if (likely(event) &&
-	    !test_and_set_bit(NVKM_NOTIFY_USER, &notify->flags)) {
-		spin_lock_irqsave(&event->refs_lock, flags);
-		nvkm_notify_get_locked(notify);
-		spin_unlock_irqrestore(&event->refs_lock, flags);
-	}
-}
-
-static inline void
-nvkm_notify_func(struct nvkm_notify *notify)
-{
-	struct nvkm_event *event = notify->event;
-	int ret = notify->func(notify);
-	unsigned long flags;
-	if ((ret == NVKM_NOTIFY_KEEP) ||
-	    !test_and_clear_bit(NVKM_NOTIFY_USER, &notify->flags)) {
-		spin_lock_irqsave(&event->refs_lock, flags);
-		nvkm_notify_get_locked(notify);
-		spin_unlock_irqrestore(&event->refs_lock, flags);
-	}
-}
-
-static void
-nvkm_notify_work(struct work_struct *work)
-{
-	struct nvkm_notify *notify = container_of(work, typeof(*notify), work);
-	nvkm_notify_func(notify);
-}
-
-void
-nvkm_notify_send(struct nvkm_notify *notify, void *data, u32 size)
-{
-	struct nvkm_event *event = notify->event;
-	unsigned long flags;
-
-	assert_spin_locked(&event->list_lock);
-	BUG_ON(size != notify->size);
-
-	spin_lock_irqsave(&event->refs_lock, flags);
-	if (notify->block) {
-		spin_unlock_irqrestore(&event->refs_lock, flags);
-		return;
-	}
-	nvkm_notify_put_locked(notify);
-	spin_unlock_irqrestore(&event->refs_lock, flags);
-
-	if (test_bit(NVKM_NOTIFY_WORK, &notify->flags)) {
-		memcpy((void *)notify->data, data, size);
-		schedule_work(&notify->work);
-	} else {
-		notify->data = data;
-		nvkm_notify_func(notify);
-		notify->data = NULL;
-	}
-}
-
-void
-nvkm_notify_fini(struct nvkm_notify *notify)
-{
-	unsigned long flags;
-	if (notify->event) {
-		nvkm_notify_put(notify);
-		spin_lock_irqsave(&notify->event->list_lock, flags);
-		list_del(&notify->head);
-		spin_unlock_irqrestore(&notify->event->list_lock, flags);
-		kfree((void *)notify->data);
-		notify->event = NULL;
-	}
-}
-
-int
-nvkm_notify_init(struct nvkm_object *object, struct nvkm_event *event,
-		 int (*func)(struct nvkm_notify *), bool work,
-		 void *data, u32 size, u32 reply,
-		 struct nvkm_notify *notify)
-{
-	unsigned long flags;
-	int ret = -ENODEV;
-	if ((notify->event = event), event->refs) {
-		ret = event->func->ctor(object, data, size, notify);
-		if (ret == 0 && (ret = -EINVAL, notify->size == reply)) {
-			notify->flags = 0;
-			notify->block = 1;
-			notify->func = func;
-			notify->data = NULL;
-			if (ret = 0, work) {
-				INIT_WORK(&notify->work, nvkm_notify_work);
-				set_bit(NVKM_NOTIFY_WORK, &notify->flags);
-				notify->data = kmalloc(reply, GFP_KERNEL);
-				if (!notify->data)
-					ret = -ENOMEM;
-			}
-		}
-		if (ret == 0) {
-			spin_lock_irqsave(&event->list_lock, flags);
-			list_add_tail(&notify->head, &event->list);
-			spin_unlock_irqrestore(&event->list_lock, flags);
-		}
-	}
-	if (ret)
-		notify->event = NULL;
-	return ret;
-}
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/oproxy.c b/drivers/gpu/drm/nouveau/nvkm/core/oproxy.c
index 16299837a296..3385528da650 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/oproxy.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/oproxy.c
@@ -47,7 +47,12 @@ nvkm_oproxy_map(struct nvkm_object *object, void *argv, u32 argc,
 static int
 nvkm_oproxy_unmap(struct nvkm_object *object)
 {
-	return nvkm_object_unmap(nvkm_oproxy(object)->object);
+	struct nvkm_oproxy *oproxy = nvkm_oproxy(object);
+
+	if (unlikely(!oproxy->object))
+		return 0;
+
+	return nvkm_object_unmap(oproxy->object);
 }
 
 static int
@@ -106,6 +111,18 @@ nvkm_oproxy_sclass(struct nvkm_object *object, int index,
 }
 
 static int
+nvkm_oproxy_uevent(struct nvkm_object *object, void *argv, u32 argc,
+		   struct nvkm_uevent *uevent)
+{
+	struct nvkm_oproxy *oproxy = nvkm_oproxy(object);
+
+	if (!oproxy->object->func->uevent)
+		return -ENOSYS;
+
+	return oproxy->object->func->uevent(oproxy->object, argv, argc, uevent);
+}
+
+static int
 nvkm_oproxy_fini(struct nvkm_object *object, bool suspend)
 {
 	struct nvkm_oproxy *oproxy = nvkm_oproxy(object);
@@ -188,6 +205,7 @@ nvkm_oproxy_func = {
 	.wr32 = nvkm_oproxy_wr32,
 	.bind = nvkm_oproxy_bind,
 	.sclass = nvkm_oproxy_sclass,
+	.uevent = nvkm_oproxy_uevent,
 };
 
 void
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
index a74b7acb6832..6c20e827a069 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
@@ -54,7 +54,7 @@ int
 nvkm_subdev_fini(struct nvkm_subdev *subdev, bool suspend)
 {
 	struct nvkm_device *device = subdev->device;
-	const char *action = suspend ? "suspend" : "fini";
+	const char *action = suspend ? "suspend" : subdev->use.enabled ? "fini" : "reset";
 	s64 time;
 
 	nvkm_trace(subdev, "%s running...\n", action);
@@ -68,6 +68,7 @@ nvkm_subdev_fini(struct nvkm_subdev *subdev, bool suspend)
 				return ret;
 		}
 	}
+	subdev->use.enabled = false;
 
 	nvkm_mc_reset(device, subdev->type, subdev->inst);
 
@@ -97,30 +98,49 @@ nvkm_subdev_preinit(struct nvkm_subdev *subdev)
 	return 0;
 }
 
-int
-nvkm_subdev_init(struct nvkm_subdev *subdev)
+static int
+nvkm_subdev_oneinit_(struct nvkm_subdev *subdev)
 {
 	s64 time;
 	int ret;
 
-	nvkm_trace(subdev, "init running...\n");
+	if (!subdev->func->oneinit || subdev->oneinit)
+		return 0;
+
+	nvkm_trace(subdev, "one-time init running...\n");
 	time = ktime_to_us(ktime_get());
+	ret = subdev->func->oneinit(subdev);
+	if (ret) {
+		nvkm_error(subdev, "one-time init failed, %d\n", ret);
+		return ret;
+	}
 
-	if (subdev->func->oneinit && !subdev->oneinit) {
-		s64 time;
-		nvkm_trace(subdev, "one-time init running...\n");
-		time = ktime_to_us(ktime_get());
-		ret = subdev->func->oneinit(subdev);
-		if (ret) {
-			nvkm_error(subdev, "one-time init failed, %d\n", ret);
-			return ret;
-		}
+	subdev->oneinit = true;
+	time = ktime_to_us(ktime_get()) - time;
+	nvkm_trace(subdev, "one-time init completed in %lldus\n", time);
+	return 0;
+}
 
-		subdev->oneinit = true;
-		time = ktime_to_us(ktime_get()) - time;
-		nvkm_trace(subdev, "one-time init completed in %lldus\n", time);
+static int
+nvkm_subdev_init_(struct nvkm_subdev *subdev)
+{
+	s64 time;
+	int ret;
+
+	if (subdev->use.enabled) {
+		nvkm_trace(subdev, "init skipped, already running\n");
+		return 0;
 	}
 
+	nvkm_trace(subdev, "init running...\n");
+	time = ktime_to_us(ktime_get());
+
+	ret = nvkm_subdev_oneinit_(subdev);
+	if (ret)
+		return ret;
+
+	subdev->use.enabled = true;
+
 	if (subdev->func->init) {
 		ret = subdev->func->init(subdev);
 		if (ret) {
@@ -134,6 +154,64 @@ nvkm_subdev_init(struct nvkm_subdev *subdev)
 	return 0;
 }
 
+int
+nvkm_subdev_init(struct nvkm_subdev *subdev)
+{
+	int ret;
+
+	mutex_lock(&subdev->use.mutex);
+	if (refcount_read(&subdev->use.refcount) == 0) {
+		nvkm_trace(subdev, "init skipped, no users\n");
+		mutex_unlock(&subdev->use.mutex);
+		return 0;
+	}
+
+	ret = nvkm_subdev_init_(subdev);
+	mutex_unlock(&subdev->use.mutex);
+	return ret;
+}
+
+int
+nvkm_subdev_oneinit(struct nvkm_subdev *subdev)
+{
+	int ret;
+
+	mutex_lock(&subdev->use.mutex);
+	ret = nvkm_subdev_oneinit_(subdev);
+	mutex_unlock(&subdev->use.mutex);
+	return ret;
+}
+
+void
+nvkm_subdev_unref(struct nvkm_subdev *subdev)
+{
+	if (refcount_dec_and_mutex_lock(&subdev->use.refcount, &subdev->use.mutex)) {
+		nvkm_subdev_fini(subdev, false);
+		mutex_unlock(&subdev->use.mutex);
+	}
+}
+
+int
+nvkm_subdev_ref(struct nvkm_subdev *subdev)
+{
+	int ret;
+
+	if (subdev && !refcount_inc_not_zero(&subdev->use.refcount)) {
+		mutex_lock(&subdev->use.mutex);
+		if (!refcount_inc_not_zero(&subdev->use.refcount)) {
+			if ((ret = nvkm_subdev_init_(subdev))) {
+				mutex_unlock(&subdev->use.mutex);
+				return ret;
+			}
+
+			refcount_set(&subdev->use.refcount, 1);
+		}
+		mutex_unlock(&subdev->use.mutex);
+	}
+
+	return 0;
+}
+
 void
 nvkm_subdev_del(struct nvkm_subdev **psubdev)
 {
@@ -146,6 +224,7 @@ nvkm_subdev_del(struct nvkm_subdev **psubdev)
 		list_del(&subdev->head);
 		if (subdev->func->dtor)
 			*psubdev = subdev->func->dtor(subdev);
+		mutex_destroy(&subdev->use.mutex);
 		time = ktime_to_us(ktime_get()) - time;
 		nvkm_trace(subdev, "destroy completed in %lldus\n", time);
 		kfree(*psubdev);
@@ -167,8 +246,8 @@ nvkm_subdev_disable(struct nvkm_device *device, enum nvkm_subdev_type type, int
 }
 
 void
-nvkm_subdev_ctor(const struct nvkm_subdev_func *func, struct nvkm_device *device,
-		 enum nvkm_subdev_type type, int inst, struct nvkm_subdev *subdev)
+__nvkm_subdev_ctor(const struct nvkm_subdev_func *func, struct nvkm_device *device,
+		   enum nvkm_subdev_type type, int inst, struct nvkm_subdev *subdev)
 {
 	subdev->func = func;
 	subdev->device = device;
@@ -180,6 +259,8 @@ nvkm_subdev_ctor(const struct nvkm_subdev_func *func, struct nvkm_device *device
 	else
 		strscpy(subdev->name, nvkm_subdev_type[type], sizeof(subdev->name));
 	subdev->debug = nvkm_dbgopt(device->dbgopt, subdev->name);
+
+	refcount_set(&subdev->use.refcount, 1);
 	list_add_tail(&subdev->head, &device->subdev);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/uevent.c b/drivers/gpu/drm/nouveau/nvkm/core/uevent.c
new file mode 100644
index 000000000000..ba9d9edaec75
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/core/uevent.c
@@ -0,0 +1,157 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#define nvkm_uevent(p) container_of((p), struct nvkm_uevent, object)
+#include <core/event.h>
+#include <core/client.h>
+
+#include <nvif/if000e.h>
+
+struct nvkm_uevent {
+	struct nvkm_object object;
+	struct nvkm_object *parent;
+	nvkm_uevent_func func;
+	bool wait;
+
+	struct nvkm_event_ntfy ntfy;
+	atomic_t allowed;
+};
+
+static int
+nvkm_uevent_mthd_block(struct nvkm_uevent *uevent, union nvif_event_block_args *args, u32 argc)
+{
+	if (argc != sizeof(args->vn))
+		return -ENOSYS;
+
+	nvkm_event_ntfy_block(&uevent->ntfy);
+	atomic_set(&uevent->allowed, 0);
+	return 0;
+}
+
+static int
+nvkm_uevent_mthd_allow(struct nvkm_uevent *uevent, union nvif_event_allow_args *args, u32 argc)
+{
+	if (argc != sizeof(args->vn))
+		return -ENOSYS;
+
+	nvkm_event_ntfy_allow(&uevent->ntfy);
+	atomic_set(&uevent->allowed, 1);
+	return 0;
+}
+
+static int
+nvkm_uevent_mthd(struct nvkm_object *object, u32 mthd, void *argv, u32 argc)
+{
+	struct nvkm_uevent *uevent = nvkm_uevent(object);
+
+	switch (mthd) {
+	case NVIF_EVENT_V0_ALLOW: return nvkm_uevent_mthd_allow(uevent, argv, argc);
+	case NVIF_EVENT_V0_BLOCK: return nvkm_uevent_mthd_block(uevent, argv, argc);
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+static int
+nvkm_uevent_fini(struct nvkm_object *object, bool suspend)
+{
+	struct nvkm_uevent *uevent = nvkm_uevent(object);
+
+	nvkm_event_ntfy_block(&uevent->ntfy);
+	return 0;
+}
+
+static int
+nvkm_uevent_init(struct nvkm_object *object)
+{
+	struct nvkm_uevent *uevent = nvkm_uevent(object);
+
+	if (atomic_read(&uevent->allowed))
+		nvkm_event_ntfy_allow(&uevent->ntfy);
+
+	return 0;
+}
+
+static void *
+nvkm_uevent_dtor(struct nvkm_object *object)
+{
+	struct nvkm_uevent *uevent = nvkm_uevent(object);
+
+	nvkm_event_ntfy_del(&uevent->ntfy);
+	return uevent;
+}
+
+static const struct nvkm_object_func
+nvkm_uevent = {
+	.dtor = nvkm_uevent_dtor,
+	.init = nvkm_uevent_init,
+	.fini = nvkm_uevent_fini,
+	.mthd = nvkm_uevent_mthd,
+};
+
+static int
+nvkm_uevent_ntfy(struct nvkm_event_ntfy *ntfy, u32 bits)
+{
+	struct nvkm_uevent *uevent = container_of(ntfy, typeof(*uevent), ntfy);
+	struct nvkm_client *client = uevent->object.client;
+
+	if (uevent->func)
+		return uevent->func(uevent->parent, uevent->object.token, bits);
+
+	return client->event(uevent->object.token, NULL, 0);
+}
+
+int
+nvkm_uevent_add(struct nvkm_uevent *uevent, struct nvkm_event *event, int id, u32 bits,
+		nvkm_uevent_func func)
+{
+	if (WARN_ON(uevent->func))
+		return -EBUSY;
+
+	nvkm_event_ntfy_add(event, id, bits, uevent->wait, nvkm_uevent_ntfy, &uevent->ntfy);
+	uevent->func = func;
+	return 0;
+}
+
+int
+nvkm_uevent_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		struct nvkm_object **pobject)
+{
+	struct nvkm_object *parent = oclass->parent;
+	struct nvkm_uevent *uevent;
+	union nvif_event_args *args = argv;
+
+	if (argc < sizeof(args->v0) || args->v0.version != 0)
+		return -ENOSYS;
+
+	if (!(uevent = kzalloc(sizeof(*uevent), GFP_KERNEL)))
+		return -ENOMEM;
+	*pobject = &uevent->object;
+
+	nvkm_object_ctor(&nvkm_uevent, oclass, &uevent->object);
+	uevent->parent = parent;
+	uevent->func = NULL;
+	uevent->wait = args->v0.wait;
+	uevent->ntfy.event = NULL;
+	return parent->func->uevent(parent, &args->v0.data, argc - sizeof(args->v0), uevent);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
index ba88613e1e46..8bf1635ffabc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
@@ -8,3 +8,5 @@ nvkm-y += nvkm/engine/ce/gp100.o
 nvkm-y += nvkm/engine/ce/gp102.o
 nvkm-y += nvkm/engine/ce/gv100.o
 nvkm-y += nvkm/engine/ce/tu102.o
+nvkm-y += nvkm/engine/ce/ga100.o
+nvkm-y += nvkm/engine/ce/ga102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga100.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga100.c
new file mode 100644
index 000000000000..6648ed62daa6
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga100.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <subdev/vfn.h>
+
+#include <nvif/class.h>
+
+static irqreturn_t
+ga100_ce_intr(struct nvkm_inth *inth)
+{
+	struct nvkm_subdev *subdev = container_of(inth, typeof(*subdev), inth);
+
+	/*TODO*/
+	nvkm_error(subdev, "intr\n");
+	return IRQ_NONE;
+}
+
+int
+ga100_ce_fini(struct nvkm_engine *engine, bool suspend)
+{
+	nvkm_inth_block(&engine->subdev.inth);
+	return 0;
+}
+
+int
+ga100_ce_init(struct nvkm_engine *engine)
+{
+	nvkm_inth_allow(&engine->subdev.inth);
+	return 0;
+}
+
+int
+ga100_ce_oneinit(struct nvkm_engine *engine)
+{
+	struct nvkm_subdev *subdev = &engine->subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 vector;
+
+	vector = nvkm_rd32(device, 0x10442c + (subdev->inst * 0x80)) & 0x00000fff;
+
+	return nvkm_inth_add(&device->vfn->intr, vector, NVKM_INTR_PRIO_NORMAL,
+			     subdev, ga100_ce_intr, &subdev->inth);
+}
+
+static const struct nvkm_engine_func
+ga100_ce = {
+	.oneinit = ga100_ce_oneinit,
+	.init = ga100_ce_init,
+	.fini = ga100_ce_fini,
+	.cclass = &gv100_ce_cclass,
+	.sclass = {
+		{ -1, -1, AMPERE_DMA_COPY_A },
+		{}
+	}
+};
+
+int
+ga100_ce_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
+	     struct nvkm_engine **pengine)
+{
+	return nvkm_engine_new_(&ga100_ce, device, type, inst, true, pengine);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usergv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga102.c
index 3dc3b8b312de..9f3448ad625f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usergv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/ga102.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2018 Red Hat Inc.
+ * Copyright 2021 Red Hat Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -19,27 +19,26 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-#include "user.h"
+#include "priv.h"
 
-static int
-gv100_fifo_user_map(struct nvkm_object *object, void *argv, u32 argc,
-		    enum nvkm_object_map *type, u64 *addr, u64 *size)
-{
-	struct nvkm_device *device = object->engine->subdev.device;
-	*addr = 0x810000 + device->func->resource_addr(device, 0);
-	*size = 0x010000;
-	*type = NVKM_OBJECT_MAP_IO;
-	return 0;
-}
+#include <nvif/class.h>
 
-static const struct nvkm_object_func
-gv100_fifo_user = {
-	.map = gv100_fifo_user_map,
+static const struct nvkm_engine_func
+ga102_ce = {
+	.oneinit = ga100_ce_oneinit,
+	.init = ga100_ce_init,
+	.fini = ga100_ce_fini,
+	.cclass = &gv100_ce_cclass,
+	.sclass = {
+		{ -1, -1, AMPERE_DMA_COPY_A },
+		{ -1, -1, AMPERE_DMA_COPY_B },
+		{}
+	}
 };
 
 int
-gv100_fifo_user_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
-		    struct nvkm_object **pobject)
+ga102_ce_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
+	     struct nvkm_engine **pengine)
 {
-	return nvkm_object_new_(&gv100_fifo_user, oclass, argv, argc, pobject);
+	return nvkm_engine_new_(&ga102_ce, device, type, inst, true, pengine);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c
index 09a112af2f89..c9bf6305c3ec 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gt215.c
@@ -40,7 +40,7 @@ gt215_ce_isr_error_name[] = {
 };
 
 void
-gt215_ce_intr(struct nvkm_falcon *ce, struct nvkm_fifo_chan *chan)
+gt215_ce_intr(struct nvkm_falcon *ce, struct nvkm_chan *chan)
 {
 	struct nvkm_subdev *subdev = &ce->engine.subdev;
 	struct nvkm_device *device = subdev->device;
@@ -55,9 +55,9 @@ gt215_ce_intr(struct nvkm_falcon *ce, struct nvkm_fifo_chan *chan)
 
 	nvkm_error(subdev, "DISPATCH_ERROR %04x [%s] ch %d [%010llx %s] "
 			   "subc %d mthd %04x data %08x\n", ssta,
-		   en ? en->name : "", chan ? chan->chid : -1,
+		   en ? en->name : "", chan ? chan->id : -1,
 		   chan ? chan->inst->addr : 0,
-		   chan ? chan->object.client->name : "unknown",
+		   chan ? chan->name : "unknown",
 		   subc, mthd, data);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h
index cd53b93664d6..c4c046916fa6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/priv.h
@@ -8,4 +8,8 @@ void gk104_ce_intr(struct nvkm_engine *);
 void gp100_ce_intr(struct nvkm_engine *);
 
 extern const struct nvkm_object_func gv100_ce_cclass;
+
+int ga100_ce_oneinit(struct nvkm_engine *);
+int ga100_ce_init(struct nvkm_engine *);
+int ga100_ce_fini(struct nvkm_engine *, bool);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/cipher/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/cipher/g84.c
index be2a7181dc15..caca4f639895 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/cipher/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/cipher/g84.c
@@ -81,8 +81,7 @@ g84_cipher_intr(struct nvkm_engine *cipher)
 {
 	struct nvkm_subdev *subdev = &cipher->subdev;
 	struct nvkm_device *device = subdev->device;
-	struct nvkm_fifo *fifo = device->fifo;
-	struct nvkm_fifo_chan *chan;
+	struct nvkm_chan *chan;
 	u32 stat = nvkm_rd32(device, 0x102130);
 	u32 mthd = nvkm_rd32(device, 0x102190);
 	u32 data = nvkm_rd32(device, 0x102194);
@@ -90,16 +89,16 @@ g84_cipher_intr(struct nvkm_engine *cipher)
 	unsigned long flags;
 	char msg[128];
 
-	chan = nvkm_fifo_chan_inst(fifo, (u64)inst << 12, &flags);
+	chan = nvkm_chan_get_inst(cipher, (u64)inst << 12, &flags);
 	if (stat) {
 		nvkm_snprintbf(msg, sizeof(msg), g84_cipher_intr_mask, stat);
 		nvkm_error(subdev,  "%08x [%s] ch %d [%010llx %s] "
 				    "mthd %04x data %08x\n", stat, msg,
-			   chan ? chan->chid : -1, (u64)inst << 12,
-			   chan ? chan->object.client->name : "unknown",
+			   chan ? chan->id : -1, (u64)inst << 12,
+			   chan ? chan->name : "unknown",
 			   mthd, data);
 	}
-	nvkm_fifo_chan_put(fifo, flags, &chan);
+	nvkm_chan_put(&chan, flags);
 
 	nvkm_wr32(device, 0x102130, stat);
 	nvkm_wr32(device, 0x10200c, 0x10);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index d8cf71fb0512..364fea320cb3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -1095,7 +1095,7 @@ nv98_chipset = {
 	.volt     = { 0x00000001, nv40_volt_new },
 	.disp     = { 0x00000001, g94_disp_new },
 	.dma      = { 0x00000001, nv50_dma_new },
-	.fifo     = { 0x00000001, g84_fifo_new },
+	.fifo     = { 0x00000001, g98_fifo_new },
 	.gr       = { 0x00000001, g84_gr_new },
 	.mspdec   = { 0x00000001, g98_mspdec_new },
 	.msppp    = { 0x00000001, g98_msppp_new },
@@ -1161,7 +1161,7 @@ nva3_chipset = {
 	.ce       = { 0x00000001, gt215_ce_new },
 	.disp     = { 0x00000001, gt215_disp_new },
 	.dma      = { 0x00000001, nv50_dma_new },
-	.fifo     = { 0x00000001, g84_fifo_new },
+	.fifo     = { 0x00000001, g98_fifo_new },
 	.gr       = { 0x00000001, gt215_gr_new },
 	.mpeg     = { 0x00000001, g84_mpeg_new },
 	.mspdec   = { 0x00000001, gt215_mspdec_new },
@@ -1195,7 +1195,7 @@ nva5_chipset = {
 	.ce       = { 0x00000001, gt215_ce_new },
 	.disp     = { 0x00000001, gt215_disp_new },
 	.dma      = { 0x00000001, nv50_dma_new },
-	.fifo     = { 0x00000001, g84_fifo_new },
+	.fifo     = { 0x00000001, g98_fifo_new },
 	.gr       = { 0x00000001, gt215_gr_new },
 	.mspdec   = { 0x00000001, gt215_mspdec_new },
 	.msppp    = { 0x00000001, gt215_msppp_new },
@@ -1228,7 +1228,7 @@ nva8_chipset = {
 	.ce       = { 0x00000001, gt215_ce_new },
 	.disp     = { 0x00000001, gt215_disp_new },
 	.dma      = { 0x00000001, nv50_dma_new },
-	.fifo     = { 0x00000001, g84_fifo_new },
+	.fifo     = { 0x00000001, g98_fifo_new },
 	.gr       = { 0x00000001, gt215_gr_new },
 	.mspdec   = { 0x00000001, gt215_mspdec_new },
 	.msppp    = { 0x00000001, gt215_msppp_new },
@@ -1259,7 +1259,7 @@ nvaa_chipset = {
 	.volt     = { 0x00000001, nv40_volt_new },
 	.disp     = { 0x00000001, mcp77_disp_new },
 	.dma      = { 0x00000001, nv50_dma_new },
-	.fifo     = { 0x00000001, g84_fifo_new },
+	.fifo     = { 0x00000001, g98_fifo_new },
 	.gr       = { 0x00000001, gt200_gr_new },
 	.mspdec   = { 0x00000001, g98_mspdec_new },
 	.msppp    = { 0x00000001, g98_msppp_new },
@@ -1291,7 +1291,7 @@ nvac_chipset = {
 	.volt     = { 0x00000001, nv40_volt_new },
 	.disp     = { 0x00000001, mcp77_disp_new },
 	.dma      = { 0x00000001, nv50_dma_new },
-	.fifo     = { 0x00000001, g84_fifo_new },
+	.fifo     = { 0x00000001, g98_fifo_new },
 	.gr       = { 0x00000001, mcp79_gr_new },
 	.mspdec   = { 0x00000001, g98_mspdec_new },
 	.msppp    = { 0x00000001, g98_msppp_new },
@@ -1325,7 +1325,7 @@ nvaf_chipset = {
 	.ce       = { 0x00000001, gt215_ce_new },
 	.disp     = { 0x00000001, mcp89_disp_new },
 	.dma      = { 0x00000001, nv50_dma_new },
-	.fifo     = { 0x00000001, g84_fifo_new },
+	.fifo     = { 0x00000001, g98_fifo_new },
 	.gr       = { 0x00000001, mcp89_gr_new },
 	.mspdec   = { 0x00000001, gt215_mspdec_new },
 	.msppp    = { 0x00000001, gt215_msppp_new },
@@ -2130,7 +2130,7 @@ nv12b_chipset = {
 	.volt     = { 0x00000001, gm20b_volt_new },
 	.ce       = { 0x00000004, gm200_ce_new },
 	.dma      = { 0x00000001, gf119_dma_new },
-	.fifo     = { 0x00000001, gm20b_fifo_new },
+	.fifo     = { 0x00000001, gm200_fifo_new },
 	.gr       = { 0x00000001, gm20b_gr_new },
 	.sw       = { 0x00000001, gf100_sw_new },
 };
@@ -2356,7 +2356,7 @@ nv13b_chipset = {
 	.top      = { 0x00000001, gk104_top_new },
 	.ce       = { 0x00000001, gp100_ce_new },
 	.dma      = { 0x00000001, gf119_dma_new },
-	.fifo     = { 0x00000001, gp10b_fifo_new },
+	.fifo     = { 0x00000001, gp100_fifo_new },
 	.gr       = { 0x00000001, gp10b_gr_new },
 	.sw       = { 0x00000001, gf100_sw_new },
 };
@@ -2364,7 +2364,7 @@ nv13b_chipset = {
 static const struct nvkm_device_chip
 nv140_chipset = {
 	.name = "GV100",
-	.acr      = { 0x00000001, gp108_acr_new },
+	.acr      = { 0x00000001, gv100_acr_new },
 	.bar      = { 0x00000001, gm107_bar_new },
 	.bios     = { 0x00000001, nvkm_bios_new },
 	.bus      = { 0x00000001, gf100_bus_new },
@@ -2385,6 +2385,7 @@ nv140_chipset = {
 	.therm    = { 0x00000001, gp100_therm_new },
 	.timer    = { 0x00000001, gk20a_timer_new },
 	.top      = { 0x00000001, gk104_top_new },
+	.vfn      = { 0x00000001, gv100_vfn_new },
 	.ce       = { 0x000001ff, gv100_ce_new },
 	.disp     = { 0x00000001, gv100_disp_new },
 	.dma      = { 0x00000001, gv100_dma_new },
@@ -2411,7 +2412,7 @@ nv162_chipset = {
 	.i2c      = { 0x00000001, gm200_i2c_new },
 	.imem     = { 0x00000001, nv50_instmem_new },
 	.ltc      = { 0x00000001, gp102_ltc_new },
-	.mc       = { 0x00000001, tu102_mc_new },
+	.mc       = { 0x00000001, gp100_mc_new },
 	.mmu      = { 0x00000001, tu102_mmu_new },
 	.pci      = { 0x00000001, gp100_pci_new },
 	.pmu      = { 0x00000001, gp102_pmu_new },
@@ -2419,6 +2420,7 @@ nv162_chipset = {
 	.therm    = { 0x00000001, gp100_therm_new },
 	.timer    = { 0x00000001, gk20a_timer_new },
 	.top      = { 0x00000001, gk104_top_new },
+	.vfn      = { 0x00000001, tu102_vfn_new },
 	.ce       = { 0x0000001f, tu102_ce_new },
 	.disp     = { 0x00000001, tu102_disp_new },
 	.dma      = { 0x00000001, gv100_dma_new },
@@ -2445,7 +2447,7 @@ nv164_chipset = {
 	.i2c      = { 0x00000001, gm200_i2c_new },
 	.imem     = { 0x00000001, nv50_instmem_new },
 	.ltc      = { 0x00000001, gp102_ltc_new },
-	.mc       = { 0x00000001, tu102_mc_new },
+	.mc       = { 0x00000001, gp100_mc_new },
 	.mmu      = { 0x00000001, tu102_mmu_new },
 	.pci      = { 0x00000001, gp100_pci_new },
 	.pmu      = { 0x00000001, gp102_pmu_new },
@@ -2453,6 +2455,7 @@ nv164_chipset = {
 	.therm    = { 0x00000001, gp100_therm_new },
 	.timer    = { 0x00000001, gk20a_timer_new },
 	.top      = { 0x00000001, gk104_top_new },
+	.vfn      = { 0x00000001, tu102_vfn_new },
 	.ce       = { 0x0000001f, tu102_ce_new },
 	.disp     = { 0x00000001, tu102_disp_new },
 	.dma      = { 0x00000001, gv100_dma_new },
@@ -2479,7 +2482,7 @@ nv166_chipset = {
 	.i2c      = { 0x00000001, gm200_i2c_new },
 	.imem     = { 0x00000001, nv50_instmem_new },
 	.ltc      = { 0x00000001, gp102_ltc_new },
-	.mc       = { 0x00000001, tu102_mc_new },
+	.mc       = { 0x00000001, gp100_mc_new },
 	.mmu      = { 0x00000001, tu102_mmu_new },
 	.pci      = { 0x00000001, gp100_pci_new },
 	.pmu      = { 0x00000001, gp102_pmu_new },
@@ -2487,6 +2490,7 @@ nv166_chipset = {
 	.therm    = { 0x00000001, gp100_therm_new },
 	.timer    = { 0x00000001, gk20a_timer_new },
 	.top      = { 0x00000001, gk104_top_new },
+	.vfn      = { 0x00000001, tu102_vfn_new },
 	.ce       = { 0x0000001f, tu102_ce_new },
 	.disp     = { 0x00000001, tu102_disp_new },
 	.dma      = { 0x00000001, gv100_dma_new },
@@ -2513,7 +2517,7 @@ nv167_chipset = {
 	.i2c      = { 0x00000001, gm200_i2c_new },
 	.imem     = { 0x00000001, nv50_instmem_new },
 	.ltc      = { 0x00000001, gp102_ltc_new },
-	.mc       = { 0x00000001, tu102_mc_new },
+	.mc       = { 0x00000001, gp100_mc_new },
 	.mmu      = { 0x00000001, tu102_mmu_new },
 	.pci      = { 0x00000001, gp100_pci_new },
 	.pmu      = { 0x00000001, gp102_pmu_new },
@@ -2521,6 +2525,7 @@ nv167_chipset = {
 	.therm    = { 0x00000001, gp100_therm_new },
 	.timer    = { 0x00000001, gk20a_timer_new },
 	.top      = { 0x00000001, gk104_top_new },
+	.vfn      = { 0x00000001, tu102_vfn_new },
 	.ce       = { 0x0000001f, tu102_ce_new },
 	.disp     = { 0x00000001, tu102_disp_new },
 	.dma      = { 0x00000001, gv100_dma_new },
@@ -2547,7 +2552,7 @@ nv168_chipset = {
 	.i2c      = { 0x00000001, gm200_i2c_new },
 	.imem     = { 0x00000001, nv50_instmem_new },
 	.ltc      = { 0x00000001, gp102_ltc_new },
-	.mc       = { 0x00000001, tu102_mc_new },
+	.mc       = { 0x00000001, gp100_mc_new },
 	.mmu      = { 0x00000001, tu102_mmu_new },
 	.pci      = { 0x00000001, gp100_pci_new },
 	.pmu      = { 0x00000001, gp102_pmu_new },
@@ -2555,6 +2560,7 @@ nv168_chipset = {
 	.therm    = { 0x00000001, gp100_therm_new },
 	.timer    = { 0x00000001, gk20a_timer_new },
 	.top      = { 0x00000001, gk104_top_new },
+	.vfn      = { 0x00000001, tu102_vfn_new },
 	.ce       = { 0x0000001f, tu102_ce_new },
 	.disp     = { 0x00000001, tu102_disp_new },
 	.dma      = { 0x00000001, gv100_dma_new },
@@ -2571,6 +2577,7 @@ nv170_chipset = {
 	.bar      = { 0x00000001, tu102_bar_new },
 	.bios     = { 0x00000001, nvkm_bios_new },
 	.devinit  = { 0x00000001, ga100_devinit_new },
+	.fault    = { 0x00000001, tu102_fault_new },
 	.fb       = { 0x00000001, ga100_fb_new },
 	.gpio     = { 0x00000001, gk104_gpio_new },
 	.i2c      = { 0x00000001, gm200_i2c_new },
@@ -2581,111 +2588,159 @@ nv170_chipset = {
 	.privring = { 0x00000001, gm200_privring_new },
 	.timer    = { 0x00000001, gk20a_timer_new },
 	.top      = { 0x00000001, ga100_top_new },
+	.vfn      = { 0x00000001, ga100_vfn_new },
+	.ce       = { 0x000003ff, ga100_ce_new },
+	.fifo     = { 0x00000001, ga100_fifo_new },
 };
 
 static const struct nvkm_device_chip
 nv172_chipset = {
 	.name = "GA102",
+	.acr      = { 0x00000001, ga102_acr_new },
 	.bar      = { 0x00000001, tu102_bar_new },
 	.bios     = { 0x00000001, nvkm_bios_new },
 	.devinit  = { 0x00000001, ga100_devinit_new },
+	.fault    = { 0x00000001, tu102_fault_new },
 	.fb       = { 0x00000001, ga102_fb_new },
 	.gpio     = { 0x00000001, ga102_gpio_new },
+	.gsp      = { 0x00000001, ga102_gsp_new },
 	.i2c      = { 0x00000001, gm200_i2c_new },
 	.imem     = { 0x00000001, nv50_instmem_new },
+	.ltc      = { 0x00000001, ga102_ltc_new },
 	.mc       = { 0x00000001, ga100_mc_new },
 	.mmu      = { 0x00000001, tu102_mmu_new },
 	.pci      = { 0x00000001, gp100_pci_new },
 	.privring = { 0x00000001, gm200_privring_new },
 	.timer    = { 0x00000001, gk20a_timer_new },
 	.top      = { 0x00000001, ga100_top_new },
+	.vfn      = { 0x00000001, ga100_vfn_new },
+	.ce       = { 0x0000001f, ga102_ce_new },
 	.disp     = { 0x00000001, ga102_disp_new },
 	.dma      = { 0x00000001, gv100_dma_new },
 	.fifo     = { 0x00000001, ga102_fifo_new },
+	.gr       = { 0x00000001, ga102_gr_new },
+	.nvdec    = { 0x00000001, ga102_nvdec_new },
+	.sec2     = { 0x00000001, ga102_sec2_new },
 };
 
 static const struct nvkm_device_chip
 nv173_chipset = {
 	.name = "GA103",
+	.acr      = { 0x00000001, ga102_acr_new },
 	.bar      = { 0x00000001, tu102_bar_new },
 	.bios     = { 0x00000001, nvkm_bios_new },
 	.devinit  = { 0x00000001, ga100_devinit_new },
+	.fault    = { 0x00000001, tu102_fault_new },
 	.fb       = { 0x00000001, ga102_fb_new },
 	.gpio     = { 0x00000001, ga102_gpio_new },
+	.gsp      = { 0x00000001, ga102_gsp_new },
 	.i2c      = { 0x00000001, gm200_i2c_new },
 	.imem     = { 0x00000001, nv50_instmem_new },
+	.ltc      = { 0x00000001, ga102_ltc_new },
 	.mc       = { 0x00000001, ga100_mc_new },
 	.mmu      = { 0x00000001, tu102_mmu_new },
 	.pci      = { 0x00000001, gp100_pci_new },
 	.privring = { 0x00000001, gm200_privring_new },
 	.timer    = { 0x00000001, gk20a_timer_new },
 	.top      = { 0x00000001, ga100_top_new },
+	.vfn      = { 0x00000001, ga100_vfn_new },
+	.ce       = { 0x0000001f, ga102_ce_new },
 	.disp     = { 0x00000001, ga102_disp_new },
 	.dma      = { 0x00000001, gv100_dma_new },
 	.fifo     = { 0x00000001, ga102_fifo_new },
+	.gr       = { 0x00000001, ga102_gr_new },
+	.nvdec    = { 0x00000001, ga102_nvdec_new },
+	.sec2     = { 0x00000001, ga102_sec2_new },
 };
 
 static const struct nvkm_device_chip
 nv174_chipset = {
 	.name = "GA104",
+	.acr      = { 0x00000001, ga102_acr_new },
 	.bar      = { 0x00000001, tu102_bar_new },
 	.bios     = { 0x00000001, nvkm_bios_new },
 	.devinit  = { 0x00000001, ga100_devinit_new },
+	.fault    = { 0x00000001, tu102_fault_new },
 	.fb       = { 0x00000001, ga102_fb_new },
 	.gpio     = { 0x00000001, ga102_gpio_new },
+	.gsp      = { 0x00000001, ga102_gsp_new },
 	.i2c      = { 0x00000001, gm200_i2c_new },
 	.imem     = { 0x00000001, nv50_instmem_new },
+	.ltc      = { 0x00000001, ga102_ltc_new },
 	.mc       = { 0x00000001, ga100_mc_new },
 	.mmu      = { 0x00000001, tu102_mmu_new },
 	.pci      = { 0x00000001, gp100_pci_new },
 	.privring = { 0x00000001, gm200_privring_new },
 	.timer    = { 0x00000001, gk20a_timer_new },
 	.top      = { 0x00000001, ga100_top_new },
+	.vfn      = { 0x00000001, ga100_vfn_new },
+	.ce       = { 0x0000001f, ga102_ce_new },
 	.disp     = { 0x00000001, ga102_disp_new },
 	.dma      = { 0x00000001, gv100_dma_new },
 	.fifo     = { 0x00000001, ga102_fifo_new },
+	.gr       = { 0x00000001, ga102_gr_new },
+	.nvdec    = { 0x00000001, ga102_nvdec_new },
+	.sec2     = { 0x00000001, ga102_sec2_new },
 };
 
 static const struct nvkm_device_chip
 nv176_chipset = {
 	.name = "GA106",
+	.acr      = { 0x00000001, ga102_acr_new },
 	.bar      = { 0x00000001, tu102_bar_new },
 	.bios     = { 0x00000001, nvkm_bios_new },
 	.devinit  = { 0x00000001, ga100_devinit_new },
+	.fault    = { 0x00000001, tu102_fault_new },
 	.fb       = { 0x00000001, ga102_fb_new },
 	.gpio     = { 0x00000001, ga102_gpio_new },
+	.gsp      = { 0x00000001, ga102_gsp_new },
 	.i2c      = { 0x00000001, gm200_i2c_new },
 	.imem     = { 0x00000001, nv50_instmem_new },
+	.ltc      = { 0x00000001, ga102_ltc_new },
 	.mc       = { 0x00000001, ga100_mc_new },
 	.mmu      = { 0x00000001, tu102_mmu_new },
 	.pci      = { 0x00000001, gp100_pci_new },
 	.privring = { 0x00000001, gm200_privring_new },
 	.timer    = { 0x00000001, gk20a_timer_new },
 	.top      = { 0x00000001, ga100_top_new },
+	.vfn      = { 0x00000001, ga100_vfn_new },
+	.ce       = { 0x0000001f, ga102_ce_new },
 	.disp     = { 0x00000001, ga102_disp_new },
 	.dma      = { 0x00000001, gv100_dma_new },
 	.fifo     = { 0x00000001, ga102_fifo_new },
+	.gr       = { 0x00000001, ga102_gr_new },
+	.nvdec    = { 0x00000001, ga102_nvdec_new },
+	.sec2     = { 0x00000001, ga102_sec2_new },
 };
 
 static const struct nvkm_device_chip
 nv177_chipset = {
 	.name = "GA107",
+	.acr      = { 0x00000001, ga102_acr_new },
 	.bar      = { 0x00000001, tu102_bar_new },
 	.bios     = { 0x00000001, nvkm_bios_new },
 	.devinit  = { 0x00000001, ga100_devinit_new },
+	.fault    = { 0x00000001, tu102_fault_new },
 	.fb       = { 0x00000001, ga102_fb_new },
 	.gpio     = { 0x00000001, ga102_gpio_new },
+	.gsp      = { 0x00000001, ga102_gsp_new },
 	.i2c      = { 0x00000001, gm200_i2c_new },
 	.imem     = { 0x00000001, nv50_instmem_new },
+	.ltc      = { 0x00000001, ga102_ltc_new },
 	.mc       = { 0x00000001, ga100_mc_new },
 	.mmu      = { 0x00000001, tu102_mmu_new },
 	.pci      = { 0x00000001, gp100_pci_new },
 	.privring = { 0x00000001, gm200_privring_new },
 	.timer    = { 0x00000001, gk20a_timer_new },
 	.top      = { 0x00000001, ga100_top_new },
+	.vfn      = { 0x00000001, ga100_vfn_new },
+	.ce       = { 0x0000001f, ga102_ce_new },
 	.disp     = { 0x00000001, ga102_disp_new },
 	.dma      = { 0x00000001, gv100_dma_new },
 	.fifo     = { 0x00000001, ga102_fifo_new },
+	.gr       = { 0x00000001, ga102_gr_new },
+	.nvdec    = { 0x00000001, ga102_nvdec_new },
+	.sec2     = { 0x00000001, ga102_sec2_new },
 };
 
 struct nvkm_subdev *
@@ -2734,6 +2789,8 @@ nvkm_device_fini(struct nvkm_device *device, bool suspend)
 	if (device->func->fini)
 		device->func->fini(device, suspend);
 
+	nvkm_intr_unarm(device);
+
 	time = ktime_to_us(ktime_get()) - time;
 	nvdev_trace(device, "%s completed in %lldus...\n", action, time);
 	return 0;
@@ -2759,6 +2816,8 @@ nvkm_device_preinit(struct nvkm_device *device)
 	nvdev_trace(device, "preinit running...\n");
 	time = ktime_to_us(ktime_get());
 
+	nvkm_intr_unarm(device);
+
 	if (device->func->preinit) {
 		ret = device->func->preinit(device);
 		if (ret)
@@ -2775,6 +2834,14 @@ nvkm_device_preinit(struct nvkm_device *device)
 	if (ret)
 		goto fail;
 
+	ret = nvkm_top_parse(device);
+	if (ret)
+		goto fail;
+
+	ret = nvkm_fb_mem_unlock(device->fb);
+	if (ret)
+		goto fail;
+
 	time = ktime_to_us(ktime_get()) - time;
 	nvdev_trace(device, "preinit completed in %lldus\n", time);
 	return 0;
@@ -2800,6 +2867,8 @@ nvkm_device_init(struct nvkm_device *device)
 	nvdev_trace(device, "init running...\n");
 	time = ktime_to_us(ktime_get());
 
+	nvkm_intr_rearm(device);
+
 	if (device->func->init) {
 		ret = device->func->init(device);
 		if (ret)
@@ -2837,6 +2906,8 @@ nvkm_device_del(struct nvkm_device **pdevice)
 	if (device) {
 		mutex_lock(&nv_devices_mutex);
 
+		nvkm_intr_dtor(device);
+
 		list_for_each_entry_safe_reverse(subdev, subtmp, &device->subdev, head)
 			nvkm_subdev_del(&subdev);
 
@@ -3144,6 +3215,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		device->name = device->chip->name;
 
 	mutex_init(&device->mutex);
+	nvkm_intr_ctor(device);
 
 #define NVKM_LAYOUT_ONCE(type,data,ptr)                                                      \
 	if (device->chip->ptr.inst && (subdev_mask & (BIT_ULL(type)))) {                     \
@@ -3185,7 +3257,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 #undef NVKM_LAYOUT_INST
 #undef NVKM_LAYOUT_ONCE
 
-	ret = 0;
+	ret = nvkm_intr_install(device);
 done:
 	if (device->pri && (!mmio || ret)) {
 		iounmap(device->pri);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index f302d2b5782a..abccb2bb68a6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -1574,6 +1574,12 @@ nvkm_device_pci_resource_size(struct nvkm_device *device, unsigned bar)
 	return pci_resource_len(pdev->pdev, bar);
 }
 
+static int
+nvkm_device_pci_irq(struct nvkm_device *device)
+{
+	return nvkm_device_pci(device)->pdev->irq;
+}
+
 static void
 nvkm_device_pci_fini(struct nvkm_device *device, bool suspend)
 {
@@ -1612,6 +1618,7 @@ nvkm_device_pci_func = {
 	.dtor = nvkm_device_pci_dtor,
 	.preinit = nvkm_device_pci_preinit,
 	.fini = nvkm_device_pci_fini,
+	.irq = nvkm_device_pci_irq,
 	.resource_addr = nvkm_device_pci_resource_addr,
 	.resource_size = nvkm_device_pci_resource_size,
 	.cpu_coherent = !IS_ENABLED(CONFIG_ARM),
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h
index 93949b3c7214..24faaac15891 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/priv.h
@@ -27,6 +27,7 @@
 #include <subdev/therm.h>
 #include <subdev/timer.h>
 #include <subdev/top.h>
+#include <subdev/vfn.h>
 #include <subdev/volt.h>
 
 #include <engine/bsp.h>
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index ac9e122586bc..87caa4a72921 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
@@ -206,45 +206,12 @@ nvkm_device_tegra_resource_size(struct nvkm_device *device, unsigned bar)
 	return res ? resource_size(res) : 0;
 }
 
-static irqreturn_t
-nvkm_device_tegra_intr(int irq, void *arg)
-{
-	struct nvkm_device_tegra *tdev = arg;
-	struct nvkm_device *device = &tdev->device;
-	bool handled = false;
-	nvkm_mc_intr_unarm(device);
-	nvkm_mc_intr(device, &handled);
-	nvkm_mc_intr_rearm(device);
-	return handled ? IRQ_HANDLED : IRQ_NONE;
-}
-
-static void
-nvkm_device_tegra_fini(struct nvkm_device *device, bool suspend)
-{
-	struct nvkm_device_tegra *tdev = nvkm_device_tegra(device);
-	if (tdev->irq) {
-		free_irq(tdev->irq, tdev);
-		tdev->irq = 0;
-	}
-}
-
 static int
-nvkm_device_tegra_init(struct nvkm_device *device)
+nvkm_device_tegra_irq(struct nvkm_device *device)
 {
 	struct nvkm_device_tegra *tdev = nvkm_device_tegra(device);
-	int irq, ret;
-
-	irq = platform_get_irq_byname(tdev->pdev, "stall");
-	if (irq < 0)
-		return irq;
 
-	ret = request_irq(irq, nvkm_device_tegra_intr,
-			  IRQF_SHARED, "nvkm", tdev);
-	if (ret)
-		return ret;
-
-	tdev->irq = irq;
-	return 0;
+	return platform_get_irq_byname(tdev->pdev, "stall");
 }
 
 static void *
@@ -260,8 +227,7 @@ static const struct nvkm_device_func
 nvkm_device_tegra_func = {
 	.tegra = nvkm_device_tegra,
 	.dtor = nvkm_device_tegra_dtor,
-	.init = nvkm_device_tegra_init,
-	.fini = nvkm_device_tegra_fini,
+	.irq = nvkm_device_tegra_irq,
 	.resource_addr = nvkm_device_tegra_resource_addr,
 	.resource_size = nvkm_device_tegra_resource_size,
 	.cpu_coherent = false,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
index 45f509c11c36..9b39ec341615 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
@@ -342,6 +342,8 @@ nvkm_udevice_child_get(struct nvkm_object *object, int index,
 			sclass = &device->mmu->user;
 		else if (device->fault && index-- == 0)
 			sclass = &device->fault->user;
+		else if (device->vfn && index-- == 0)
+			sclass = &device->vfn->user;
 		else
 			return -EINVAL;
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
index 600072a904be..e1aecd3fe96c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
@@ -28,9 +28,7 @@ nvkm-y += nvkm/engine/disp/gv100.o
 nvkm-y += nvkm/engine/disp/tu102.o
 nvkm-y += nvkm/engine/disp/ga102.o
 
-nvkm-y += nvkm/engine/disp/rootnv04.o
-nvkm-y += nvkm/engine/disp/rootnv50.o
-
 nvkm-y += nvkm/engine/disp/udisp.o
 nvkm-y += nvkm/engine/disp/uconn.o
 nvkm-y += nvkm/engine/disp/uoutp.o
+nvkm-y += nvkm/engine/disp/uhead.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
index 65c99d948b68..73104b59f97f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/base.c
@@ -29,7 +29,6 @@
 #include "outp.h"
 
 #include <core/client.h>
-#include <core/notify.h>
 #include <core/ramht.h>
 #include <subdev/bios.h>
 #include <subdev/bios/dcb.h>
@@ -57,32 +56,8 @@ nvkm_disp_vblank_init(struct nvkm_event *event, int type, int id)
 		head->func->vblank_get(head);
 }
 
-static int
-nvkm_disp_vblank_ctor(struct nvkm_object *object, void *data, u32 size,
-		      struct nvkm_notify *notify)
-{
-	struct nvkm_disp *disp =
-		container_of(notify->event, typeof(*disp), vblank);
-	union {
-		struct nvif_notify_head_req_v0 v0;
-	} *req = data;
-	int ret = -ENOSYS;
-
-	if (!(ret = nvif_unpack(ret, &data, &size, req->v0, 0, 0, false))) {
-		notify->size = sizeof(struct nvif_notify_head_rep_v0);
-		if (ret = -ENXIO, req->v0.head <= disp->vblank.index_nr) {
-			notify->types = 1;
-			notify->index = req->v0.head;
-			return 0;
-		}
-	}
-
-	return ret;
-}
-
 static const struct nvkm_event_func
 nvkm_disp_vblank_func = {
-	.ctor = nvkm_disp_vblank_ctor,
 	.init = nvkm_disp_vblank_init,
 	.fini = nvkm_disp_vblank_fini,
 };
@@ -90,59 +65,7 @@ nvkm_disp_vblank_func = {
 void
 nvkm_disp_vblank(struct nvkm_disp *disp, int head)
 {
-	struct nvif_notify_head_rep_v0 rep = {};
-	nvkm_event_send(&disp->vblank, 1, head, &rep, sizeof(rep));
-}
-
-static int
-nvkm_disp_hpd_ctor(struct nvkm_object *object, void *data, u32 size,
-		   struct nvkm_notify *notify)
-{
-	struct nvkm_disp *disp =
-		container_of(notify->event, typeof(*disp), hpd);
-	union {
-		struct nvif_notify_conn_req_v0 v0;
-	} *req = data;
-	struct nvkm_outp *outp;
-	int ret = -ENOSYS;
-
-	if (!(ret = nvif_unpack(ret, &data, &size, req->v0, 0, 0, false))) {
-		notify->size = sizeof(struct nvif_notify_conn_rep_v0);
-		list_for_each_entry(outp, &disp->outps, head) {
-			if (ret = -ENXIO, outp->conn->index == req->v0.conn) {
-				if (ret = -ENODEV, outp->conn->hpd.event) {
-					notify->types = req->v0.mask;
-					notify->index = req->v0.conn;
-					ret = 0;
-				}
-				break;
-			}
-		}
-	}
-
-	return ret;
-}
-
-static const struct nvkm_event_func
-nvkm_disp_hpd_func = {
-	.ctor = nvkm_disp_hpd_ctor
-};
-
-int
-nvkm_disp_ntfy(struct nvkm_object *object, u32 type, struct nvkm_event **event)
-{
-	struct nvkm_disp *disp = nvkm_disp(object->engine);
-	switch (type) {
-	case NV04_DISP_NTFY_VBLANK:
-		*event = &disp->vblank;
-		return 0;
-	case NV04_DISP_NTFY_CONN:
-		*event = &disp->hpd;
-		return 0;
-	default:
-		break;
-	}
-	return -EINVAL;
+	nvkm_event_ntfy(&disp->vblank, head, NVKM_DISP_HEAD_EVENT_VBLANK);
 }
 
 static int
@@ -343,9 +266,7 @@ nvkm_disp_oneinit(struct nvkm_engine *engine)
 		/* Apparently we need to create a new one! */
 		ret = nvkm_conn_new(disp, i, &connE, &outp->conn);
 		if (ret) {
-			nvkm_error(&disp->engine.subdev,
-				   "failed to create outp %d conn: %d\n",
-				   outp->index, ret);
+			nvkm_error(subdev, "failed to create outp %d conn: %d\n", outp->index, ret);
 			nvkm_conn_del(&outp->conn);
 			list_del(&outp->head);
 			nvkm_outp_del(&outp);
@@ -355,10 +276,6 @@ nvkm_disp_oneinit(struct nvkm_engine *engine)
 		list_add_tail(&outp->conn->head, &disp->conns);
 	}
 
-	ret = nvkm_event_init(&nvkm_disp_hpd_func, 3, hpd, &disp->hpd);
-	if (ret)
-		return ret;
-
 	if (disp->func->oneinit) {
 		ret = disp->func->oneinit(disp);
 		if (ret)
@@ -382,7 +299,7 @@ nvkm_disp_oneinit(struct nvkm_engine *engine)
 	list_for_each_entry(head, &disp->heads, head)
 		i = max(i, head->id + 1);
 
-	return nvkm_event_init(&nvkm_disp_vblank_func, 1, i, &disp->vblank);
+	return nvkm_event_init(&nvkm_disp_vblank_func, subdev, 1, i, &disp->vblank);
 }
 
 static void *
@@ -406,7 +323,6 @@ nvkm_disp_dtor(struct nvkm_engine *engine)
 	}
 
 	nvkm_event_fini(&disp->vblank);
-	nvkm_event_fini(&disp->hpd);
 
 	while (!list_empty(&disp->conns)) {
 		conn = list_first_entry(&disp->conns, typeof(*conn), head);
@@ -473,5 +389,6 @@ nvkm_disp_new_(const struct nvkm_disp_func *func, struct nvkm_device *device,
 		mutex_init(&disp->super.mutex);
 	}
 
-	return nvkm_event_init(func->uevent, 1, ARRAY_SIZE(disp->chan), &disp->uevent);
+	return nvkm_event_init(func->uevent, &disp->engine.subdev, 1, ARRAY_SIZE(disp->chan),
+			       &disp->uevent);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.c
index 7ed11801a3ae..fbdae1137864 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.c
@@ -29,38 +29,14 @@
 
 #include <nvif/event.h>
 
-static int
-nvkm_conn_hpd(struct nvkm_notify *notify)
-{
-	struct nvkm_conn *conn = container_of(notify, typeof(*conn), hpd);
-	struct nvkm_disp *disp = conn->disp;
-	struct nvkm_gpio *gpio = disp->engine.subdev.device->gpio;
-	const struct nvkm_gpio_ntfy_rep *line = notify->data;
-	struct nvif_notify_conn_rep_v0 rep;
-	int index = conn->index;
-
-	CONN_DBG(conn, "HPD: %d", line->mask);
-
-	if (!nvkm_gpio_get(gpio, 0, DCB_GPIO_UNUSED, conn->hpd.index))
-		rep.mask = NVIF_NOTIFY_CONN_V0_UNPLUG;
-	else
-		rep.mask = NVIF_NOTIFY_CONN_V0_PLUG;
-	rep.version = 0;
-
-	nvkm_event_send(&disp->hpd, rep.mask, index, &rep, sizeof(rep));
-	return NVKM_NOTIFY_KEEP;
-}
-
 void
 nvkm_conn_fini(struct nvkm_conn *conn)
 {
-	nvkm_notify_put(&conn->hpd);
 }
 
 void
 nvkm_conn_init(struct nvkm_conn *conn)
 {
-	nvkm_notify_get(&conn->hpd);
 }
 
 void
@@ -68,7 +44,6 @@ nvkm_conn_del(struct nvkm_conn **pconn)
 {
 	struct nvkm_conn *conn = *pconn;
 	if (conn) {
-		nvkm_notify_fini(&conn->hpd);
 		kfree(*pconn);
 		*pconn = NULL;
 	}
@@ -106,20 +81,6 @@ nvkm_conn_ctor(struct nvkm_disp *disp, int index, struct nvbios_connE *info,
 		}
 
 		conn->info.hpd = func.line;
-
-		ret = nvkm_notify_init(NULL, &gpio->event, nvkm_conn_hpd,
-				       true, &(struct nvkm_gpio_ntfy_req) {
-					.mask = NVKM_GPIO_TOGGLED,
-					.line = func.line,
-				       },
-				       sizeof(struct nvkm_gpio_ntfy_req),
-				       sizeof(struct nvkm_gpio_ntfy_rep),
-				       &conn->hpd);
-		if (ret) {
-			CONN_ERR(conn, "func %02x failed, %d", info->hpd, ret);
-		} else {
-			CONN_DBG(conn, "func %02x (HPD)", info->hpd);
-		}
 	}
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.h
index f109634ce5ca..a0600e72b0ec 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/conn.h
@@ -3,7 +3,6 @@
 #define __NVKM_DISP_CONN_H__
 #include "priv.h"
 
-#include <core/notify.h>
 #include <subdev/bios.h>
 #include <subdev/bios/conn.h>
 
@@ -12,8 +11,6 @@ struct nvkm_conn {
 	int index;
 	struct nvbios_connE info;
 
-	struct nvkm_notify hpd;
-
 	struct list_head head;
 
 	struct nvkm_object object;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
index c1b3206f27e6..40c8ea43c42f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.c
@@ -274,70 +274,17 @@ nvkm_dp_train_cr(struct lt_state *lt)
 }
 
 static int
-nvkm_dp_train_links(struct nvkm_outp *outp, int rate)
+nvkm_dp_train_link(struct nvkm_outp *outp, int rate)
 {
 	struct nvkm_ior *ior = outp->ior;
-	struct nvkm_disp *disp = outp->disp;
-	struct nvkm_subdev *subdev = &disp->engine.subdev;
-	struct nvkm_bios *bios = subdev->device->bios;
 	struct lt_state lt = {
 		.outp = outp,
+		.pc2 = outp->dp.dpcd[DPCD_RC02] & DPCD_RC02_TPS3_SUPPORTED,
 	};
-	u32 lnkcmp;
 	u8 sink[2], data;
 	int ret;
 
-	OUTP_DBG(outp, "training %d x %d MB/s", ior->dp.nr, ior->dp.bw * 27);
-
-	/* Intersect misc. capabilities of the OR and sink. */
-	if (disp->engine.subdev.device->chipset < 0x110)
-		outp->dp.dpcd[DPCD_RC03] &= ~DPCD_RC03_TPS4_SUPPORTED;
-	if (disp->engine.subdev.device->chipset < 0xd0)
-		outp->dp.dpcd[DPCD_RC02] &= ~DPCD_RC02_TPS3_SUPPORTED;
-	lt.pc2 = outp->dp.dpcd[DPCD_RC02] & DPCD_RC02_TPS3_SUPPORTED;
-
-	if (AMPERE_IED_HACK(disp) && (lnkcmp = lt.outp->dp.info.script[0])) {
-		/* Execute BeforeLinkTraining script from DP Info table. */
-		while (ior->dp.bw < nvbios_rd08(bios, lnkcmp))
-			lnkcmp += 3;
-		lnkcmp = nvbios_rd16(bios, lnkcmp + 1);
-
-		nvbios_init(&outp->disp->engine.subdev, lnkcmp,
-			init.outp = &outp->info;
-			init.or   = ior->id;
-			init.link = ior->asy.link;
-		);
-	}
-
-	/* Set desired link configuration on the source. */
-	if ((lnkcmp = lt.outp->dp.info.lnkcmp)) {
-		if (outp->dp.version < 0x30) {
-			while ((ior->dp.bw * 2700) < nvbios_rd16(bios, lnkcmp))
-				lnkcmp += 4;
-			lnkcmp = nvbios_rd16(bios, lnkcmp + 2);
-		} else {
-			while (ior->dp.bw < nvbios_rd08(bios, lnkcmp))
-				lnkcmp += 3;
-			lnkcmp = nvbios_rd16(bios, lnkcmp + 1);
-		}
-
-		nvbios_init(subdev, lnkcmp,
-			init.outp = &outp->info;
-			init.or   = ior->id;
-			init.link = ior->asy.link;
-		);
-	}
-
-	ret = ior->func->dp->links(ior, outp->dp.aux);
-	if (ret) {
-		if (ret < 0) {
-			OUTP_ERR(outp, "train failed with %d", ret);
-			return ret;
-		}
-		return 0;
-	}
-
-	ior->func->dp->power(ior, ior->dp.nr);
+	OUTP_DBG(outp, "training %dx%02x", ior->dp.nr, ior->dp.bw);
 
 	/* Select LTTPR non-transparent mode if we have a valid configuration,
 	 * use transparent mode otherwise.
@@ -393,6 +340,71 @@ nvkm_dp_train_links(struct nvkm_outp *outp, int rate)
 	return ret;
 }
 
+static int
+nvkm_dp_train_links(struct nvkm_outp *outp, int rate)
+{
+	struct nvkm_ior *ior = outp->ior;
+	struct nvkm_disp *disp = outp->disp;
+	struct nvkm_subdev *subdev = &disp->engine.subdev;
+	struct nvkm_bios *bios = subdev->device->bios;
+	u32 lnkcmp;
+	int ret;
+
+	OUTP_DBG(outp, "programming link for %dx%02x", ior->dp.nr, ior->dp.bw);
+
+	/* Intersect misc. capabilities of the OR and sink. */
+	if (disp->engine.subdev.device->chipset < 0x110)
+		outp->dp.dpcd[DPCD_RC03] &= ~DPCD_RC03_TPS4_SUPPORTED;
+	if (disp->engine.subdev.device->chipset < 0xd0)
+		outp->dp.dpcd[DPCD_RC02] &= ~DPCD_RC02_TPS3_SUPPORTED;
+
+	if (AMPERE_IED_HACK(disp) && (lnkcmp = outp->dp.info.script[0])) {
+		/* Execute BeforeLinkTraining script from DP Info table. */
+		while (ior->dp.bw < nvbios_rd08(bios, lnkcmp))
+			lnkcmp += 3;
+		lnkcmp = nvbios_rd16(bios, lnkcmp + 1);
+
+		nvbios_init(&outp->disp->engine.subdev, lnkcmp,
+			init.outp = &outp->info;
+			init.or   = ior->id;
+			init.link = ior->asy.link;
+		);
+	}
+
+	/* Set desired link configuration on the source. */
+	if ((lnkcmp = outp->dp.info.lnkcmp)) {
+		if (outp->dp.version < 0x30) {
+			while ((ior->dp.bw * 2700) < nvbios_rd16(bios, lnkcmp))
+				lnkcmp += 4;
+			lnkcmp = nvbios_rd16(bios, lnkcmp + 2);
+		} else {
+			while (ior->dp.bw < nvbios_rd08(bios, lnkcmp))
+				lnkcmp += 3;
+			lnkcmp = nvbios_rd16(bios, lnkcmp + 1);
+		}
+
+		nvbios_init(subdev, lnkcmp,
+			init.outp = &outp->info;
+			init.or   = ior->id;
+			init.link = ior->asy.link;
+		);
+	}
+
+	ret = ior->func->dp->links(ior, outp->dp.aux);
+	if (ret) {
+		if (ret < 0) {
+			OUTP_ERR(outp, "train failed with %d", ret);
+			return ret;
+		}
+		return 0;
+	}
+
+	ior->func->dp->power(ior, ior->dp.nr);
+
+	/* Attempt to train the link in this configuration. */
+	return nvkm_dp_train_link(outp, rate);
+}
+
 static void
 nvkm_dp_train_fini(struct nvkm_outp *outp)
 {
@@ -439,6 +451,16 @@ nvkm_dp_train(struct nvkm_outp *outp, u32 dataKBps)
 	int ret = -EINVAL, nr, rate;
 	u8  pwr;
 
+	/* Retraining link?  Skip source configuration, it can mess up the active modeset. */
+	if (atomic_read(&outp->dp.lt.done)) {
+		for (rate = 0; rate < outp->dp.rates; rate++) {
+			if (outp->dp.rate[rate].rate == ior->dp.bw * 27000)
+				return nvkm_dp_train_link(outp, ret);
+		}
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
 	/* Ensure sink is not in a low-power state. */
 	if (!nvkm_rdaux(outp->dp.aux, DPCD_SC00, &pwr, 1)) {
 		if ((pwr & DPCD_SC00_SET_POWER) != DPCD_SC00_SET_POWER_D0) {
@@ -455,6 +477,21 @@ nvkm_dp_train(struct nvkm_outp *outp, u32 dataKBps)
 	/* Link training. */
 	OUTP_DBG(outp, "training");
 	nvkm_dp_train_init(outp);
+
+	/* Validate and train at configuration requested (if any) on ACQUIRE. */
+	if (outp->dp.lt.nr) {
+		for (nr = outp->dp.links; ret < 0 && nr; nr >>= 1) {
+			for (rate = 0; nr == outp->dp.lt.nr && rate < outp->dp.rates; rate++) {
+				if (outp->dp.rate[rate].rate / 27000 == outp->dp.lt.bw) {
+					ior->dp.bw = outp->dp.rate[rate].rate / 27000;
+					ior->dp.nr = nr;
+					ret = nvkm_dp_train_links(outp, rate);
+				}
+			}
+		}
+	}
+
+	/* Otherwise, loop through all valid link configurations that support the data rate. */
 	for (nr = outp->dp.links; ret < 0 && nr; nr >>= 1) {
 		for (rate = 0; ret < 0 && rate < outp->dp.rates; rate++) {
 			if (outp->dp.rate[rate].rate * nr >= dataKBps || WARN_ON(!ior->dp.nr)) {
@@ -465,6 +502,8 @@ nvkm_dp_train(struct nvkm_outp *outp, u32 dataKBps)
 			}
 		}
 	}
+
+	/* Finish up. */
 	nvkm_dp_train_fini(outp);
 	if (ret < 0)
 		OUTP_ERR(outp, "training failed");
@@ -595,18 +634,38 @@ nvkm_dp_enable_supported_link_rates(struct nvkm_outp *outp)
 	return outp->dp.rates != 0;
 }
 
-static bool
-nvkm_dp_enable(struct nvkm_outp *outp, bool enable)
+void
+nvkm_dp_enable(struct nvkm_outp *outp, bool auxpwr)
 {
+	struct nvkm_gpio *gpio = outp->disp->engine.subdev.device->gpio;
 	struct nvkm_i2c_aux *aux = outp->dp.aux;
 
-	if (enable) {
-		if (!outp->dp.present) {
-			OUTP_DBG(outp, "aux power -> always");
-			nvkm_i2c_aux_monitor(aux, true);
-			outp->dp.present = true;
+	if (auxpwr && !outp->dp.aux_pwr) {
+		/* eDP panels need powering on by us (if the VBIOS doesn't default it
+		 * to on) before doing any AUX channel transactions.  LVDS panel power
+		 * is handled by the SOR itself, and not required for LVDS DDC.
+		 */
+		if (outp->conn->info.type == DCB_CONNECTOR_eDP) {
+			int power = nvkm_gpio_get(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff);
+			if (power == 0) {
+				nvkm_gpio_set(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff, 1);
+				outp->dp.aux_pwr_pu = true;
+			}
+
+			/* We delay here unconditionally, even if already powered,
+			 * because some laptop panels having a significant resume
+			 * delay before the panel begins responding.
+			 *
+			 * This is likely a bit of a hack, but no better idea for
+			 * handling this at the moment.
+			 */
+			msleep(300);
 		}
 
+		OUTP_DBG(outp, "aux power -> always");
+		nvkm_i2c_aux_monitor(aux, true);
+		outp->dp.aux_pwr = true;
+
 		/* Detect any LTTPRs before reading DPCD receiver caps. */
 		if (!nvkm_rdaux(aux, DPCD_LTTPR_REV, outp->dp.lttpr, sizeof(outp->dp.lttpr)) &&
 		    outp->dp.lttpr[0] >= 0x14 && outp->dp.lttpr[2]) {
@@ -659,96 +718,41 @@ nvkm_dp_enable(struct nvkm_outp *outp, bool enable)
 					outp->dp.rates++;
 				}
 			}
-
-			return true;
 		}
-	}
-
-	if (outp->dp.present) {
+	} else
+	if (!auxpwr && outp->dp.aux_pwr) {
 		OUTP_DBG(outp, "aux power -> demand");
 		nvkm_i2c_aux_monitor(aux, false);
-		outp->dp.present = false;
-	}
-
-	atomic_set(&outp->dp.lt.done, 0);
-	return false;
-}
-
-static int
-nvkm_dp_hpd(struct nvkm_notify *notify)
-{
-	const struct nvkm_i2c_ntfy_rep *line = notify->data;
-	struct nvkm_outp *outp = container_of(notify, typeof(*outp), dp.hpd);
-	struct nvkm_conn *conn = outp->conn;
-	struct nvkm_disp *disp = outp->disp;
-	struct nvif_notify_conn_rep_v0 rep = {};
+		outp->dp.aux_pwr = false;
+		atomic_set(&outp->dp.lt.done, 0);
 
-	OUTP_DBG(outp, "HPD: %d", line->mask);
-	if (line->mask & NVKM_I2C_IRQ) {
-		if (atomic_read(&outp->dp.lt.done))
-			outp->func->acquire(outp);
-		rep.mask |= NVIF_NOTIFY_CONN_V0_IRQ;
-	} else {
-		nvkm_dp_enable(outp, true);
+		/* Restore eDP panel GPIO to its prior state if we changed it, as
+		 * it could potentially interfere with other outputs.
+		 */
+		if (outp->conn->info.type == DCB_CONNECTOR_eDP) {
+			if (outp->dp.aux_pwr_pu) {
+				nvkm_gpio_set(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff, 0);
+				outp->dp.aux_pwr_pu = false;
+			}
+		}
 	}
-
-	if (line->mask & NVKM_I2C_UNPLUG)
-		rep.mask |= NVIF_NOTIFY_CONN_V0_UNPLUG;
-	if (line->mask & NVKM_I2C_PLUG)
-		rep.mask |= NVIF_NOTIFY_CONN_V0_PLUG;
-
-	nvkm_event_send(&disp->hpd, rep.mask, conn->index, &rep, sizeof(rep));
-	return NVKM_NOTIFY_KEEP;
 }
 
 static void
 nvkm_dp_fini(struct nvkm_outp *outp)
 {
-	nvkm_notify_put(&outp->dp.hpd);
 	nvkm_dp_enable(outp, false);
 }
 
 static void
 nvkm_dp_init(struct nvkm_outp *outp)
 {
-	struct nvkm_gpio *gpio = outp->disp->engine.subdev.device->gpio;
-
-	nvkm_notify_put(&outp->conn->hpd);
-
-	/* eDP panels need powering on by us (if the VBIOS doesn't default it
-	 * to on) before doing any AUX channel transactions.  LVDS panel power
-	 * is handled by the SOR itself, and not required for LVDS DDC.
-	 */
-	if (outp->conn->info.type == DCB_CONNECTOR_eDP) {
-		int power = nvkm_gpio_get(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff);
-		if (power == 0)
-			nvkm_gpio_set(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff, 1);
-
-		/* We delay here unconditionally, even if already powered,
-		 * because some laptop panels having a significant resume
-		 * delay before the panel begins responding.
-		 *
-		 * This is likely a bit of a hack, but no better idea for
-		 * handling this at the moment.
-		 */
-		msleep(300);
-
-		/* If the eDP panel can't be detected, we need to restore
-		 * the panel power GPIO to avoid breaking another output.
-		 */
-		if (!nvkm_dp_enable(outp, true) && power == 0)
-			nvkm_gpio_set(gpio, 0, DCB_GPIO_PANEL_POWER, 0xff, 0);
-	} else {
-		nvkm_dp_enable(outp, true);
-	}
-
-	nvkm_notify_get(&outp->dp.hpd);
+	nvkm_dp_enable(outp, outp->dp.enabled);
 }
 
 static void *
 nvkm_dp_dtor(struct nvkm_outp *outp)
 {
-	nvkm_notify_fini(&outp->dp.hpd);
 	return outp;
 }
 
@@ -797,21 +801,6 @@ nvkm_dp_new(struct nvkm_disp *disp, int index, struct dcb_output *dcbE, struct n
 
 	OUTP_DBG(outp, "bios dp %02x %02x %02x %02x", outp->dp.version, hdr, cnt, len);
 
-	/* hotplug detect, replaces gpio-based mechanism with aux events */
-	ret = nvkm_notify_init(NULL, &i2c->event, nvkm_dp_hpd, true,
-			       &(struct nvkm_i2c_ntfy_req) {
-				.mask = NVKM_I2C_PLUG | NVKM_I2C_UNPLUG |
-					NVKM_I2C_IRQ,
-				.port = outp->dp.aux->id,
-			       },
-			       sizeof(struct nvkm_i2c_ntfy_req),
-			       sizeof(struct nvkm_i2c_ntfy_rep),
-			       &outp->dp.hpd);
-	if (ret) {
-		OUTP_ERR(outp, "error monitoring aux hpd: %d", ret);
-		return ret;
-	}
-
 	mutex_init(&outp->dp.mutex);
 	atomic_set(&outp->dp.lt.done, 0);
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h
index 1d86baa6a424..9a6be43916bc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dp.h
@@ -6,6 +6,7 @@
 int nvkm_dp_new(struct nvkm_disp *, int index, struct dcb_output *,
 		struct nvkm_outp **);
 void nvkm_dp_disable(struct nvkm_outp *, struct nvkm_ior *);
+void nvkm_dp_enable(struct nvkm_outp *, bool auxpwr);
 
 /* DPCD Receiver Capabilities */
 #define DPCD_RC00_DPCD_REV                                              0x00000
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c
index 4966a51af3d7..23ae451ba473 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/g84.c
@@ -29,9 +29,54 @@
 
 #include <nvif/class.h>
 
-void
-g84_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet,
-		  u8 rekey, u8 *avi, u8 avi_size, u8 *vendor, u8 vendor_size)
+static void
+g84_sor_hdmi_infoframe_vsi(struct nvkm_ior *ior, int head, void *data, u32 size)
+{
+	struct nvkm_device *device = ior->disp->engine.subdev.device;
+	struct packed_hdmi_infoframe vsi;
+	const u32 hoff = head * 0x800;
+
+	nvkm_mask(device, 0x61653c + hoff, 0x00010001, 0x00010000);
+	if (!size)
+		return;
+
+	pack_hdmi_infoframe(&vsi, data, size);
+
+	nvkm_wr32(device, 0x616544 + hoff, vsi.header);
+	nvkm_wr32(device, 0x616548 + hoff, vsi.subpack0_low);
+	nvkm_wr32(device, 0x61654c + hoff, vsi.subpack0_high);
+	/* Is there a second (or up to fourth?) set of subpack registers here? */
+	/* nvkm_wr32(device, 0x616550 + hoff, vsi.subpack1_low); */
+	/* nvkm_wr32(device, 0x616554 + hoff, vsi.subpack1_high); */
+
+	nvkm_mask(device, 0x61653c + hoff, 0x00010001, 0x00010001);
+}
+
+static void
+g84_sor_hdmi_infoframe_avi(struct nvkm_ior *ior, int head, void *data, u32 size)
+{
+	struct nvkm_device *device = ior->disp->engine.subdev.device;
+	struct packed_hdmi_infoframe avi;
+	const u32 hoff = head * 0x800;
+
+	pack_hdmi_infoframe(&avi, data, size);
+
+	nvkm_mask(device, 0x616520 + hoff, 0x00000001, 0x00000000);
+	if (!size)
+		return;
+
+	nvkm_wr32(device, 0x616528 + hoff, avi.header);
+	nvkm_wr32(device, 0x61652c + hoff, avi.subpack0_low);
+	nvkm_wr32(device, 0x616530 + hoff, avi.subpack0_high);
+	nvkm_wr32(device, 0x616534 + hoff, avi.subpack1_low);
+	nvkm_wr32(device, 0x616538 + hoff, avi.subpack1_high);
+
+	nvkm_mask(device, 0x616520 + hoff, 0x00000001, 0x00000001);
+}
+
+
+static void
+g84_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, u8 rekey)
 {
 	struct nvkm_device *device = ior->disp->engine.subdev.device;
 	const u32 ctrl = 0x40000000 * enable |
@@ -39,31 +84,13 @@ g84_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet,
 			 max_ac_packet << 16 |
 			 rekey;
 	const u32 hoff = head * 0x800;
-	struct packed_hdmi_infoframe avi_infoframe;
-	struct packed_hdmi_infoframe vendor_infoframe;
-
-	pack_hdmi_infoframe(&avi_infoframe, avi, avi_size);
-	pack_hdmi_infoframe(&vendor_infoframe, vendor, vendor_size);
 
 	if (!(ctrl & 0x40000000)) {
 		nvkm_mask(device, 0x6165a4 + hoff, 0x40000000, 0x00000000);
-		nvkm_mask(device, 0x61653c + hoff, 0x00000001, 0x00000000);
-		nvkm_mask(device, 0x616520 + hoff, 0x00000001, 0x00000000);
 		nvkm_mask(device, 0x616500 + hoff, 0x00000001, 0x00000000);
 		return;
 	}
 
-	/* AVI InfoFrame */
-	nvkm_mask(device, 0x616520 + hoff, 0x00000001, 0x00000000);
-	if (avi_size) {
-		nvkm_wr32(device, 0x616528 + hoff, avi_infoframe.header);
-		nvkm_wr32(device, 0x61652c + hoff, avi_infoframe.subpack0_low);
-		nvkm_wr32(device, 0x616530 + hoff, avi_infoframe.subpack0_high);
-		nvkm_wr32(device, 0x616534 + hoff, avi_infoframe.subpack1_low);
-		nvkm_wr32(device, 0x616538 + hoff, avi_infoframe.subpack1_high);
-		nvkm_mask(device, 0x616520 + hoff, 0x00000001, 0x00000001);
-	}
-
 	/* Audio InfoFrame */
 	nvkm_mask(device, 0x616500 + hoff, 0x00000001, 0x00000000);
 	nvkm_wr32(device, 0x616508 + hoff, 0x000a0184);
@@ -71,17 +98,6 @@ g84_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet,
 	nvkm_wr32(device, 0x616510 + hoff, 0x00000000);
 	nvkm_mask(device, 0x616500 + hoff, 0x00000001, 0x00000001);
 
-	/* Vendor InfoFrame */
-	nvkm_mask(device, 0x61653c + hoff, 0x00010001, 0x00010000);
-	if (vendor_size) {
-		nvkm_wr32(device, 0x616544 + hoff, vendor_infoframe.header);
-		nvkm_wr32(device, 0x616548 + hoff, vendor_infoframe.subpack0_low);
-		nvkm_wr32(device, 0x61654c + hoff, vendor_infoframe.subpack0_high);
-		/* Is there a second (or up to fourth?) set of subpack registers here? */
-		/* nvkm_wr32(device, 0x616550 + hoff, vendor_infoframe->subpack1_low); */
-		/* nvkm_wr32(device, 0x616554 + hoff, vendor_infoframe->subpack1_high); */
-		nvkm_mask(device, 0x61653c + hoff, 0x00010001, 0x00010001);
-	}
 
 	nvkm_mask(device, 0x6165d0 + hoff, 0x00070001, 0x00010001); /* SPARE, HW_CTS */
 	nvkm_mask(device, 0x616568 + hoff, 0x00010101, 0x00000000); /* ACR_CTRL, ?? */
@@ -96,14 +112,19 @@ g84_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet,
 	nvkm_mask(device, 0x6165a4 + hoff, 0x5f1f007f, ctrl);
 }
 
+const struct nvkm_ior_func_hdmi
+g84_sor_hdmi = {
+	.ctrl = g84_sor_hdmi_ctrl,
+	.infoframe_avi = g84_sor_hdmi_infoframe_avi,
+	.infoframe_vsi = g84_sor_hdmi_infoframe_vsi,
+};
+
 static const struct nvkm_ior_func
 g84_sor = {
 	.state = nv50_sor_state,
 	.power = nv50_sor_power,
 	.clock = nv50_sor_clock,
-	.hdmi = {
-		.ctrl = g84_sor_hdmi_ctrl,
-	},
+	.hdmi = &g84_sor_hdmi,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c
index 7489d0d7fce0..52099b75f52a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ga102.c
@@ -105,10 +105,7 @@ ga102_sor = {
 	.state = gv100_sor_state,
 	.power = nv50_sor_power,
 	.clock = ga102_sor_clock,
-	.hdmi = {
-		.ctrl = gv100_sor_hdmi_ctrl,
-		.scdc = gm200_sor_hdmi_scdc,
-	},
+	.hdmi = &gv100_sor_hdmi,
 	.dp = &ga102_sor_dp,
 	.hda = &gv100_sor_hda,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
index 39822f1b5b95..a48e9bdf4cd0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
@@ -202,19 +202,61 @@ gf119_sor_dp = {
 };
 
 static void
-gf119_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet,
-		    u8 rekey, u8 *avi, u8 avi_size, u8 *vendor, u8 vendor_size)
+gf119_sor_hdmi_infoframe_vsi(struct nvkm_ior *ior, int head, void *data, u32 size)
+{
+	struct nvkm_device *device = ior->disp->engine.subdev.device;
+	struct packed_hdmi_infoframe vsi;
+	const u32 hoff = head * 0x800;
+
+	pack_hdmi_infoframe(&vsi, data, size);
+
+	nvkm_mask(device, 0x616730 + hoff, 0x00010001, 0x00010000);
+	if (!size)
+		return;
+
+	/*
+	 * These appear to be the audio infoframe registers,
+	 * but no other set of infoframe registers has yet
+	 * been found.
+	 */
+	nvkm_wr32(device, 0x616738 + hoff, vsi.header);
+	nvkm_wr32(device, 0x61673c + hoff, vsi.subpack0_low);
+	nvkm_wr32(device, 0x616740 + hoff, vsi.subpack0_high);
+	/* Is there a second (or further?) set of subpack registers here? */
+
+	nvkm_mask(device, 0x616730 + hoff, 0x00000001, 0x00000001);
+}
+
+static void
+gf119_sor_hdmi_infoframe_avi(struct nvkm_ior *ior, int head, void *data, u32 size)
+{
+	struct nvkm_device *device = ior->disp->engine.subdev.device;
+	struct packed_hdmi_infoframe avi;
+	const u32 hoff = head * 0x800;
+
+	pack_hdmi_infoframe(&avi, data, size);
+
+	nvkm_mask(device, 0x616714 + hoff, 0x00000001, 0x00000000);
+	if (!size)
+		return;
+
+	nvkm_wr32(device, 0x61671c + hoff, avi.header);
+	nvkm_wr32(device, 0x616720 + hoff, avi.subpack0_low);
+	nvkm_wr32(device, 0x616724 + hoff, avi.subpack0_high);
+	nvkm_wr32(device, 0x616728 + hoff, avi.subpack1_low);
+	nvkm_wr32(device, 0x61672c + hoff, avi.subpack1_high);
+
+	nvkm_mask(device, 0x616714 + hoff, 0x00000001, 0x00000001);
+}
+
+static void
+gf119_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, u8 rekey)
 {
 	struct nvkm_device *device = ior->disp->engine.subdev.device;
 	const u32 ctrl = 0x40000000 * enable |
 			 max_ac_packet << 16 |
 			 rekey;
 	const u32 hoff = head * 0x800;
-	struct packed_hdmi_infoframe avi_infoframe;
-	struct packed_hdmi_infoframe vendor_infoframe;
-
-	pack_hdmi_infoframe(&avi_infoframe, avi, avi_size);
-	pack_hdmi_infoframe(&vendor_infoframe, vendor, vendor_size);
 
 	if (!(ctrl & 0x40000000)) {
 		nvkm_mask(device, 0x616798 + hoff, 0x40000000, 0x00000000);
@@ -224,32 +266,6 @@ gf119_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 		return;
 	}
 
-	/* AVI InfoFrame */
-	nvkm_mask(device, 0x616714 + hoff, 0x00000001, 0x00000000);
-	if (avi_size) {
-		nvkm_wr32(device, 0x61671c + hoff, avi_infoframe.header);
-		nvkm_wr32(device, 0x616720 + hoff, avi_infoframe.subpack0_low);
-		nvkm_wr32(device, 0x616724 + hoff, avi_infoframe.subpack0_high);
-		nvkm_wr32(device, 0x616728 + hoff, avi_infoframe.subpack1_low);
-		nvkm_wr32(device, 0x61672c + hoff, avi_infoframe.subpack1_high);
-		nvkm_mask(device, 0x616714 + hoff, 0x00000001, 0x00000001);
-	}
-
-	/* GENERIC(?) / Vendor InfoFrame? */
-	nvkm_mask(device, 0x616730 + hoff, 0x00010001, 0x00010000);
-	if (vendor_size) {
-		/*
-		 * These appear to be the audio infoframe registers,
-		 * but no other set of infoframe registers has yet
-		 * been found.
-		 */
-		nvkm_wr32(device, 0x616738 + hoff, vendor_infoframe.header);
-		nvkm_wr32(device, 0x61673c + hoff, vendor_infoframe.subpack0_low);
-		nvkm_wr32(device, 0x616740 + hoff, vendor_infoframe.subpack0_high);
-		/* Is there a second (or further?) set of subpack registers here? */
-		nvkm_mask(device, 0x616730 + hoff, 0x00000001, 0x00000001);
-	}
-
 	/* ??? InfoFrame? */
 	nvkm_mask(device, 0x6167a4 + hoff, 0x00000001, 0x00000000);
 	nvkm_wr32(device, 0x6167ac + hoff, 0x00000010);
@@ -259,6 +275,13 @@ gf119_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 	nvkm_mask(device, 0x616798 + hoff, 0x401f007f, ctrl);
 }
 
+static const struct nvkm_ior_func_hdmi
+gf119_sor_hdmi = {
+	.ctrl = gf119_sor_hdmi_ctrl,
+	.infoframe_avi = gf119_sor_hdmi_infoframe_avi,
+	.infoframe_vsi = gf119_sor_hdmi_infoframe_vsi,
+};
+
 void
 gf119_sor_clock(struct nvkm_ior *sor)
 {
@@ -305,9 +328,7 @@ gf119_sor = {
 	.state = gf119_sor_state,
 	.power = nv50_sor_power,
 	.clock = gf119_sor_clock,
-	.hdmi = {
-		.ctrl = gf119_sor_hdmi_ctrl,
-	},
+	.hdmi = &gf119_sor_hdmi,
 	.dp = &gf119_sor_dp,
 	.hda = &gf119_sor_hda,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c
index 7248e9ec835e..876a21a0cebb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gk104.c
@@ -30,8 +30,51 @@
 #include <nvif/class.h>
 
 void
-gk104_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet,
-		    u8 rekey, u8 *avi, u8 avi_size, u8 *vendor, u8 vendor_size)
+gk104_sor_hdmi_infoframe_vsi(struct nvkm_ior *ior, int head, void *data, u32 size)
+{
+	struct nvkm_device *device = ior->disp->engine.subdev.device;
+	struct packed_hdmi_infoframe vsi;
+	const u32 hoff = head * 0x400;
+
+	pack_hdmi_infoframe(&vsi, data, size);
+
+	/* GENERIC(?) / Vendor InfoFrame? */
+	nvkm_mask(device, 0x690100 + hoff, 0x00010001, 0x00000000);
+	if (!size)
+		return;
+
+	nvkm_wr32(device, 0x690108 + hoff, vsi.header);
+	nvkm_wr32(device, 0x69010c + hoff, vsi.subpack0_low);
+	nvkm_wr32(device, 0x690110 + hoff, vsi.subpack0_high);
+	/* Is there a second (or further?) set of subpack registers here? */
+	nvkm_mask(device, 0x690100 + hoff, 0x00000001, 0x00000001);
+}
+
+void
+gk104_sor_hdmi_infoframe_avi(struct nvkm_ior *ior, int head, void *data, u32 size)
+{
+	struct nvkm_device *device = ior->disp->engine.subdev.device;
+	struct packed_hdmi_infoframe avi;
+	const u32 hoff = head * 0x400;
+
+	pack_hdmi_infoframe(&avi, data, size);
+
+	/* AVI InfoFrame */
+	nvkm_mask(device, 0x690000 + hoff, 0x00000001, 0x00000000);
+	if (!size)
+		return;
+
+	nvkm_wr32(device, 0x690008 + hoff, avi.header);
+	nvkm_wr32(device, 0x69000c + hoff, avi.subpack0_low);
+	nvkm_wr32(device, 0x690010 + hoff, avi.subpack0_high);
+	nvkm_wr32(device, 0x690014 + hoff, avi.subpack1_low);
+	nvkm_wr32(device, 0x690018 + hoff, avi.subpack1_high);
+
+	nvkm_mask(device, 0x690000 + hoff, 0x00000001, 0x00000001);
+}
+
+void
+gk104_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, u8 rekey)
 {
 	struct nvkm_device *device = ior->disp->engine.subdev.device;
 	const u32 ctrl = 0x40000000 * enable |
@@ -39,11 +82,6 @@ gk104_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 			 rekey;
 	const u32 hoff = head * 0x800;
 	const u32 hdmi = head * 0x400;
-	struct packed_hdmi_infoframe avi_infoframe;
-	struct packed_hdmi_infoframe vendor_infoframe;
-
-	pack_hdmi_infoframe(&avi_infoframe, avi, avi_size);
-	pack_hdmi_infoframe(&vendor_infoframe, vendor, vendor_size);
 
 	if (!(ctrl & 0x40000000)) {
 		nvkm_mask(device, 0x616798 + hoff, 0x40000000, 0x00000000);
@@ -53,28 +91,6 @@ gk104_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 		return;
 	}
 
-	/* AVI InfoFrame */
-	nvkm_mask(device, 0x690000 + hdmi, 0x00000001, 0x00000000);
-	if (avi_size) {
-		nvkm_wr32(device, 0x690008 + hdmi, avi_infoframe.header);
-		nvkm_wr32(device, 0x69000c + hdmi, avi_infoframe.subpack0_low);
-		nvkm_wr32(device, 0x690010 + hdmi, avi_infoframe.subpack0_high);
-		nvkm_wr32(device, 0x690014 + hdmi, avi_infoframe.subpack1_low);
-		nvkm_wr32(device, 0x690018 + hdmi, avi_infoframe.subpack1_high);
-		nvkm_mask(device, 0x690000 + hdmi, 0x00000001, 0x00000001);
-	}
-
-	/* GENERIC(?) / Vendor InfoFrame? */
-	nvkm_mask(device, 0x690100 + hdmi, 0x00010001, 0x00000000);
-	if (vendor_size) {
-		nvkm_wr32(device, 0x690108 + hdmi, vendor_infoframe.header);
-		nvkm_wr32(device, 0x69010c + hdmi, vendor_infoframe.subpack0_low);
-		nvkm_wr32(device, 0x690110 + hdmi, vendor_infoframe.subpack0_high);
-		/* Is there a second (or further?) set of subpack registers here? */
-		nvkm_mask(device, 0x690100 + hdmi, 0x00000001, 0x00000001);
-	}
-
-
 	/* ??? InfoFrame? */
 	nvkm_mask(device, 0x6900c0 + hdmi, 0x00000001, 0x00000000);
 	nvkm_wr32(device, 0x6900cc + hdmi, 0x00000010);
@@ -87,14 +103,19 @@ gk104_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 	nvkm_mask(device, 0x616798 + hoff, 0x401f007f, ctrl);
 }
 
+const struct nvkm_ior_func_hdmi
+gk104_sor_hdmi = {
+	.ctrl = gk104_sor_hdmi_ctrl,
+	.infoframe_avi = gk104_sor_hdmi_infoframe_avi,
+	.infoframe_vsi = gk104_sor_hdmi_infoframe_vsi,
+};
+
 static const struct nvkm_ior_func
 gk104_sor = {
 	.state = gf119_sor_state,
 	.power = nv50_sor_power,
 	.clock = gf119_sor_clock,
-	.hdmi = {
-		.ctrl = gk104_sor_hdmi_ctrl,
-	},
+	.hdmi = &gk104_sor_hdmi,
 	.dp = &gf119_sor_dp,
 	.hda = &gf119_sor_hda,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
index 9e9ef49bd8ac..b4d8e868616f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm107.c
@@ -70,9 +70,7 @@ gm107_sor = {
 	.state = gf119_sor_state,
 	.power = nv50_sor_power,
 	.clock = gf119_sor_clock,
-	.hdmi = {
-		.ctrl = gk104_sor_hdmi_ctrl,
-	},
+	.hdmi = &gk104_sor_hdmi,
 	.dp = &gm107_sor_dp,
 	.hda = &gf119_sor_hda,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
index 4ecc8f98af6e..562ebae57d44 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gm200.c
@@ -79,6 +79,14 @@ gm200_sor_hdmi_scdc(struct nvkm_ior *ior, u8 scdc)
 	ior->tmds.high_speed = !!(scdc & 0x2);
 }
 
+const struct nvkm_ior_func_hdmi
+gm200_sor_hdmi = {
+	.ctrl = gk104_sor_hdmi_ctrl,
+	.scdc = gm200_sor_hdmi_scdc,
+	.infoframe_avi = gk104_sor_hdmi_infoframe_avi,
+	.infoframe_vsi = gk104_sor_hdmi_infoframe_vsi,
+};
+
 void
 gm200_sor_route_set(struct nvkm_outp *outp, struct nvkm_ior *ior)
 {
@@ -131,10 +139,7 @@ gm200_sor = {
 	.state = gf119_sor_state,
 	.power = nv50_sor_power,
 	.clock = gf119_sor_clock,
-	.hdmi = {
-		.ctrl = gk104_sor_hdmi_ctrl,
-		.scdc = gm200_sor_hdmi_scdc,
-	},
+	.hdmi = &gm200_sor_hdmi,
 	.dp = &gm200_sor_dp,
 	.hda = &gf119_sor_hda,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c
index 7172a9dfd89b..7f1eb4332040 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp100.c
@@ -37,10 +37,7 @@ gp100_sor = {
 	.state = gf119_sor_state,
 	.power = nv50_sor_power,
 	.clock = gf119_sor_clock,
-	.hdmi = {
-		.ctrl = gk104_sor_hdmi_ctrl,
-		.scdc = gm200_sor_hdmi_scdc,
-	},
+	.hdmi = &gm200_sor_hdmi,
 	.dp = &gm200_sor_dp,
 	.hda = &gf119_sor_hda,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c
index 70c49e7af9cf..a2c7c6f83dcd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gt215.c
@@ -92,9 +92,53 @@ gt215_sor_dp = {
 	.watermark = g94_sor_dp_watermark,
 };
 
-void
-gt215_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet,
-		    u8 rekey, u8 *avi, u8 avi_size, u8 *vendor, u8 vendor_size)
+static void
+gt215_sor_hdmi_infoframe_vsi(struct nvkm_ior *ior, int head, void *data, u32 size)
+{
+	struct nvkm_device *device = ior->disp->engine.subdev.device;
+	struct packed_hdmi_infoframe vsi;
+	const u32 soff = nv50_ior_base(ior);
+
+	pack_hdmi_infoframe(&vsi, data, size);
+
+	nvkm_mask(device, 0x61c53c + soff, 0x00010001, 0x00010000);
+	if (!size)
+		return;
+
+	nvkm_wr32(device, 0x61c544 + soff, vsi.header);
+	nvkm_wr32(device, 0x61c548 + soff, vsi.subpack0_low);
+	nvkm_wr32(device, 0x61c54c + soff, vsi.subpack0_high);
+	/* Is there a second (or up to fourth?) set of subpack registers here? */
+	/* nvkm_wr32(device, 0x61c550 + soff, vsi.subpack1_low); */
+	/* nvkm_wr32(device, 0x61c554 + soff, vsi.subpack1_high); */
+
+	nvkm_mask(device, 0x61c53c + soff, 0x00010001, 0x00010001);
+}
+
+static void
+gt215_sor_hdmi_infoframe_avi(struct nvkm_ior *ior, int head, void *data, u32 size)
+{
+	struct nvkm_device *device = ior->disp->engine.subdev.device;
+	struct packed_hdmi_infoframe avi;
+	const u32 soff = nv50_ior_base(ior);
+
+	pack_hdmi_infoframe(&avi, data, size);
+
+	nvkm_mask(device, 0x61c520 + soff, 0x00000001, 0x00000000);
+	if (size)
+		return;
+
+	nvkm_wr32(device, 0x61c528 + soff, avi.header);
+	nvkm_wr32(device, 0x61c52c + soff, avi.subpack0_low);
+	nvkm_wr32(device, 0x61c530 + soff, avi.subpack0_high);
+	nvkm_wr32(device, 0x61c534 + soff, avi.subpack1_low);
+	nvkm_wr32(device, 0x61c538 + soff, avi.subpack1_high);
+
+	nvkm_mask(device, 0x61c520 + soff, 0x00000001, 0x00000001);
+}
+
+static void
+gt215_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, u8 rekey)
 {
 	struct nvkm_device *device = ior->disp->engine.subdev.device;
 	const u32 ctrl = 0x40000000 * enable |
@@ -102,11 +146,6 @@ gt215_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 			 max_ac_packet << 16 |
 			 rekey;
 	const u32 soff = nv50_ior_base(ior);
-	struct packed_hdmi_infoframe avi_infoframe;
-	struct packed_hdmi_infoframe vendor_infoframe;
-
-	pack_hdmi_infoframe(&avi_infoframe, avi, avi_size);
-	pack_hdmi_infoframe(&vendor_infoframe, vendor, vendor_size);
 
 	if (!(ctrl & 0x40000000)) {
 		nvkm_mask(device, 0x61c5a4 + soff, 0x40000000, 0x00000000);
@@ -116,17 +155,6 @@ gt215_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 		return;
 	}
 
-	/* AVI InfoFrame */
-	nvkm_mask(device, 0x61c520 + soff, 0x00000001, 0x00000000);
-	if (avi_size) {
-		nvkm_wr32(device, 0x61c528 + soff, avi_infoframe.header);
-		nvkm_wr32(device, 0x61c52c + soff, avi_infoframe.subpack0_low);
-		nvkm_wr32(device, 0x61c530 + soff, avi_infoframe.subpack0_high);
-		nvkm_wr32(device, 0x61c534 + soff, avi_infoframe.subpack1_low);
-		nvkm_wr32(device, 0x61c538 + soff, avi_infoframe.subpack1_high);
-		nvkm_mask(device, 0x61c520 + soff, 0x00000001, 0x00000001);
-	}
-
 	/* Audio InfoFrame */
 	nvkm_mask(device, 0x61c500 + soff, 0x00000001, 0x00000000);
 	nvkm_wr32(device, 0x61c508 + soff, 0x000a0184);
@@ -134,18 +162,6 @@ gt215_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 	nvkm_wr32(device, 0x61c510 + soff, 0x00000000);
 	nvkm_mask(device, 0x61c500 + soff, 0x00000001, 0x00000001);
 
-	/* Vendor InfoFrame */
-	nvkm_mask(device, 0x61c53c + soff, 0x00010001, 0x00010000);
-	if (vendor_size) {
-		nvkm_wr32(device, 0x61c544 + soff, vendor_infoframe.header);
-		nvkm_wr32(device, 0x61c548 + soff, vendor_infoframe.subpack0_low);
-		nvkm_wr32(device, 0x61c54c + soff, vendor_infoframe.subpack0_high);
-		/* Is there a second (or up to fourth?) set of subpack registers here? */
-		/* nvkm_wr32(device, 0x61c550 + soff, vendor_infoframe.subpack1_low); */
-		/* nvkm_wr32(device, 0x61c554 + soff, vendor_infoframe.subpack1_high); */
-		nvkm_mask(device, 0x61c53c + soff, 0x00010001, 0x00010001);
-	}
-
 	nvkm_mask(device, 0x61c5d0 + soff, 0x00070001, 0x00010001); /* SPARE, HW_CTS */
 	nvkm_mask(device, 0x61c568 + soff, 0x00010101, 0x00000000); /* ACR_CTRL, ?? */
 	nvkm_mask(device, 0x61c578 + soff, 0x80000000, 0x80000000); /* ACR_0441_ENABLE */
@@ -159,14 +175,19 @@ gt215_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 	nvkm_mask(device, 0x61c5a4 + soff, 0x5f1f007f, ctrl);
 }
 
+const struct nvkm_ior_func_hdmi
+gt215_sor_hdmi = {
+	.ctrl = gt215_sor_hdmi_ctrl,
+	.infoframe_avi = gt215_sor_hdmi_infoframe_avi,
+	.infoframe_vsi = gt215_sor_hdmi_infoframe_vsi,
+};
+
 static const struct nvkm_ior_func
 gt215_sor = {
 	.state = g94_sor_state,
 	.power = nv50_sor_power,
 	.clock = nv50_sor_clock,
-	.hdmi = {
-		.ctrl = gt215_sor_hdmi_ctrl,
-	},
+	.hdmi = &gt215_sor_hdmi,
 	.dp = &gt215_sor_dp,
 	.hda = &gt215_sor_hda,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c
index 6b9d49270fa7..115d0997fd62 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c
@@ -96,9 +96,54 @@ gv100_sor_dp = {
 	.watermark = gv100_sor_dp_watermark,
 };
 
-void
-gv100_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet,
-		    u8 rekey, u8 *avi, u8 avi_size, u8 *vendor, u8 vendor_size)
+static void
+gv100_sor_hdmi_infoframe_vsi(struct nvkm_ior *ior, int head, void *data, u32 size)
+{
+	struct nvkm_device *device = ior->disp->engine.subdev.device;
+	struct packed_hdmi_infoframe vsi;
+	const u32 hoff = head * 0x400;
+
+	pack_hdmi_infoframe(&vsi, data, size);
+
+	nvkm_mask(device, 0x6f0100 + hoff, 0x00010001, 0x00000000);
+	if (!size)
+		return;
+
+	nvkm_wr32(device, 0x6f0108 + hoff, vsi.header);
+	nvkm_wr32(device, 0x6f010c + hoff, vsi.subpack0_low);
+	nvkm_wr32(device, 0x6f0110 + hoff, vsi.subpack0_high);
+	nvkm_wr32(device, 0x6f0114 + hoff, 0x00000000);
+	nvkm_wr32(device, 0x6f0118 + hoff, 0x00000000);
+	nvkm_wr32(device, 0x6f011c + hoff, 0x00000000);
+	nvkm_wr32(device, 0x6f0120 + hoff, 0x00000000);
+	nvkm_wr32(device, 0x6f0124 + hoff, 0x00000000);
+	nvkm_mask(device, 0x6f0100 + hoff, 0x00000001, 0x00000001);
+}
+
+static void
+gv100_sor_hdmi_infoframe_avi(struct nvkm_ior *ior, int head, void *data, u32 size)
+{
+	struct nvkm_device *device = ior->disp->engine.subdev.device;
+	struct packed_hdmi_infoframe avi;
+	const u32 hoff = head * 0x400;
+
+	pack_hdmi_infoframe(&avi, data, size);
+
+	nvkm_mask(device, 0x6f0000 + hoff, 0x00000001, 0x00000000);
+	if (!size)
+		return;
+
+	nvkm_wr32(device, 0x6f0008 + hoff, avi.header);
+	nvkm_wr32(device, 0x6f000c + hoff, avi.subpack0_low);
+	nvkm_wr32(device, 0x6f0010 + hoff, avi.subpack0_high);
+	nvkm_wr32(device, 0x6f0014 + hoff, avi.subpack1_low);
+	nvkm_wr32(device, 0x6f0018 + hoff, avi.subpack1_high);
+
+	nvkm_mask(device, 0x6f0000 + hoff, 0x00000001, 0x00000001);
+}
+
+static void
+gv100_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packet, u8 rekey)
 {
 	struct nvkm_device *device = ior->disp->engine.subdev.device;
 	const u32 ctrl = 0x40000000 * enable |
@@ -106,11 +151,6 @@ gv100_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 			 rekey;
 	const u32 hoff = head * 0x800;
 	const u32 hdmi = head * 0x400;
-	struct packed_hdmi_infoframe avi_infoframe;
-	struct packed_hdmi_infoframe vendor_infoframe;
-
-	pack_hdmi_infoframe(&avi_infoframe, avi, avi_size);
-	pack_hdmi_infoframe(&vendor_infoframe, vendor, vendor_size);
 
 	if (!(ctrl & 0x40000000)) {
 		nvkm_mask(device, 0x6165c0 + hoff, 0x40000000, 0x00000000);
@@ -120,32 +160,6 @@ gv100_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 		return;
 	}
 
-	/* AVI InfoFrame (AVI). */
-	nvkm_mask(device, 0x6f0000 + hdmi, 0x00000001, 0x00000000);
-	if (avi_size) {
-		nvkm_wr32(device, 0x6f0008 + hdmi, avi_infoframe.header);
-		nvkm_wr32(device, 0x6f000c + hdmi, avi_infoframe.subpack0_low);
-		nvkm_wr32(device, 0x6f0010 + hdmi, avi_infoframe.subpack0_high);
-		nvkm_wr32(device, 0x6f0014 + hdmi, avi_infoframe.subpack1_low);
-		nvkm_wr32(device, 0x6f0018 + hdmi, avi_infoframe.subpack1_high);
-		nvkm_mask(device, 0x6f0000 + hdmi, 0x00000001, 0x00000001);
-	}
-
-	/* Vendor-specific InfoFrame (VSI). */
-	nvkm_mask(device, 0x6f0100 + hdmi, 0x00010001, 0x00000000);
-	if (vendor_size) {
-		nvkm_wr32(device, 0x6f0108 + hdmi, vendor_infoframe.header);
-		nvkm_wr32(device, 0x6f010c + hdmi, vendor_infoframe.subpack0_low);
-		nvkm_wr32(device, 0x6f0110 + hdmi, vendor_infoframe.subpack0_high);
-		nvkm_wr32(device, 0x6f0114 + hdmi, 0x00000000);
-		nvkm_wr32(device, 0x6f0118 + hdmi, 0x00000000);
-		nvkm_wr32(device, 0x6f011c + hdmi, 0x00000000);
-		nvkm_wr32(device, 0x6f0120 + hdmi, 0x00000000);
-		nvkm_wr32(device, 0x6f0124 + hdmi, 0x00000000);
-		nvkm_mask(device, 0x6f0100 + hdmi, 0x00000001, 0x00000001);
-	}
-
-
 	/* General Control (GCP). */
 	nvkm_mask(device, 0x6f00c0 + hdmi, 0x00000001, 0x00000000);
 	nvkm_wr32(device, 0x6f00cc + hdmi, 0x00000010);
@@ -158,6 +172,14 @@ gv100_sor_hdmi_ctrl(struct nvkm_ior *ior, int head, bool enable, u8 max_ac_packe
 	nvkm_mask(device, 0x6165c0 + hoff, 0x401f007f, ctrl);
 }
 
+const struct nvkm_ior_func_hdmi
+gv100_sor_hdmi = {
+	.ctrl = gv100_sor_hdmi_ctrl,
+	.scdc = gm200_sor_hdmi_scdc,
+	.infoframe_avi = gv100_sor_hdmi_infoframe_avi,
+	.infoframe_vsi = gv100_sor_hdmi_infoframe_vsi,
+};
+
 void
 gv100_sor_state(struct nvkm_ior *sor, struct nvkm_ior_state *state)
 {
@@ -190,10 +212,7 @@ gv100_sor = {
 	.state = gv100_sor_state,
 	.power = nv50_sor_power,
 	.clock = gf119_sor_clock,
-	.hdmi = {
-		.ctrl = gv100_sor_hdmi_ctrl,
-		.scdc = gm200_sor_hdmi_scdc,
-	},
+	.hdmi = &gv100_sor_hdmi,
 	.dp = &gv100_sor_dp,
 	.hda = &gv100_sor_hda,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.c
index 83152c26fe3e..7f5d13d13c94 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.c
@@ -39,44 +39,6 @@ nvkm_head_find(struct nvkm_disp *disp, int id)
 	return NULL;
 }
 
-int
-nvkm_head_mthd_scanoutpos(struct nvkm_object *object,
-			  struct nvkm_head *head, void *data, u32 size)
-{
-	union {
-		struct nv04_disp_scanoutpos_v0 v0;
-	} *args = data;
-	int ret = -ENOSYS;
-
-	nvif_ioctl(object, "head scanoutpos size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-		nvif_ioctl(object, "head scanoutpos vers %d\n",
-			   args->v0.version);
-
-		head->func->state(head, &head->arm);
-		args->v0.vtotal  = head->arm.vtotal;
-		args->v0.vblanks = head->arm.vblanks;
-		args->v0.vblanke = head->arm.vblanke;
-		args->v0.htotal  = head->arm.htotal;
-		args->v0.hblanks = head->arm.hblanks;
-		args->v0.hblanke = head->arm.hblanke;
-
-		/* We don't support reading htotal/vtotal on pre-NV50 VGA,
-		 * so we have to give up and trigger the timestamping
-		 * fallback in the drm core.
-		 */
-		if (!args->v0.vtotal || !args->v0.htotal)
-			return -ENOTSUPP;
-
-		args->v0.time[0] = ktime_to_ns(ktime_get());
-		head->func->rgpos(head, &args->v0.hline, &args->v0.vline);
-		args->v0.time[1] = ktime_to_ns(ktime_get());
-	} else
-		return ret;
-
-	return 0;
-}
-
 void
 nvkm_head_del(struct nvkm_head **phead)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h
index 84a2989193cf..856252bf559a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/head.h
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: MIT */
 #ifndef __NVKM_DISP_HEAD_H__
 #define __NVKM_DISP_HEAD_H__
+#include <nvif/object.h>
 #include "priv.h"
 
 struct nvkm_head {
@@ -26,12 +27,12 @@ struct nvkm_head {
 			u8 depth;
 		} or;
 	} arm, asy;
+
+	struct nvkm_object object;
 };
 
 int nvkm_head_new_(const struct nvkm_head_func *, struct nvkm_disp *, int id);
 void nvkm_head_del(struct nvkm_head **);
-int nvkm_head_mthd_scanoutpos(struct nvkm_object *,
-			      struct nvkm_head *, void *, u32);
 struct nvkm_head *nvkm_head_find(struct nvkm_disp *, int id);
 
 struct nvkm_head_func {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h
index 671c4674ffcc..da1b1a626ef2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h
@@ -63,12 +63,12 @@ struct nvkm_ior_func {
 	void (*war_2)(struct nvkm_ior *);
 	void (*war_3)(struct nvkm_ior *);
 
-	struct {
-		void (*ctrl)(struct nvkm_ior *, int head, bool enable,
-			     u8 max_ac_packet, u8 rekey, u8 *avi, u8 avi_size,
-			     u8 *vendor, u8 vendor_size);
+	const struct nvkm_ior_func_hdmi {
+		void (*ctrl)(struct nvkm_ior *, int head, bool enable, u8 max_ac_packet, u8 rekey);
 		void (*scdc)(struct nvkm_ior *, u8 scdc);
-	} hdmi;
+		void (*infoframe_avi)(struct nvkm_ior *, int head, void *data, u32 size);
+		void (*infoframe_vsi)(struct nvkm_ior *, int head, void *data, u32 size);
+	} *hdmi;
 
 	const struct nvkm_ior_func_dp {
 		u8 lanes[4];
@@ -124,9 +124,10 @@ void nv50_sor_power(struct nvkm_ior *, bool, bool, bool, bool, bool);
 void nv50_sor_clock(struct nvkm_ior *);
 
 int g84_sor_new(struct nvkm_disp *, int);
-void g84_sor_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8);
+extern const struct nvkm_ior_func_hdmi g84_sor_hdmi;
 
 int g94_sor_cnt(struct nvkm_disp *, unsigned long *);
+
 void g94_sor_state(struct nvkm_ior *, struct nvkm_ior_state *);
 extern const struct nvkm_ior_func_dp g94_sor_dp;
 int g94_sor_dp_links(struct nvkm_ior *, struct nvkm_i2c_aux *);
@@ -137,7 +138,7 @@ void g94_sor_dp_audio_sym(struct nvkm_ior *, int, u16, u32);
 void g94_sor_dp_activesym(struct nvkm_ior *, int, u8, u8, u8, u8);
 void g94_sor_dp_watermark(struct nvkm_ior *, int, u8);
 
-void gt215_sor_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8);
+extern const struct nvkm_ior_func_hdmi gt215_sor_hdmi;
 void gt215_sor_dp_audio(struct nvkm_ior *, int, bool);
 extern const struct nvkm_ior_func_hda gt215_sor_hda;
 
@@ -156,12 +157,16 @@ void gf119_sor_hda_hpd(struct nvkm_ior *, int, bool);
 void gf119_sor_hda_eld(struct nvkm_ior *, int, u8 *, u8);
 
 int gk104_sor_new(struct nvkm_disp *, int);
-void gk104_sor_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8);
+extern const struct nvkm_ior_func_hdmi gk104_sor_hdmi;
+void gk104_sor_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8);
+void gk104_sor_hdmi_infoframe_avi(struct nvkm_ior *, int, void *, u32);
+void gk104_sor_hdmi_infoframe_vsi(struct nvkm_ior *, int, void *, u32);
 
 void gm107_sor_dp_pattern(struct nvkm_ior *, int);
 
 void gm200_sor_route_set(struct nvkm_outp *, struct nvkm_ior *);
 int gm200_sor_route_get(struct nvkm_outp *, int *);
+extern const struct nvkm_ior_func_hdmi gm200_sor_hdmi;
 void gm200_sor_hdmi_scdc(struct nvkm_ior *, u8);
 extern const struct nvkm_ior_func_dp gm200_sor_dp;
 void gm200_sor_dp_drive(struct nvkm_ior *, int, int, int, int, int);
@@ -170,7 +175,7 @@ int gp100_sor_new(struct nvkm_disp *, int);
 
 int gv100_sor_cnt(struct nvkm_disp *, unsigned long *);
 void gv100_sor_state(struct nvkm_ior *, struct nvkm_ior_state *);
-void gv100_sor_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8);
+extern const struct nvkm_ior_func_hdmi gv100_sor_hdmi;
 void gv100_sor_dp_audio(struct nvkm_ior *, int, bool);
 void gv100_sor_dp_audio_sym(struct nvkm_ior *, int, u16, u32);
 void gv100_sor_dp_watermark(struct nvkm_ior *, int, u8);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c
index 916b1d477b0b..841e3b69fcaf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp77.c
@@ -31,9 +31,7 @@ mcp77_sor = {
 	.state = g94_sor_state,
 	.power = nv50_sor_power,
 	.clock = nv50_sor_clock,
-	.hdmi = {
-		.ctrl = g84_sor_hdmi_ctrl,
-	},
+	.hdmi = &g84_sor_hdmi,
 	.dp = &g94_sor_dp,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c
index a5a0b9439374..f96ba4752655 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/mcp89.c
@@ -44,9 +44,7 @@ mcp89_sor = {
 	.state = g94_sor_state,
 	.power = nv50_sor_power,
 	.clock = nv50_sor_clock,
-	.hdmi = {
-		.ctrl = gt215_sor_hdmi_ctrl,
-	},
+	.hdmi = &gt215_sor_hdmi,
 	.dp = &mcp89_sor_dp,
 	.hda = &gt215_sor_hda,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
index a46e13cc9ff1..be8116802960 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
@@ -503,7 +503,7 @@ nv50_disp_chan_uevent_init(struct nvkm_event *event, int types, int index)
 void
 nv50_disp_chan_uevent_send(struct nvkm_disp *disp, int chid)
 {
-	nvkm_event_send(&disp->uevent, NVKM_DISP_EVENT_CHAN_AWAKEN, chid, NULL, 0);
+	nvkm_event_ntfy(&disp->uevent, chid, NVKM_DISP_EVENT_CHAN_AWAKEN);
 }
 
 const struct nvkm_event_func
@@ -1238,6 +1238,8 @@ nv50_disp_super_2_2(struct nvkm_disp *disp, struct nvkm_head *head)
 	if (!ior)
 		return;
 
+	outp = ior->asy.outp;
+
 	/* For some reason, NVIDIA decided not to:
 	 *
 	 * A) Give dual-link LVDS a separate EVO protocol, like for TMDS.
@@ -1247,13 +1249,13 @@ nv50_disp_super_2_2(struct nvkm_disp *disp, struct nvkm_head *head)
 	 * Override the values we usually read from HW with the same
 	 * data we pass though an ioctl instead.
 	 */
-	if (ior->type == SOR && ior->asy.proto == LVDS) {
-		head->asy.or.depth = (disp->sor.lvdsconf & 0x0200) ? 24 : 18;
-		ior->asy.link      = (disp->sor.lvdsconf & 0x0100) ? 3  : 1;
+	if (outp && ior->type == SOR && ior->asy.proto == LVDS) {
+		head->asy.or.depth = outp->lvds.bpc8 ? 24 : 18;
+		ior->asy.link      = outp->lvds.dual ? 3 : 1;
 	}
 
 	/* Handle any link training, etc. */
-	if ((outp = ior->asy.outp) && outp->func->acquire)
+	if (outp && outp->func->acquire)
 		outp->func->acquire(outp);
 
 	/* Execute OnInt2 IED script. */
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h
index 3f3924c41957..b7631c1ab242 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outp.h
@@ -2,7 +2,6 @@
 #ifndef __NVKM_DISP_OUTP_H__
 #define __NVKM_DISP_OUTP_H__
 #include "priv.h"
-#include <core/notify.h>
 
 #include <subdev/bios.h>
 #include <subdev/bios/dcb.h>
@@ -28,13 +27,19 @@ struct nvkm_outp {
 
 	union {
 		struct {
+			bool dual;
+			bool bpc8;
+		} lvds;
+
+		struct {
 			struct nvbios_dpout info;
 			u8 version;
 
 			struct nvkm_i2c_aux *aux;
 
-			struct nvkm_notify hpd;
-			bool present;
+			bool enabled;
+			bool aux_pwr;
+			bool aux_pwr_pu;
 			u8 lttpr[6];
 			u8 lttprs;
 			u8 dpcd[16];
@@ -49,12 +54,17 @@ struct nvkm_outp {
 			struct mutex mutex;
 			struct {
 				atomic_t done;
+				u8 nr;
+				u8 bw;
 				bool mst;
 			} lt;
 		} dp;
 	};
 
 	struct nvkm_object object;
+	struct {
+		struct nvkm_head *head;
+	} asy;
 };
 
 int nvkm_outp_new_(const struct nvkm_outp_func *, struct nvkm_disp *, int index,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h
index cb25dfe849f0..ec5292a8f3c8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/priv.h
@@ -42,10 +42,6 @@ struct nvkm_disp_func {
 	} user[];
 };
 
-int  nvkm_disp_ntfy(struct nvkm_object *, u32, struct nvkm_event **);
-int nv04_disp_mthd(struct nvkm_object *, u32, void *, u32);
-int nv50_disp_root_mthd_(struct nvkm_object *, u32, void *, u32);
-
 int nv50_disp_oneinit(struct nvkm_disp *);
 int nv50_disp_init(struct nvkm_disp *);
 void nv50_disp_fini(struct nvkm_disp *);
@@ -86,4 +82,5 @@ extern const struct nvkm_event_func gv100_disp_chan_uevent;
 int nvkm_udisp_new(const struct nvkm_oclass *, void *, u32, struct nvkm_object **);
 int nvkm_uconn_new(const struct nvkm_oclass *, void *, u32, struct nvkm_object **);
 int nvkm_uoutp_new(const struct nvkm_oclass *, void *, u32, struct nvkm_object **);
+int nvkm_uhead_new(const struct nvkm_oclass *, void *, u32, struct nvkm_object **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c
deleted file mode 100644
index 0af45ccd140c..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c
+++ /dev/null
@@ -1,250 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "chan.h"
-#include "head.h"
-#include "ior.h"
-#include "outp.h"
-
-#include <core/client.h>
-
-#include <nvif/class.h>
-#include <nvif/cl5070.h>
-#include <nvif/unpack.h>
-
-int
-nv50_disp_root_mthd_(struct nvkm_object *object, u32 mthd, void *data, u32 size)
-{
-	union {
-		struct nv50_disp_mthd_v0 v0;
-		struct nv50_disp_mthd_v1 v1;
-	} *args = data;
-	struct nvkm_disp *disp = nvkm_udisp(object);
-	struct nvkm_outp *temp, *outp = NULL;
-	struct nvkm_head *head;
-	u16 type, mask = 0;
-	int hidx, ret = -ENOSYS;
-
-	if (mthd != NV50_DISP_MTHD)
-		return -EINVAL;
-
-	nvif_ioctl(object, "disp mthd size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) {
-		nvif_ioctl(object, "disp mthd vers %d mthd %02x head %d\n",
-			   args->v0.version, args->v0.method, args->v0.head);
-		mthd = args->v0.method;
-		hidx = args->v0.head;
-	} else
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v1, 1, 1, true))) {
-		nvif_ioctl(object, "disp mthd vers %d mthd %02x "
-				   "type %04x mask %04x\n",
-			   args->v1.version, args->v1.method,
-			   args->v1.hasht, args->v1.hashm);
-		mthd = args->v1.method;
-		type = args->v1.hasht;
-		mask = args->v1.hashm;
-		hidx = ffs((mask >> 8) & 0x0f) - 1;
-	} else
-		return ret;
-
-	if (!(head = nvkm_head_find(disp, hidx)))
-		return -ENXIO;
-
-	if (mask) {
-		list_for_each_entry(temp, &disp->outps, head) {
-			if ((temp->info.hasht         == type) &&
-			    (temp->info.hashm & mask) == mask) {
-				outp = temp;
-				break;
-			}
-		}
-		if (outp == NULL)
-			return -ENXIO;
-	}
-
-	switch (mthd) {
-	case NV50_DISP_SCANOUTPOS: {
-		return nvkm_head_mthd_scanoutpos(object, head, data, size);
-	}
-	default:
-		break;
-	}
-
-	switch (mthd * !!outp) {
-	case NV50_DISP_MTHD_V1_ACQUIRE: {
-		union {
-			struct nv50_disp_acquire_v0 v0;
-		} *args = data;
-		int ret = -ENOSYS;
-		if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-			ret = nvkm_outp_acquire(outp, NVKM_OUTP_USER, args->v0.hda);
-			if (ret == 0) {
-				args->v0.or = outp->ior->id;
-				args->v0.link = outp->ior->asy.link;
-			}
-		}
-		return ret;
-	}
-		break;
-	case NV50_DISP_MTHD_V1_RELEASE:
-		nvkm_outp_release(outp, NVKM_OUTP_USER);
-		return 0;
-	case NV50_DISP_MTHD_V1_SOR_HDA_ELD: {
-		union {
-			struct nv50_disp_sor_hda_eld_v0 v0;
-		} *args = data;
-		struct nvkm_ior *ior = outp->ior;
-		int ret = -ENOSYS;
-
-		nvif_ioctl(object, "disp sor hda eld size %d\n", size);
-		if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) {
-			nvif_ioctl(object, "disp sor hda eld vers %d\n",
-				   args->v0.version);
-			if (size > 0x60)
-				return -E2BIG;
-		} else
-			return ret;
-
-		if (!ior->hda)
-			return -ENODEV;
-
-		if (size && args->v0.data[0]) {
-			if (outp->info.type == DCB_OUTPUT_DP)
-				ior->func->dp->audio(ior, hidx, true);
-			ior->func->hda->hpd(ior, hidx, true);
-			ior->func->hda->eld(ior, hidx, data, size);
-		} else {
-			if (outp->info.type == DCB_OUTPUT_DP)
-				ior->func->dp->audio(ior, hidx, false);
-			ior->func->hda->hpd(ior, hidx, false);
-		}
-
-		return 0;
-	}
-		break;
-	case NV50_DISP_MTHD_V1_SOR_HDMI_PWR: {
-		union {
-			struct nv50_disp_sor_hdmi_pwr_v0 v0;
-		} *args = data;
-		u8 *vendor, vendor_size;
-		u8 *avi, avi_size;
-		int ret = -ENOSYS;
-
-		nvif_ioctl(object, "disp sor hdmi ctrl size %d\n", size);
-		if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) {
-			nvif_ioctl(object, "disp sor hdmi ctrl vers %d state %d "
-					   "max_ac_packet %d rekey %d scdc %d\n",
-				   args->v0.version, args->v0.state,
-				   args->v0.max_ac_packet, args->v0.rekey,
-				   args->v0.scdc);
-			if (args->v0.max_ac_packet > 0x1f || args->v0.rekey > 0x7f)
-				return -EINVAL;
-			if ((args->v0.avi_infoframe_length
-			     + args->v0.vendor_infoframe_length) > size)
-				return -EINVAL;
-			else
-			if ((args->v0.avi_infoframe_length
-			     + args->v0.vendor_infoframe_length) < size)
-				return -E2BIG;
-			avi = data;
-			avi_size = args->v0.avi_infoframe_length;
-			vendor = avi + avi_size;
-			vendor_size = args->v0.vendor_infoframe_length;
-		} else
-			return ret;
-
-		if (!outp->ior->func->hdmi.ctrl)
-			return -ENODEV;
-
-		outp->ior->func->hdmi.ctrl(outp->ior, hidx, args->v0.state,
-					   args->v0.max_ac_packet,
-					   args->v0.rekey, avi, avi_size,
-					   vendor, vendor_size);
-
-		if (outp->ior->func->hdmi.scdc)
-			outp->ior->func->hdmi.scdc(outp->ior, args->v0.scdc);
-
-		return 0;
-	}
-		break;
-	case NV50_DISP_MTHD_V1_SOR_LVDS_SCRIPT: {
-		union {
-			struct nv50_disp_sor_lvds_script_v0 v0;
-		} *args = data;
-		int ret = -ENOSYS;
-		nvif_ioctl(object, "disp sor lvds script size %d\n", size);
-		if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-			nvif_ioctl(object, "disp sor lvds script "
-					   "vers %d name %04x\n",
-				   args->v0.version, args->v0.script);
-			disp->sor.lvdsconf = args->v0.script;
-			return 0;
-		} else
-			return ret;
-	}
-		break;
-	case NV50_DISP_MTHD_V1_SOR_DP_MST_LINK: {
-		union {
-			struct nv50_disp_sor_dp_mst_link_v0 v0;
-		} *args = data;
-		int ret = -ENOSYS;
-		nvif_ioctl(object, "disp sor dp mst link size %d\n", size);
-		if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-			nvif_ioctl(object, "disp sor dp mst link vers %d state %d\n",
-				   args->v0.version, args->v0.state);
-			outp->dp.lt.mst = !!args->v0.state;
-			return 0;
-		} else
-			return ret;
-	}
-		break;
-	case NV50_DISP_MTHD_V1_SOR_DP_MST_VCPI: {
-		union {
-			struct nv50_disp_sor_dp_mst_vcpi_v0 v0;
-		} *args = data;
-		int ret = -ENOSYS;
-		nvif_ioctl(object, "disp sor dp mst vcpi size %d\n", size);
-		if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-			nvif_ioctl(object, "disp sor dp mst vcpi vers %d "
-					   "slot %02x/%02x pbn %04x/%04x\n",
-				   args->v0.version, args->v0.start_slot,
-				   args->v0.num_slots, args->v0.pbn,
-				   args->v0.aligned_pbn);
-			if (!outp->ior->func->dp->vcpi)
-				return -ENODEV;
-			outp->ior->func->dp->vcpi(outp->ior, hidx,
-						 args->v0.start_slot,
-						 args->v0.num_slots,
-						 args->v0.pbn,
-						 args->v0.aligned_pbn);
-			return 0;
-		} else
-			return ret;
-	}
-		break;
-	default:
-		break;
-	}
-
-	return -EINVAL;
-}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c
index e4ad1a6f6c88..f5242a672279 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu102.c
@@ -88,10 +88,7 @@ tu102_sor = {
 	.state = gv100_sor_state,
 	.power = nv50_sor_power,
 	.clock = gf119_sor_clock,
-	.hdmi = {
-		.ctrl = gv100_sor_hdmi_ctrl,
-		.scdc = gm200_sor_hdmi_scdc,
-	},
+	.hdmi = &gv100_sor_hdmi,
 	.dp = &tu102_sor_dp,
 	.hda = &gv100_sor_hda,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c
index fd9f18144c26..dad942be6679 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uconn.c
@@ -21,12 +21,86 @@
  */
 #define nvkm_uconn(p) container_of((p), struct nvkm_conn, object)
 #include "conn.h"
+#include "outp.h"
 
+#include <core/client.h>
+#include <core/event.h>
 #include <subdev/gpio.h>
+#include <subdev/i2c.h>
 
 #include <nvif/if0011.h>
 
 static int
+nvkm_uconn_uevent_aux(struct nvkm_object *object, u64 token, u32 bits)
+{
+	union nvif_conn_event_args args;
+
+	args.v0.version = 0;
+	args.v0.types = 0;
+	if (bits & NVKM_I2C_PLUG)
+		args.v0.types |= NVIF_CONN_EVENT_V0_PLUG;
+	if (bits & NVKM_I2C_UNPLUG)
+		args.v0.types |= NVIF_CONN_EVENT_V0_UNPLUG;
+	if (bits & NVKM_I2C_IRQ)
+		args.v0.types |= NVIF_CONN_EVENT_V0_IRQ;
+
+	return object->client->event(token, &args, sizeof(args.v0));
+}
+
+static int
+nvkm_uconn_uevent_gpio(struct nvkm_object *object, u64 token, u32 bits)
+{
+	union nvif_conn_event_args args;
+
+	args.v0.version = 0;
+	args.v0.types = 0;
+	if (bits & NVKM_GPIO_HI)
+		args.v0.types |= NVIF_CONN_EVENT_V0_PLUG;
+	if (bits & NVKM_GPIO_LO)
+		args.v0.types |= NVIF_CONN_EVENT_V0_UNPLUG;
+
+	return object->client->event(token, &args, sizeof(args.v0));
+}
+
+static int
+nvkm_uconn_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_uevent *uevent)
+{
+	struct nvkm_conn *conn = nvkm_uconn(object);
+	struct nvkm_device *device = conn->disp->engine.subdev.device;
+	struct nvkm_outp *outp;
+	union nvif_conn_event_args *args = argv;
+	u64 bits = 0;
+
+	if (!uevent) {
+		if (conn->info.hpd == DCB_GPIO_UNUSED)
+			return -ENOSYS;
+		return 0;
+	}
+
+	if (argc != sizeof(args->v0) || args->v0.version != 0)
+		return -ENOSYS;
+
+	list_for_each_entry(outp, &conn->disp->outps, head) {
+		if (outp->info.connector == conn->index && outp->dp.aux) {
+			if (args->v0.types & NVIF_CONN_EVENT_V0_PLUG  ) bits |= NVKM_I2C_PLUG;
+			if (args->v0.types & NVIF_CONN_EVENT_V0_UNPLUG) bits |= NVKM_I2C_UNPLUG;
+			if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ   ) bits |= NVKM_I2C_IRQ;
+
+			return nvkm_uevent_add(uevent, &device->i2c->event, outp->dp.aux->id, bits,
+					       nvkm_uconn_uevent_aux);
+		}
+	}
+
+	if (args->v0.types & NVIF_CONN_EVENT_V0_PLUG  ) bits |= NVKM_GPIO_HI;
+	if (args->v0.types & NVIF_CONN_EVENT_V0_UNPLUG) bits |= NVKM_GPIO_LO;
+	if (args->v0.types & NVIF_CONN_EVENT_V0_IRQ)
+		return -EINVAL;
+
+	return nvkm_uevent_add(uevent, &device->gpio->event, conn->info.hpd, bits,
+			       nvkm_uconn_uevent_gpio);
+}
+
+static int
 nvkm_uconn_mthd_hpd_status(struct nvkm_conn *conn, void *argv, u32 argc)
 {
 	struct nvkm_gpio *gpio = conn->disp->engine.subdev.device->gpio;
@@ -82,6 +156,7 @@ static const struct nvkm_object_func
 nvkm_uconn = {
 	.dtor = nvkm_uconn_dtor,
 	.mthd = nvkm_uconn_mthd,
+	.uevent = nvkm_uconn_uevent,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/udisp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/udisp.c
index 0841e7ce0343..0268d1d75805 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/udisp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/udisp.c
@@ -21,6 +21,7 @@
  */
 #include "priv.h"
 #include "conn.h"
+#include "head.h"
 #include "outp.h"
 
 #include <nvif/class.h>
@@ -43,6 +44,12 @@ nvkm_udisp_sclass(struct nvkm_object *object, int index, struct nvkm_oclass *scl
 		return 0;
 	}
 
+	if (index-- == 0) {
+		sclass->base = (struct nvkm_sclass) { 0, 0, NVIF_CLASS_HEAD };
+		sclass->ctor = nvkm_uhead_new;
+		return 0;
+	}
+
 	if (disp->func->user[index].ctor) {
 		sclass->base = disp->func->user[index].base;
 		sclass->ctor = disp->func->user[index].ctor;
@@ -52,17 +59,6 @@ nvkm_udisp_sclass(struct nvkm_object *object, int index, struct nvkm_oclass *scl
 	return -EINVAL;
 }
 
-static int
-nvkm_udisp_mthd(struct nvkm_object *object, u32 mthd, void *argv, u32 argc)
-{
-	struct nvkm_disp *disp = nvkm_udisp(object);
-
-	if (disp->engine.subdev.device->card_type >= NV_50)
-		return nv50_disp_root_mthd_(object, mthd, argv, argc);
-
-	return nv04_disp_mthd(object, mthd, argv, argc);
-}
-
 static void *
 nvkm_udisp_dtor(struct nvkm_object *object)
 {
@@ -78,8 +74,6 @@ nvkm_udisp_dtor(struct nvkm_object *object)
 static const struct nvkm_object_func
 nvkm_udisp = {
 	.dtor = nvkm_udisp_dtor,
-	.mthd = nvkm_udisp_mthd,
-	.ntfy = nvkm_disp_ntfy,
 	.sclass = nvkm_udisp_sclass,
 };
 
@@ -89,6 +83,7 @@ nvkm_udisp_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nv
 	struct nvkm_disp *disp = nvkm_disp(oclass->engine);
 	struct nvkm_conn *conn;
 	struct nvkm_outp *outp;
+	struct nvkm_head *head;
 	union nvif_disp_args *args = argv;
 
 	if (argc != sizeof(args->v0) || args->v0.version != 0)
@@ -111,5 +106,9 @@ nvkm_udisp_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nv
 	list_for_each_entry(outp, &disp->outps, head)
 		args->v0.outp_mask |= BIT(outp->index);
 
+	args->v0.head_mask = 0;
+	list_for_each_entry(head, &disp->heads, head)
+		args->v0.head_mask |= BIT(head->id);
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uhead.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uhead.c
new file mode 100644
index 000000000000..f072cec16040
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uhead.c
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#define nvkm_uhead(p) container_of((p), struct nvkm_head, object)
+#include "head.h"
+#include <core/event.h>
+
+#include <nvif/if0013.h>
+
+#include <nvif/event.h>
+
+static int
+nvkm_uhead_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_uevent *uevent)
+{
+	struct nvkm_head *head = nvkm_uhead(object);
+	union nvif_head_event_args *args = argv;
+
+	if (!uevent)
+		return 0;
+	if (argc != sizeof(args->vn))
+		return -ENOSYS;
+
+	return nvkm_uevent_add(uevent, &head->disp->vblank, head->id,
+			       NVKM_DISP_HEAD_EVENT_VBLANK, NULL);
+}
+
+static int
+nvkm_uhead_mthd_scanoutpos(struct nvkm_head *head, void *argv, u32 argc)
+{
+	union nvif_head_scanoutpos_args *args = argv;
+
+	if (argc != sizeof(args->v0) || args->v0.version != 0)
+		return -ENOSYS;
+
+	head->func->state(head, &head->arm);
+	args->v0.vtotal  = head->arm.vtotal;
+	args->v0.vblanks = head->arm.vblanks;
+	args->v0.vblanke = head->arm.vblanke;
+	args->v0.htotal  = head->arm.htotal;
+	args->v0.hblanks = head->arm.hblanks;
+	args->v0.hblanke = head->arm.hblanke;
+
+	/* We don't support reading htotal/vtotal on pre-NV50 VGA,
+	 * so we have to give up and trigger the timestamping
+	 * fallback in the drm core.
+	 */
+	if (!args->v0.vtotal || !args->v0.htotal)
+		return -ENOTSUPP;
+
+	args->v0.time[0] = ktime_to_ns(ktime_get());
+	head->func->rgpos(head, &args->v0.hline, &args->v0.vline);
+	args->v0.time[1] = ktime_to_ns(ktime_get());
+	return 0;
+}
+
+static int
+nvkm_uhead_mthd(struct nvkm_object *object, u32 mthd, void *argv, u32 argc)
+{
+	struct nvkm_head *head = nvkm_uhead(object);
+
+	switch (mthd) {
+	case NVIF_HEAD_V0_SCANOUTPOS: return nvkm_uhead_mthd_scanoutpos(head, argv, argc);
+	default:
+		return -EINVAL;
+	}
+}
+
+static void *
+nvkm_uhead_dtor(struct nvkm_object *object)
+{
+	struct nvkm_head *head = nvkm_uhead(object);
+	struct nvkm_disp *disp = head->disp;
+
+	spin_lock(&disp->client.lock);
+	head->object.func = NULL;
+	spin_unlock(&disp->client.lock);
+	return NULL;
+}
+
+static const struct nvkm_object_func
+nvkm_uhead = {
+	.dtor = nvkm_uhead_dtor,
+	.mthd = nvkm_uhead_mthd,
+	.uevent = nvkm_uhead_uevent,
+};
+
+int
+nvkm_uhead_new(const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nvkm_object **pobject)
+{
+	struct nvkm_disp *disp = nvkm_udisp(oclass->parent);
+	struct nvkm_head *head;
+	union nvif_head_args *args = argv;
+	int ret;
+
+	if (argc != sizeof(args->v0) || args->v0.version != 0)
+		return -ENOSYS;
+	if (!(head = nvkm_head_find(disp, args->v0.id)))
+		return -EINVAL;
+
+	ret = -EBUSY;
+	spin_lock(&disp->client.lock);
+	if (!head->object.func) {
+		nvkm_object_ctor(&nvkm_uhead, oclass, &head->object);
+		*pobject = &head->object;
+		ret = 0;
+	}
+	spin_unlock(&disp->client.lock);
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c
index abedb3e86361..4f0ca709c85a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/uoutp.c
@@ -21,11 +21,238 @@
  */
 #define nvkm_uoutp(p) container_of((p), struct nvkm_outp, object)
 #include "outp.h"
+#include "dp.h"
+#include "head.h"
 #include "ior.h"
 
 #include <nvif/if0012.h>
 
 static int
+nvkm_uoutp_mthd_dp_mst_vcpi(struct nvkm_outp *outp, void *argv, u32 argc)
+{
+	struct nvkm_ior *ior = outp->ior;
+	union nvif_outp_dp_mst_vcpi_args *args = argv;
+
+	if (argc != sizeof(args->v0) || args->v0.version != 0)
+		return -ENOSYS;
+	if (!ior->func->dp || !ior->func->dp->vcpi || !nvkm_head_find(outp->disp, args->v0.head))
+		return -EINVAL;
+
+	ior->func->dp->vcpi(ior, args->v0.head, args->v0.start_slot, args->v0.num_slots,
+				 args->v0.pbn, args->v0.aligned_pbn);
+	return 0;
+}
+
+static int
+nvkm_uoutp_mthd_dp_retrain(struct nvkm_outp *outp, void *argv, u32 argc)
+{
+	union nvif_outp_dp_retrain_args *args = argv;
+
+	if (argc != sizeof(args->vn))
+		return -ENOSYS;
+
+	if (!atomic_read(&outp->dp.lt.done))
+		return 0;
+
+	return outp->func->acquire(outp);
+}
+
+static int
+nvkm_uoutp_mthd_dp_aux_pwr(struct nvkm_outp *outp, void *argv, u32 argc)
+{
+	union nvif_outp_dp_aux_pwr_args *args = argv;
+
+	if (argc != sizeof(args->v0) || args->v0.version != 0)
+		return -ENOSYS;
+
+	outp->dp.enabled = !!args->v0.state;
+	nvkm_dp_enable(outp, outp->dp.enabled);
+	return 0;
+}
+
+static int
+nvkm_uoutp_mthd_hda_eld(struct nvkm_outp *outp, void *argv, u32 argc)
+{
+	struct nvkm_ior *ior = outp->ior;
+	union nvif_outp_hda_eld_args *args = argv;
+
+	if (argc < sizeof(args->v0) || args->v0.version != 0)
+		return -ENOSYS;
+	argc -= sizeof(args->v0);
+
+	if (!ior->hda || !nvkm_head_find(outp->disp, args->v0.head))
+		return -EINVAL;
+	if (argc > 0x60)
+		return -E2BIG;
+
+	if (argc && args->v0.data[0]) {
+		if (outp->info.type == DCB_OUTPUT_DP)
+			ior->func->dp->audio(ior, args->v0.head, true);
+		ior->func->hda->hpd(ior, args->v0.head, true);
+		ior->func->hda->eld(ior, args->v0.head, args->v0.data, argc);
+	} else {
+		if (outp->info.type == DCB_OUTPUT_DP)
+			ior->func->dp->audio(ior, args->v0.head, false);
+		ior->func->hda->hpd(ior, args->v0.head, false);
+	}
+
+	return 0;
+}
+
+static int
+nvkm_uoutp_mthd_infoframe(struct nvkm_outp *outp, void *argv, u32 argc)
+{
+	struct nvkm_ior *ior = outp->ior;
+	union nvif_outp_infoframe_args *args = argv;
+	ssize_t size = argc - sizeof(*args);
+
+	if (argc < sizeof(args->v0) || args->v0.version != 0)
+		return -ENOSYS;
+	if (!nvkm_head_find(outp->disp, args->v0.head))
+		return -EINVAL;
+
+	switch (ior->func->hdmi ? args->v0.type : 0xff) {
+	case NVIF_OUTP_INFOFRAME_V0_AVI:
+		ior->func->hdmi->infoframe_avi(ior, args->v0.head, &args->v0.data, size);
+		return 0;
+	case NVIF_OUTP_INFOFRAME_V0_VSI:
+		ior->func->hdmi->infoframe_vsi(ior, args->v0.head, &args->v0.data, size);
+		return 0;
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+static int
+nvkm_uoutp_mthd_release(struct nvkm_outp *outp, void *argv, u32 argc)
+{
+	struct nvkm_head *head = outp->asy.head;
+	struct nvkm_ior *ior = outp->ior;
+	union nvif_outp_release_args *args = argv;
+
+	if (argc != sizeof(args->vn))
+		return -ENOSYS;
+
+	if (ior->func->hdmi && head) {
+		ior->func->hdmi->infoframe_avi(ior, head->id, NULL, 0);
+		ior->func->hdmi->infoframe_vsi(ior, head->id, NULL, 0);
+		ior->func->hdmi->ctrl(ior, head->id, false, 0, 0);
+	}
+
+	nvkm_outp_release(outp, NVKM_OUTP_USER);
+	return 0;
+}
+
+static int
+nvkm_uoutp_mthd_acquire_dp(struct nvkm_outp *outp, u8 dpcd[16],
+			   u8 link_nr, u8 link_bw, bool hda, bool mst)
+{
+	int ret;
+
+	ret = nvkm_outp_acquire(outp, NVKM_OUTP_USER, hda);
+	if (ret)
+		return ret;
+
+	memcpy(outp->dp.dpcd, dpcd, sizeof(outp->dp.dpcd));
+	outp->dp.lt.nr = link_nr;
+	outp->dp.lt.bw = link_bw;
+	outp->dp.lt.mst = mst;
+	return 0;
+}
+
+static int
+nvkm_uoutp_mthd_acquire_tmds(struct nvkm_outp *outp, u8 head, u8 hdmi, u8 hdmi_max_ac_packet,
+			     u8 hdmi_rekey, u8 hdmi_scdc, u8 hdmi_hda)
+{
+	struct nvkm_ior *ior;
+	int ret;
+
+	if (!(outp->asy.head = nvkm_head_find(outp->disp, head)))
+		return -EINVAL;
+
+	ret = nvkm_outp_acquire(outp, NVKM_OUTP_USER, hdmi && hdmi_hda);
+	if (ret)
+		return ret;
+
+	ior = outp->ior;
+
+	if (hdmi) {
+		if (!ior->func->hdmi ||
+		    hdmi_max_ac_packet > 0x1f || hdmi_rekey > 0x7f ||
+		    (hdmi_scdc && !ior->func->hdmi->scdc)) {
+			nvkm_outp_release(outp, NVKM_OUTP_USER);
+			return -EINVAL;
+		}
+
+		ior->func->hdmi->ctrl(ior, head, hdmi, hdmi_max_ac_packet, hdmi_rekey);
+		if (ior->func->hdmi->scdc)
+			ior->func->hdmi->scdc(ior, hdmi_scdc);
+	}
+
+	return 0;
+}
+
+static int
+nvkm_uoutp_mthd_acquire_lvds(struct nvkm_outp *outp, bool dual, bool bpc8)
+{
+	if (outp->info.type != DCB_OUTPUT_LVDS)
+		return -EINVAL;
+
+	outp->lvds.dual = dual;
+	outp->lvds.bpc8 = bpc8;
+
+	return nvkm_outp_acquire(outp, NVKM_OUTP_USER, false);
+}
+
+static int
+nvkm_uoutp_mthd_acquire(struct nvkm_outp *outp, void *argv, u32 argc)
+{
+	union nvif_outp_acquire_args *args = argv;
+	int ret;
+
+	if (argc != sizeof(args->v0) || args->v0.version != 0)
+		return -ENOSYS;
+	if (outp->ior)
+		return -EBUSY;
+
+	switch (args->v0.proto) {
+	case NVIF_OUTP_ACQUIRE_V0_RGB_CRT:
+		ret = nvkm_outp_acquire(outp, NVKM_OUTP_USER, false);
+		break;
+	case NVIF_OUTP_ACQUIRE_V0_TMDS:
+		ret = nvkm_uoutp_mthd_acquire_tmds(outp, args->v0.tmds.head,
+							 args->v0.tmds.hdmi,
+							 args->v0.tmds.hdmi_max_ac_packet,
+							 args->v0.tmds.hdmi_rekey,
+							 args->v0.tmds.hdmi_scdc,
+							 args->v0.tmds.hdmi_hda);
+		break;
+	case NVIF_OUTP_ACQUIRE_V0_LVDS:
+		ret = nvkm_uoutp_mthd_acquire_lvds(outp, args->v0.lvds.dual, args->v0.lvds.bpc8);
+		break;
+	case NVIF_OUTP_ACQUIRE_V0_DP:
+		ret = nvkm_uoutp_mthd_acquire_dp(outp, args->v0.dp.dpcd,
+						       args->v0.dp.link_nr,
+						       args->v0.dp.link_bw,
+						       args->v0.dp.hda != 0,
+						       args->v0.dp.mst != 0);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+
+	if (ret)
+		return ret;
+
+	args->v0.or = outp->ior->id;
+	args->v0.link = outp->ior->asy.link;
+	return 0;
+}
+
+static int
 nvkm_uoutp_mthd_load_detect(struct nvkm_outp *outp, void *argv, u32 argc)
 {
 	union nvif_outp_load_detect_args *args = argv;
@@ -49,10 +276,28 @@ nvkm_uoutp_mthd_load_detect(struct nvkm_outp *outp, void *argv, u32 argc)
 }
 
 static int
+nvkm_uoutp_mthd_acquired(struct nvkm_outp *outp, u32 mthd, void *argv, u32 argc)
+{
+	switch (mthd) {
+	case NVIF_OUTP_V0_RELEASE    : return nvkm_uoutp_mthd_release    (outp, argv, argc);
+	case NVIF_OUTP_V0_INFOFRAME  : return nvkm_uoutp_mthd_infoframe  (outp, argv, argc);
+	case NVIF_OUTP_V0_HDA_ELD    : return nvkm_uoutp_mthd_hda_eld    (outp, argv, argc);
+	case NVIF_OUTP_V0_DP_RETRAIN : return nvkm_uoutp_mthd_dp_retrain (outp, argv, argc);
+	case NVIF_OUTP_V0_DP_MST_VCPI: return nvkm_uoutp_mthd_dp_mst_vcpi(outp, argv, argc);
+	default:
+		break;
+	}
+
+	return -EINVAL;
+}
+
+static int
 nvkm_uoutp_mthd_noacquire(struct nvkm_outp *outp, u32 mthd, void *argv, u32 argc)
 {
 	switch (mthd) {
 	case NVIF_OUTP_V0_LOAD_DETECT: return nvkm_uoutp_mthd_load_detect(outp, argv, argc);
+	case NVIF_OUTP_V0_ACQUIRE    : return nvkm_uoutp_mthd_acquire    (outp, argv, argc);
+	case NVIF_OUTP_V0_DP_AUX_PWR : return nvkm_uoutp_mthd_dp_aux_pwr (outp, argv, argc);
 	default:
 		break;
 	}
@@ -73,6 +318,11 @@ nvkm_uoutp_mthd(struct nvkm_object *object, u32 mthd, void *argv, u32 argc)
 	if (ret <= 0)
 		goto done;
 
+	if (outp->ior)
+		ret = nvkm_uoutp_mthd_acquired(outp, mthd, argv, argc);
+	else
+		ret = -EIO;
+
 done:
 	mutex_unlock(&disp->super.mutex);
 	return ret;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c b/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c
index 43b7dec45179..d619b40a42c3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c
@@ -65,10 +65,10 @@ nvkm_falcon_intr(struct nvkm_engine *engine)
 	u32 dest = nvkm_rd32(device, base + 0x01c);
 	u32 intr = nvkm_rd32(device, base + 0x008) & dest & ~(dest >> 16);
 	u32 inst = nvkm_rd32(device, base + 0x050) & 0x3fffffff;
-	struct nvkm_fifo_chan *chan;
+	struct nvkm_chan *chan;
 	unsigned long flags;
 
-	chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags);
+	chan = nvkm_chan_get_inst(engine, (u64)inst << 12, &flags);
 
 	if (intr & 0x00000040) {
 		if (falcon->func->intr) {
@@ -89,7 +89,7 @@ nvkm_falcon_intr(struct nvkm_engine *engine)
 		nvkm_wr32(device, base + 0x004, intr);
 	}
 
-	nvkm_fifo_chan_put(device->fifo, flags, &chan);
+	nvkm_chan_put(&chan, flags);
 }
 
 static int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
index 5e831d347a95..5a074b9970ab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
@@ -1,11 +1,18 @@
 # SPDX-License-Identifier: MIT
 nvkm-y += nvkm/engine/fifo/base.o
+nvkm-y += nvkm/engine/fifo/cgrp.o
+nvkm-y += nvkm/engine/fifo/chan.o
+nvkm-y += nvkm/engine/fifo/chid.o
+nvkm-y += nvkm/engine/fifo/runl.o
+nvkm-y += nvkm/engine/fifo/runq.o
+
 nvkm-y += nvkm/engine/fifo/nv04.o
 nvkm-y += nvkm/engine/fifo/nv10.o
 nvkm-y += nvkm/engine/fifo/nv17.o
 nvkm-y += nvkm/engine/fifo/nv40.o
 nvkm-y += nvkm/engine/fifo/nv50.o
 nvkm-y += nvkm/engine/fifo/g84.o
+nvkm-y += nvkm/engine/fifo/g98.o
 nvkm-y += nvkm/engine/fifo/gf100.o
 nvkm-y += nvkm/engine/fifo/gk104.o
 nvkm-y += nvkm/engine/fifo/gk110.o
@@ -13,28 +20,11 @@ nvkm-y += nvkm/engine/fifo/gk208.o
 nvkm-y += nvkm/engine/fifo/gk20a.o
 nvkm-y += nvkm/engine/fifo/gm107.o
 nvkm-y += nvkm/engine/fifo/gm200.o
-nvkm-y += nvkm/engine/fifo/gm20b.o
 nvkm-y += nvkm/engine/fifo/gp100.o
-nvkm-y += nvkm/engine/fifo/gp10b.o
 nvkm-y += nvkm/engine/fifo/gv100.o
 nvkm-y += nvkm/engine/fifo/tu102.o
+nvkm-y += nvkm/engine/fifo/ga100.o
 nvkm-y += nvkm/engine/fifo/ga102.o
 
-nvkm-y += nvkm/engine/fifo/chan.o
-nvkm-y += nvkm/engine/fifo/channv50.o
-nvkm-y += nvkm/engine/fifo/chang84.o
-
-nvkm-y += nvkm/engine/fifo/dmanv04.o
-nvkm-y += nvkm/engine/fifo/dmanv10.o
-nvkm-y += nvkm/engine/fifo/dmanv17.o
-nvkm-y += nvkm/engine/fifo/dmanv40.o
-
-nvkm-y += nvkm/engine/fifo/gpfifonv50.o
-nvkm-y += nvkm/engine/fifo/gpfifog84.o
-nvkm-y += nvkm/engine/fifo/gpfifogf100.o
-nvkm-y += nvkm/engine/fifo/gpfifogk104.o
-nvkm-y += nvkm/engine/fifo/gpfifogv100.o
-nvkm-y += nvkm/engine/fifo/gpfifotu102.o
-
-nvkm-y += nvkm/engine/fifo/usergv100.o
-nvkm-y += nvkm/engine/fifo/usertu102.o
+nvkm-y += nvkm/engine/fifo/ucgrp.o
+nvkm-y += nvkm/engine/fifo/uchan.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
index 58b8df75fc40..5ea9a2ff0663 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
@@ -23,25 +23,32 @@
  */
 #include "priv.h"
 #include "chan.h"
+#include "chid.h"
+#include "runl.h"
+#include "runq.h"
 
-#include <core/client.h>
 #include <core/gpuobj.h>
-#include <core/notify.h>
+#include <subdev/bar.h>
 #include <subdev/mc.h>
+#include <subdev/mmu.h>
 
-#include <nvif/event.h>
 #include <nvif/cl0080.h>
 #include <nvif/unpack.h>
 
-void
-nvkm_fifo_recover_chan(struct nvkm_fifo *fifo, int chid)
+bool
+nvkm_fifo_ctxsw_in_progress(struct nvkm_engine *engine)
 {
-	unsigned long flags;
-	if (WARN_ON(!fifo->func->recover_chan))
-		return;
-	spin_lock_irqsave(&fifo->lock, flags);
-	fifo->func->recover_chan(fifo, chid);
-	spin_unlock_irqrestore(&fifo->lock, flags);
+	struct nvkm_runl *runl;
+	struct nvkm_engn *engn;
+
+	nvkm_runl_foreach(runl, engine->subdev.device->fifo) {
+		nvkm_runl_foreach_engn(engn, runl) {
+			if (engn->engine == engine)
+				return engn->func->chsw ? engn->func->chsw(engn) : false;
+		}
+	}
+
+	return false;
 }
 
 void
@@ -59,160 +66,23 @@ nvkm_fifo_start(struct nvkm_fifo *fifo, unsigned long *flags)
 void
 nvkm_fifo_fault(struct nvkm_fifo *fifo, struct nvkm_fault_data *info)
 {
-	return fifo->func->fault(fifo, info);
-}
-
-void
-nvkm_fifo_chan_put(struct nvkm_fifo *fifo, unsigned long flags,
-		   struct nvkm_fifo_chan **pchan)
-{
-	struct nvkm_fifo_chan *chan = *pchan;
-	if (likely(chan)) {
-		*pchan = NULL;
-		spin_unlock_irqrestore(&fifo->lock, flags);
-	}
-}
-
-struct nvkm_fifo_chan *
-nvkm_fifo_chan_inst_locked(struct nvkm_fifo *fifo, u64 inst)
-{
-	struct nvkm_fifo_chan *chan;
-	list_for_each_entry(chan, &fifo->chan, head) {
-		if (chan->inst->addr == inst) {
-			list_del(&chan->head);
-			list_add(&chan->head, &fifo->chan);
-			return chan;
-		}
-	}
-	return NULL;
-}
-
-struct nvkm_fifo_chan *
-nvkm_fifo_chan_inst(struct nvkm_fifo *fifo, u64 inst, unsigned long *rflags)
-{
-	struct nvkm_fifo_chan *chan;
-	unsigned long flags;
-	spin_lock_irqsave(&fifo->lock, flags);
-	if ((chan = nvkm_fifo_chan_inst_locked(fifo, inst))) {
-		*rflags = flags;
-		return chan;
-	}
-	spin_unlock_irqrestore(&fifo->lock, flags);
-	return NULL;
-}
-
-struct nvkm_fifo_chan *
-nvkm_fifo_chan_chid(struct nvkm_fifo *fifo, int chid, unsigned long *rflags)
-{
-	struct nvkm_fifo_chan *chan;
-	unsigned long flags;
-	spin_lock_irqsave(&fifo->lock, flags);
-	list_for_each_entry(chan, &fifo->chan, head) {
-		if (chan->chid == chid) {
-			list_del(&chan->head);
-			list_add(&chan->head, &fifo->chan);
-			*rflags = flags;
-			return chan;
-		}
-	}
-	spin_unlock_irqrestore(&fifo->lock, flags);
-	return NULL;
-}
-
-void
-nvkm_fifo_kevent(struct nvkm_fifo *fifo, int chid)
-{
-	nvkm_event_send(&fifo->kevent, 1, chid, NULL, 0);
-}
-
-static int
-nvkm_fifo_kevent_ctor(struct nvkm_object *object, void *data, u32 size,
-		      struct nvkm_notify *notify)
-{
-	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
-	if (size == 0) {
-		notify->size  = 0;
-		notify->types = 1;
-		notify->index = chan->chid;
-		return 0;
-	}
-	return -ENOSYS;
-}
-
-static const struct nvkm_event_func
-nvkm_fifo_kevent_func = {
-	.ctor = nvkm_fifo_kevent_ctor,
-};
-
-static void
-nvkm_fifo_uevent_fini(struct nvkm_event *event, int type, int index)
-{
-	struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), uevent);
-	fifo->func->uevent_fini(fifo);
-}
-
-static void
-nvkm_fifo_uevent_init(struct nvkm_event *event, int type, int index)
-{
-	struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), uevent);
-	fifo->func->uevent_init(fifo);
-}
-
-static int
-nvkm_fifo_uevent_ctor(struct nvkm_object *object, void *data, u32 size,
-		      struct nvkm_notify *notify)
-{
-	union {
-		struct nvif_notify_uevent_req none;
-	} *req = data;
-	int ret = -ENOSYS;
-
-	if (!(ret = nvif_unvers(ret, &data, &size, req->none))) {
-		notify->size  = sizeof(struct nvif_notify_uevent_rep);
-		notify->types = 1;
-		notify->index = 0;
-	}
-
-	return ret;
-}
-
-static const struct nvkm_event_func
-nvkm_fifo_uevent_func = {
-	.ctor = nvkm_fifo_uevent_ctor,
-	.init = nvkm_fifo_uevent_init,
-	.fini = nvkm_fifo_uevent_fini,
-};
-
-void
-nvkm_fifo_uevent(struct nvkm_fifo *fifo)
-{
-	struct nvif_notify_uevent_rep rep = {
-	};
-	nvkm_event_send(&fifo->uevent, 1, 0, &rep, sizeof(rep));
+	return fifo->func->mmu_fault->recover(fifo, info);
 }
 
 static int
-nvkm_fifo_class_new_(struct nvkm_device *device,
-		     const struct nvkm_oclass *oclass, void *data, u32 size,
-		     struct nvkm_object **pobject)
+nvkm_fifo_class_new(struct nvkm_device *device, const struct nvkm_oclass *oclass,
+		    void *argv, u32 argc, struct nvkm_object **pobject)
 {
 	struct nvkm_fifo *fifo = nvkm_fifo(oclass->engine);
-	return fifo->func->class_new(fifo, oclass, data, size, pobject);
-}
 
-static const struct nvkm_device_oclass
-nvkm_fifo_class_ = {
-	.ctor = nvkm_fifo_class_new_,
-};
+	if (oclass->engn == &fifo->func->cgrp.user)
+		return nvkm_ucgrp_new(fifo, oclass, argv, argc, pobject);
 
-static int
-nvkm_fifo_class_new(struct nvkm_device *device,
-		    const struct nvkm_oclass *oclass, void *data, u32 size,
-		    struct nvkm_object **pobject)
-{
-	const struct nvkm_fifo_chan_oclass *sclass = oclass->engn;
-	struct nvkm_fifo *fifo = nvkm_fifo(oclass->engine);
-	return sclass->ctor(fifo, oclass, data, size, pobject);
+	if (oclass->engn == &fifo->func->chan.user)
+		return nvkm_uchan_new(fifo, NULL, oclass, argv, argc, pobject);
+
+	WARN_ON(1);
+	return -ENOSYS;
 }
 
 static const struct nvkm_device_oclass
@@ -221,24 +91,28 @@ nvkm_fifo_class = {
 };
 
 static int
-nvkm_fifo_class_get(struct nvkm_oclass *oclass, int index,
-		    const struct nvkm_device_oclass **class)
+nvkm_fifo_class_get(struct nvkm_oclass *oclass, int index, const struct nvkm_device_oclass **class)
 {
 	struct nvkm_fifo *fifo = nvkm_fifo(oclass->engine);
-	const struct nvkm_fifo_chan_oclass *sclass;
+	const struct nvkm_fifo_func_cgrp *cgrp = &fifo->func->cgrp;
+	const struct nvkm_fifo_func_chan *chan = &fifo->func->chan;
 	int c = 0;
 
-	if (fifo->func->class_get) {
-		int ret = fifo->func->class_get(fifo, index, oclass);
-		if (ret == 0)
-			*class = &nvkm_fifo_class_;
-		return ret;
+	/* *_CHANNEL_GROUP_* */
+	if (cgrp->user.oclass) {
+		if (c++ == index) {
+			oclass->base = cgrp->user;
+			oclass->engn = &fifo->func->cgrp.user;
+			*class = &nvkm_fifo_class;
+			return 0;
+		}
 	}
 
-	while ((sclass = fifo->func->chan[c])) {
+	/* *_CHANNEL_DMA, *_CHANNEL_GPFIFO_* */
+	if (chan->user.oclass) {
 		if (c++ == index) {
-			oclass->base = sclass->base;
-			oclass->engn = sclass;
+			oclass->base = chan->user;
+			oclass->engn = &fifo->func->chan.user;
 			*class = &nvkm_fifo_class;
 			return 0;
 		}
@@ -247,19 +121,47 @@ nvkm_fifo_class_get(struct nvkm_oclass *oclass, int index,
 	return c;
 }
 
-static void
-nvkm_fifo_intr(struct nvkm_engine *engine)
+static int
+nvkm_fifo_fini(struct nvkm_engine *engine, bool suspend)
 {
 	struct nvkm_fifo *fifo = nvkm_fifo(engine);
-	fifo->func->intr(fifo);
+	struct nvkm_runl *runl;
+
+	nvkm_inth_block(&fifo->engine.subdev.inth);
+
+	nvkm_runl_foreach(runl, fifo)
+		nvkm_runl_fini(runl);
+
+	return 0;
 }
 
 static int
-nvkm_fifo_fini(struct nvkm_engine *engine, bool suspend)
+nvkm_fifo_init(struct nvkm_engine *engine)
 {
 	struct nvkm_fifo *fifo = nvkm_fifo(engine);
-	if (fifo->func->fini)
-		fifo->func->fini(fifo);
+	struct nvkm_runq *runq;
+	struct nvkm_runl *runl;
+	u32 mask = 0;
+
+	if (fifo->func->init_pbdmas) {
+		nvkm_runq_foreach(runq, fifo)
+			mask |= BIT(runq->id);
+
+		fifo->func->init_pbdmas(fifo, mask);
+
+		nvkm_runq_foreach(runq, fifo)
+			runq->func->init(runq);
+	}
+
+	nvkm_runl_foreach(runl, fifo) {
+		if (runl->func->init)
+			runl->func->init(runl);
+	}
+
+	if (fifo->func->init)
+		fifo->func->init(fifo);
+
+	nvkm_inth_allow(&fifo->engine.subdev.inth);
 	return 0;
 }
 
@@ -267,22 +169,146 @@ static int
 nvkm_fifo_info(struct nvkm_engine *engine, u64 mthd, u64 *data)
 {
 	struct nvkm_fifo *fifo = nvkm_fifo(engine);
+	struct nvkm_runl *runl;
+	struct nvkm_engn *engn;
+	int ret;
+
+	ret = nvkm_subdev_oneinit(&fifo->engine.subdev);
+	if (ret)
+		return ret;
+
 	switch (mthd) {
-	case NV_DEVICE_HOST_CHANNELS: *data = fifo->nr; return 0;
+	case NV_DEVICE_HOST_CHANNELS: *data = fifo->chid ? fifo->chid->nr : 0; return 0;
+	case NV_DEVICE_HOST_RUNLISTS:
+		*data = 0;
+		nvkm_runl_foreach(runl, fifo)
+			*data |= BIT(runl->id);
+		return 0;
+	case NV_DEVICE_HOST_RUNLIST_ENGINES:
+		runl = nvkm_runl_get(fifo, *data, 0);
+		if (runl) {
+			*data = 0;
+			nvkm_runl_foreach_engn(engn, runl) {
+#define CASE(n) case NVKM_ENGINE_##n: *data |= NV_DEVICE_HOST_RUNLIST_ENGINES_##n; break
+				switch (engn->engine->subdev.type) {
+				case NVKM_ENGINE_DMAOBJ:
+					break;
+				CASE(SW    );
+				CASE(GR    );
+				CASE(MPEG  );
+				CASE(ME    );
+				CASE(CIPHER);
+				CASE(BSP   );
+				CASE(VP    );
+				CASE(CE    );
+				CASE(SEC   );
+				CASE(MSVLD );
+				CASE(MSPDEC);
+				CASE(MSPPP );
+				CASE(MSENC );
+				CASE(VIC   );
+				CASE(SEC2  );
+				CASE(NVDEC );
+				CASE(NVENC );
+				default:
+					WARN_ON(1);
+					break;
+				}
+#undef CASE
+			}
+			return 0;
+		}
+		return -EINVAL;
+	case NV_DEVICE_HOST_RUNLIST_CHANNELS:
+		if (!fifo->chid) {
+			runl = nvkm_runl_get(fifo, *data, 0);
+			if (runl) {
+				*data = runl->chid->nr;
+				return 0;
+			}
+		}
+		return -EINVAL;
 	default:
-		if (fifo->func->info)
-			return fifo->func->info(fifo, mthd, data);
 		break;
 	}
+
 	return -ENOSYS;
 }
 
 static int
 nvkm_fifo_oneinit(struct nvkm_engine *engine)
 {
+	struct nvkm_subdev *subdev = &engine->subdev;
+	struct nvkm_device *device = subdev->device;
 	struct nvkm_fifo *fifo = nvkm_fifo(engine);
-	if (fifo->func->oneinit)
-		return fifo->func->oneinit(fifo);
+	struct nvkm_runl *runl;
+	struct nvkm_engn *engn;
+	int ret, nr, i;
+
+	/* Initialise CHID/CGID allocator(s) on GPUs where they aren't per-runlist. */
+	if (fifo->func->chid_nr) {
+		ret = fifo->func->chid_ctor(fifo, fifo->func->chid_nr(fifo));
+		if (ret)
+			return ret;
+	}
+
+	/* Create runqueues for each PBDMA. */
+	if (fifo->func->runq_nr) {
+		for (nr = fifo->func->runq_nr(fifo), i = 0; i < nr; i++) {
+			if (!nvkm_runq_new(fifo, i))
+				return -ENOMEM;
+		}
+	}
+
+	/* Create runlists. */
+	ret = fifo->func->runl_ctor(fifo);
+	if (ret)
+		return ret;
+
+	nvkm_runl_foreach(runl, fifo) {
+		RUNL_DEBUG(runl, "chan:%06x", runl->chan);
+		nvkm_runl_foreach_engn(engn, runl) {
+			ENGN_DEBUG(engn, "");
+		}
+	}
+
+	/* Register interrupt handler. */
+	if (fifo->func->intr) {
+		ret = nvkm_inth_add(&device->mc->intr, NVKM_INTR_SUBDEV, NVKM_INTR_PRIO_NORMAL,
+				    subdev, fifo->func->intr, &subdev->inth);
+		if (ret) {
+			nvkm_error(subdev, "intr %d\n", ret);
+			return ret;
+		}
+	}
+
+	/* Initialise non-stall intr handling. */
+	if (fifo->func->nonstall_ctor) {
+		ret = fifo->func->nonstall_ctor(fifo);
+		if (ret) {
+			nvkm_error(subdev, "nonstall %d\n", ret);
+		}
+	}
+
+	/* Allocate USERD + BAR1 polling area. */
+	if (fifo->func->chan.func->userd->bar == 1) {
+		struct nvkm_vmm *bar1 = nvkm_bar_bar1_vmm(device);
+
+		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, fifo->chid->nr *
+				      fifo->func->chan.func->userd->size, 0, true,
+				      &fifo->userd.mem);
+		if (ret)
+			return ret;
+
+		ret = nvkm_vmm_get(bar1, 12, nvkm_memory_size(fifo->userd.mem), &fifo->userd.bar1);
+		if (ret)
+			return ret;
+
+		ret = nvkm_memory_map(fifo->userd.mem, 0, bar1, fifo->userd.bar1, NULL, 0);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
@@ -292,25 +318,28 @@ nvkm_fifo_preinit(struct nvkm_engine *engine)
 	nvkm_mc_reset(engine->subdev.device, NVKM_ENGINE_FIFO, 0);
 }
 
-static int
-nvkm_fifo_init(struct nvkm_engine *engine)
-{
-	struct nvkm_fifo *fifo = nvkm_fifo(engine);
-	fifo->func->init(fifo);
-	return 0;
-}
-
 static void *
 nvkm_fifo_dtor(struct nvkm_engine *engine)
 {
 	struct nvkm_fifo *fifo = nvkm_fifo(engine);
-	void *data = fifo;
-	if (fifo->func->dtor)
-		data = fifo->func->dtor(fifo);
-	nvkm_event_fini(&fifo->kevent);
-	nvkm_event_fini(&fifo->uevent);
+	struct nvkm_runl *runl, *runt;
+	struct nvkm_runq *runq, *rtmp;
+
+	if (fifo->userd.bar1)
+		nvkm_vmm_put(nvkm_bar_bar1_vmm(engine->subdev.device), &fifo->userd.bar1);
+	nvkm_memory_unref(&fifo->userd.mem);
+
+	list_for_each_entry_safe(runl, runt, &fifo->runls, head)
+		nvkm_runl_del(runl);
+	list_for_each_entry_safe(runq, rtmp, &fifo->runqs, head)
+		nvkm_runq_del(runq);
+
+	nvkm_chid_unref(&fifo->cgid);
+	nvkm_chid_unref(&fifo->chid);
+
+	nvkm_event_fini(&fifo->nonstall.event);
 	mutex_destroy(&fifo->mutex);
-	return data;
+	return fifo;
 }
 
 static const struct nvkm_engine_func
@@ -321,37 +350,40 @@ nvkm_fifo = {
 	.info = nvkm_fifo_info,
 	.init = nvkm_fifo_init,
 	.fini = nvkm_fifo_fini,
-	.intr = nvkm_fifo_intr,
 	.base.sclass = nvkm_fifo_class_get,
 };
 
 int
-nvkm_fifo_ctor(const struct nvkm_fifo_func *func, struct nvkm_device *device,
-	       enum nvkm_subdev_type type, int inst, int nr, struct nvkm_fifo *fifo)
+nvkm_fifo_new_(const struct nvkm_fifo_func *func, struct nvkm_device *device,
+	       enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo)
 {
+	struct nvkm_fifo *fifo;
 	int ret;
 
+	if (!(fifo = *pfifo = kzalloc(sizeof(*fifo), GFP_KERNEL)))
+		return -ENOMEM;
+
 	fifo->func = func;
-	INIT_LIST_HEAD(&fifo->chan);
+	INIT_LIST_HEAD(&fifo->runqs);
+	INIT_LIST_HEAD(&fifo->runls);
+	/*TODO: Needs to be >CTXSW_TIMEOUT, so RC can recover before this is hit.
+	 *      CTXSW_TIMEOUT HW default seems to differ between GPUs, so just a
+	 *      large number for now until we support changing it.
+	 */
+	fifo->timeout.chan_msec = 10000;
 	spin_lock_init(&fifo->lock);
 	mutex_init(&fifo->mutex);
 
-	if (WARN_ON(fifo->nr > NVKM_FIFO_CHID_NR))
-		fifo->nr = NVKM_FIFO_CHID_NR;
-	else
-		fifo->nr = nr;
-	bitmap_clear(fifo->mask, 0, fifo->nr);
-
 	ret = nvkm_engine_ctor(&nvkm_fifo, device, type, inst, true, &fifo->engine);
 	if (ret)
 		return ret;
 
-	if (func->uevent_init) {
-		ret = nvkm_event_init(&nvkm_fifo_uevent_func, 1, 1,
-				      &fifo->uevent);
+	if (func->nonstall) {
+		ret = nvkm_event_init(func->nonstall, &fifo->engine.subdev, 1, 1,
+				      &fifo->nonstall.event);
 		if (ret)
 			return ret;
 	}
 
-	return nvkm_event_init(&nvkm_fifo_kevent_func, 1, nr, &fifo->kevent);
+	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.c
new file mode 100644
index 000000000000..ea53fb3d5d06
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "cgrp.h"
+#include "chan.h"
+#include "chid.h"
+#include "runl.h"
+#include "priv.h"
+
+#include <core/gpuobj.h>
+#include <subdev/mmu.h>
+
+static void
+nvkm_cgrp_ectx_put(struct nvkm_cgrp *cgrp, struct nvkm_ectx **pectx)
+{
+	struct nvkm_ectx *ectx = *pectx;
+
+	if (ectx) {
+		struct nvkm_engn *engn = ectx->engn;
+
+		if (refcount_dec_and_test(&ectx->refs)) {
+			CGRP_TRACE(cgrp, "dtor ectx %d[%s]", engn->id, engn->engine->subdev.name);
+			nvkm_object_del(&ectx->object);
+			list_del(&ectx->head);
+			kfree(ectx);
+		}
+
+		*pectx = NULL;
+	}
+}
+
+static int
+nvkm_cgrp_ectx_get(struct nvkm_cgrp *cgrp, struct nvkm_engn *engn, struct nvkm_ectx **pectx,
+		   struct nvkm_chan *chan, struct nvkm_client *client)
+{
+	struct nvkm_engine *engine = engn->engine;
+	struct nvkm_oclass cclass = {
+		.client = client,
+		.engine = engine,
+	};
+	struct nvkm_ectx *ectx;
+	int ret = 0;
+
+	/* Look for an existing context for this engine in the channel group. */
+	ectx = nvkm_list_find(ectx, &cgrp->ectxs, head, ectx->engn == engn);
+	if (ectx) {
+		refcount_inc(&ectx->refs);
+		*pectx = ectx;
+		return 0;
+	}
+
+	/* Nope - create a fresh one. */
+	CGRP_TRACE(cgrp, "ctor ectx %d[%s]", engn->id, engn->engine->subdev.name);
+	if (!(ectx = *pectx = kzalloc(sizeof(*ectx), GFP_KERNEL)))
+		return -ENOMEM;
+
+	ectx->engn = engn;
+	refcount_set(&ectx->refs, 1);
+	refcount_set(&ectx->uses, 0);
+	list_add_tail(&ectx->head, &cgrp->ectxs);
+
+	/* Allocate the HW structures. */
+	if (engine->func->fifo.cclass)
+		ret = engine->func->fifo.cclass(chan, &cclass, &ectx->object);
+	else if (engine->func->cclass)
+		ret = nvkm_object_new_(engine->func->cclass, &cclass, NULL, 0, &ectx->object);
+
+	if (ret)
+		nvkm_cgrp_ectx_put(cgrp, pectx);
+
+	return ret;
+}
+
+void
+nvkm_cgrp_vctx_put(struct nvkm_cgrp *cgrp, struct nvkm_vctx **pvctx)
+{
+	struct nvkm_vctx *vctx = *pvctx;
+
+	if (vctx) {
+		struct nvkm_engn *engn = vctx->ectx->engn;
+
+		if (refcount_dec_and_test(&vctx->refs)) {
+			CGRP_TRACE(cgrp, "dtor vctx %d[%s]", engn->id, engn->engine->subdev.name);
+			nvkm_vmm_put(vctx->vmm, &vctx->vma);
+			nvkm_gpuobj_del(&vctx->inst);
+
+			nvkm_cgrp_ectx_put(cgrp, &vctx->ectx);
+			if (vctx->vmm) {
+				atomic_dec(&vctx->vmm->engref[engn->engine->subdev.type]);
+				nvkm_vmm_unref(&vctx->vmm);
+			}
+			list_del(&vctx->head);
+			kfree(vctx);
+		}
+
+		*pvctx = NULL;
+	}
+}
+
+int
+nvkm_cgrp_vctx_get(struct nvkm_cgrp *cgrp, struct nvkm_engn *engn, struct nvkm_chan *chan,
+		   struct nvkm_vctx **pvctx, struct nvkm_client *client)
+{
+	struct nvkm_ectx *ectx;
+	struct nvkm_vctx *vctx;
+	int ret;
+
+	/* Look for an existing sub-context for this engine+VEID in the channel group. */
+	vctx = nvkm_list_find(vctx, &cgrp->vctxs, head,
+			      vctx->ectx->engn == engn && vctx->vmm == chan->vmm);
+	if (vctx) {
+		refcount_inc(&vctx->refs);
+		*pvctx = vctx;
+		return 0;
+	}
+
+	/* Nope - create a fresh one.  But, context first. */
+	ret = nvkm_cgrp_ectx_get(cgrp, engn, &ectx, chan, client);
+	if (ret) {
+		CGRP_ERROR(cgrp, "ectx %d[%s]: %d", engn->id, engn->engine->subdev.name, ret);
+		return ret;
+	}
+
+	/* Now, create the sub-context. */
+	CGRP_TRACE(cgrp, "ctor vctx %d[%s]", engn->id, engn->engine->subdev.name);
+	if (!(vctx = *pvctx = kzalloc(sizeof(*vctx), GFP_KERNEL))) {
+		nvkm_cgrp_ectx_put(cgrp, &ectx);
+		return -ENOMEM;
+	}
+
+	vctx->ectx = ectx;
+	vctx->vmm = nvkm_vmm_ref(chan->vmm);
+	refcount_set(&vctx->refs, 1);
+	list_add_tail(&vctx->head, &cgrp->vctxs);
+
+	/* MMU on some GPUs needs to know engine usage for TLB invalidation. */
+	if (vctx->vmm)
+		atomic_inc(&vctx->vmm->engref[engn->engine->subdev.type]);
+
+	/* Allocate the HW structures. */
+	if (engn->func->bind) {
+		ret = nvkm_object_bind(vctx->ectx->object, NULL, 0, &vctx->inst);
+		if (ret == 0 && engn->func->ctor)
+			ret = engn->func->ctor(engn, vctx);
+	}
+
+	if (ret)
+		nvkm_cgrp_vctx_put(cgrp, pvctx);
+
+	return ret;
+}
+
+static void
+nvkm_cgrp_del(struct kref *kref)
+{
+	struct nvkm_cgrp *cgrp = container_of(kref, typeof(*cgrp), kref);
+	struct nvkm_runl *runl = cgrp->runl;
+
+	if (runl->cgid)
+		nvkm_chid_put(runl->cgid, cgrp->id, &cgrp->lock);
+
+	mutex_destroy(&cgrp->mutex);
+	nvkm_vmm_unref(&cgrp->vmm);
+	kfree(cgrp);
+}
+
+void
+nvkm_cgrp_unref(struct nvkm_cgrp **pcgrp)
+{
+	struct nvkm_cgrp *cgrp = *pcgrp;
+
+	if (!cgrp)
+		return;
+
+	kref_put(&cgrp->kref, nvkm_cgrp_del);
+	*pcgrp = NULL;
+}
+
+struct nvkm_cgrp *
+nvkm_cgrp_ref(struct nvkm_cgrp *cgrp)
+{
+	if (cgrp)
+		kref_get(&cgrp->kref);
+
+	return cgrp;
+}
+
+void
+nvkm_cgrp_put(struct nvkm_cgrp **pcgrp, unsigned long irqflags)
+{
+	struct nvkm_cgrp *cgrp = *pcgrp;
+
+	if (!cgrp)
+		return;
+
+	*pcgrp = NULL;
+	spin_unlock_irqrestore(&cgrp->lock, irqflags);
+}
+
+int
+nvkm_cgrp_new(struct nvkm_runl *runl, const char *name, struct nvkm_vmm *vmm, bool hw,
+	      struct nvkm_cgrp **pcgrp)
+{
+	struct nvkm_cgrp *cgrp;
+
+	if (!(cgrp = *pcgrp = kmalloc(sizeof(*cgrp), GFP_KERNEL)))
+		return -ENOMEM;
+
+	cgrp->func = runl->fifo->func->cgrp.func;
+	strscpy(cgrp->name, name, sizeof(cgrp->name));
+	cgrp->runl = runl;
+	cgrp->vmm = nvkm_vmm_ref(vmm);
+	cgrp->hw = hw;
+	cgrp->id = -1;
+	kref_init(&cgrp->kref);
+	INIT_LIST_HEAD(&cgrp->chans);
+	cgrp->chan_nr = 0;
+	spin_lock_init(&cgrp->lock);
+	INIT_LIST_HEAD(&cgrp->ectxs);
+	INIT_LIST_HEAD(&cgrp->vctxs);
+	mutex_init(&cgrp->mutex);
+	atomic_set(&cgrp->rc, NVKM_CGRP_RC_NONE);
+
+	if (runl->cgid) {
+		cgrp->id = nvkm_chid_get(runl->cgid, cgrp);
+		if (cgrp->id < 0) {
+			RUNL_ERROR(runl, "!cgids");
+			nvkm_cgrp_unref(pcgrp);
+			return -ENOSPC;
+		}
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h
index d0ac60b06720..5f6abd59a6ff 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/cgrp.h
@@ -1,11 +1,75 @@
-#ifndef __NVKM_FIFO_CGRP_H__
-#define __NVKM_FIFO_CGRP_H__
-#include "priv.h"
+/* SPDX-License-Identifier: MIT */
+#ifndef __NVKM_CGRP_H__
+#define __NVKM_CGRP_H__
+#include <core/os.h>
+struct nvkm_chan;
+struct nvkm_client;
+
+struct nvkm_vctx {
+	struct nvkm_ectx *ectx;
+	struct nvkm_vmm *vmm;
+	refcount_t refs;
+
+	struct nvkm_gpuobj *inst;
+	struct nvkm_vma *vma;
 
-struct nvkm_fifo_cgrp {
-	int id;
 	struct list_head head;
-	struct list_head chan;
+};
+
+struct nvkm_ectx {
+	struct nvkm_engn *engn;
+	refcount_t refs;
+	refcount_t uses;
+
+	struct nvkm_object *object;
+
+	struct list_head head;
+};
+
+struct nvkm_cgrp {
+	const struct nvkm_cgrp_func {
+		void (*preempt)(struct nvkm_cgrp *);
+	} *func;
+	char name[64];
+	struct nvkm_runl *runl;
+	struct nvkm_vmm *vmm;
+	bool hw;
+	int id;
+	struct kref kref;
+
+	struct list_head chans;
 	int chan_nr;
+
+	spinlock_t lock; /* protects irq handler channel (group) lookup */
+
+	struct list_head ectxs;
+	struct list_head vctxs;
+	struct mutex mutex;
+
+#define NVKM_CGRP_RC_NONE    0
+#define NVKM_CGRP_RC_PENDING 1
+#define NVKM_CGRP_RC_RUNNING 2
+	atomic_t rc;
+
+	struct list_head head;
 };
+
+int nvkm_cgrp_new(struct nvkm_runl *, const char *name, struct nvkm_vmm *, bool hw,
+		  struct nvkm_cgrp **);
+struct nvkm_cgrp *nvkm_cgrp_ref(struct nvkm_cgrp *);
+void nvkm_cgrp_unref(struct nvkm_cgrp **);
+int nvkm_cgrp_vctx_get(struct nvkm_cgrp *, struct nvkm_engn *, struct nvkm_chan *,
+		       struct nvkm_vctx **, struct nvkm_client *);
+void nvkm_cgrp_vctx_put(struct nvkm_cgrp *, struct nvkm_vctx **);
+
+void nvkm_cgrp_put(struct nvkm_cgrp **, unsigned long irqflags);
+
+#define nvkm_cgrp_foreach_chan(chan,cgrp) list_for_each_entry((chan), &(cgrp)->chans, head)
+#define nvkm_cgrp_foreach_chan_safe(chan,ctmp,cgrp) \
+	list_for_each_entry_safe((chan), (ctmp), &(cgrp)->chans, head)
+
+#define CGRP_PRCLI(c,l,p,f,a...) RUNL_PRINT((c)->runl, l, p, "%04x:[%s]"f, (c)->id, (c)->name, ##a)
+#define CGRP_PRINT(c,l,p,f,a...) RUNL_PRINT((c)->runl, l, p, "%04x:"f, (c)->id, ##a)
+#define CGRP_ERROR(c,f,a...) CGRP_PRCLI((c), ERROR,    err, " "f"\n", ##a)
+#define CGRP_TRACE(c,f,a...) CGRP_PRINT((c), TRACE,   info, " "f"\n", ##a)
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
index 2e7f32cebf2a..b7c9d6115bce 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
@@ -22,285 +22,265 @@
  * Authors: Ben Skeggs
  */
 #include "chan.h"
+#include "chid.h"
+#include "cgrp.h"
+#include "chid.h"
+#include "runl.h"
+#include "priv.h"
 
-#include <core/client.h>
-#include <core/gpuobj.h>
-#include <core/oproxy.h>
+#include <core/ramht.h>
 #include <subdev/mmu.h>
 #include <engine/dma.h>
 
-struct nvkm_fifo_chan_object {
-	struct nvkm_oproxy oproxy;
-	struct nvkm_fifo_chan *chan;
-	int hash;
+#include <nvif/if0020.h>
+
+const struct nvkm_event_func
+nvkm_chan_event = {
 };
 
-static struct nvkm_fifo_engn *
-nvkm_fifo_chan_engn(struct nvkm_fifo_chan *chan, struct nvkm_engine *engine)
+void
+nvkm_chan_cctx_bind(struct nvkm_chan *chan, struct nvkm_engn *engn, struct nvkm_cctx *cctx)
 {
-	int engi = chan->fifo->func->engine_id(chan->fifo, engine);
-	if (engi >= 0)
-		return &chan->engn[engi];
-	return NULL;
+	struct nvkm_cgrp *cgrp = chan->cgrp;
+	struct nvkm_runl *runl = cgrp->runl;
+	struct nvkm_engine *engine = engn->engine;
+
+	if (!engn->func->bind)
+		return;
+
+	CHAN_TRACE(chan, "%sbind cctx %d[%s]", cctx ? "" : "un", engn->id, engine->subdev.name);
+
+	/* Prevent any channel in channel group from being rescheduled, kick them
+	 * off host and any engine(s) they're loaded on.
+	 */
+	if (cgrp->hw)
+		nvkm_runl_block(runl);
+	else
+		nvkm_chan_block(chan);
+	nvkm_chan_preempt(chan, true);
+
+	/* Update context pointer. */
+	engn->func->bind(engn, cctx, chan);
+
+	/* Resume normal operation. */
+	if (cgrp->hw)
+		nvkm_runl_allow(runl);
+	else
+		nvkm_chan_allow(chan);
 }
 
-static int
-nvkm_fifo_chan_child_fini(struct nvkm_oproxy *base, bool suspend)
+void
+nvkm_chan_cctx_put(struct nvkm_chan *chan, struct nvkm_cctx **pcctx)
 {
-	struct nvkm_fifo_chan_object *object =
-		container_of(base, typeof(*object), oproxy);
-	struct nvkm_engine *engine  = object->oproxy.object->engine;
-	struct nvkm_fifo_chan *chan = object->chan;
-	struct nvkm_fifo_engn *engn = nvkm_fifo_chan_engn(chan, engine);
-	const char *name = engine->subdev.name;
-	int ret = 0;
-
-	if (--engn->usecount)
-		return 0;
+	struct nvkm_cctx *cctx = *pcctx;
 
-	if (chan->func->engine_fini) {
-		ret = chan->func->engine_fini(chan, engine, suspend);
-		if (ret) {
-			nvif_error(&chan->object,
-				   "detach %s failed, %d\n", name, ret);
-			return ret;
+	if (cctx) {
+		struct nvkm_engn *engn = cctx->vctx->ectx->engn;
+
+		if (refcount_dec_and_mutex_lock(&cctx->refs, &chan->cgrp->mutex)) {
+			CHAN_TRACE(chan, "dtor cctx %d[%s]", engn->id, engn->engine->subdev.name);
+			nvkm_cgrp_vctx_put(chan->cgrp, &cctx->vctx);
+			list_del(&cctx->head);
+			kfree(cctx);
+			mutex_unlock(&chan->cgrp->mutex);
 		}
-	}
 
-	if (engn->object) {
-		ret = nvkm_object_fini(engn->object, suspend);
-		if (ret && suspend)
-			return ret;
+		*pcctx = NULL;
 	}
-
-	nvif_trace(&chan->object, "detached %s\n", name);
-	return ret;
 }
 
-static int
-nvkm_fifo_chan_child_init(struct nvkm_oproxy *base)
+int
+nvkm_chan_cctx_get(struct nvkm_chan *chan, struct nvkm_engn *engn, struct nvkm_cctx **pcctx,
+		   struct nvkm_client *client)
 {
-	struct nvkm_fifo_chan_object *object =
-		container_of(base, typeof(*object), oproxy);
-	struct nvkm_engine *engine  = object->oproxy.object->engine;
-	struct nvkm_fifo_chan *chan = object->chan;
-	struct nvkm_fifo_engn *engn = nvkm_fifo_chan_engn(chan, engine);
-	const char *name = engine->subdev.name;
+	struct nvkm_cgrp *cgrp = chan->cgrp;
+	struct nvkm_vctx *vctx;
+	struct nvkm_cctx *cctx;
 	int ret;
 
-	if (engn->usecount++)
+	/* Look for an existing channel context for this engine+VEID. */
+	mutex_lock(&cgrp->mutex);
+	cctx = nvkm_list_find(cctx, &chan->cctxs, head,
+			      cctx->vctx->ectx->engn == engn && cctx->vctx->vmm == chan->vmm);
+	if (cctx) {
+		refcount_inc(&cctx->refs);
+		*pcctx = cctx;
+		mutex_unlock(&chan->cgrp->mutex);
 		return 0;
+	}
 
-	if (engn->object) {
-		ret = nvkm_object_init(engn->object);
-		if (ret)
-			return ret;
+	/* Nope - create a fresh one.  But, sub-context first. */
+	ret = nvkm_cgrp_vctx_get(cgrp, engn, chan, &vctx, client);
+	if (ret) {
+		CHAN_ERROR(chan, "vctx %d[%s]: %d", engn->id, engn->engine->subdev.name, ret);
+		goto done;
 	}
 
-	if (chan->func->engine_init) {
-		ret = chan->func->engine_init(chan, engine);
-		if (ret) {
-			nvif_error(&chan->object,
-				   "attach %s failed, %d\n", name, ret);
-			return ret;
-		}
+	/* Now, create the channel context - to track engine binding. */
+	CHAN_TRACE(chan, "ctor cctx %d[%s]", engn->id, engn->engine->subdev.name);
+	if (!(cctx = *pcctx = kzalloc(sizeof(*cctx), GFP_KERNEL))) {
+		nvkm_cgrp_vctx_put(cgrp, &vctx);
+		ret = -ENOMEM;
+		goto done;
 	}
 
-	nvif_trace(&chan->object, "attached %s\n", name);
-	return 0;
+	cctx->vctx = vctx;
+	refcount_set(&cctx->refs, 1);
+	refcount_set(&cctx->uses, 0);
+	list_add_tail(&cctx->head, &chan->cctxs);
+done:
+	mutex_unlock(&cgrp->mutex);
+	return ret;
 }
 
-static void
-nvkm_fifo_chan_child_del(struct nvkm_oproxy *base)
+int
+nvkm_chan_preempt_locked(struct nvkm_chan *chan, bool wait)
 {
-	struct nvkm_fifo_chan_object *object =
-		container_of(base, typeof(*object), oproxy);
-	struct nvkm_engine *engine  = object->oproxy.base.engine;
-	struct nvkm_fifo_chan *chan = object->chan;
-	struct nvkm_fifo_engn *engn = nvkm_fifo_chan_engn(chan, engine);
-
-	if (chan->func->object_dtor)
-		chan->func->object_dtor(chan, object->hash);
-
-	if (!--engn->refcount) {
-		if (chan->func->engine_dtor)
-			chan->func->engine_dtor(chan, engine);
-		nvkm_object_del(&engn->object);
-		if (chan->vmm)
-			atomic_dec(&chan->vmm->engref[engine->subdev.type]);
-	}
-}
+	struct nvkm_runl *runl = chan->cgrp->runl;
 
-static const struct nvkm_oproxy_func
-nvkm_fifo_chan_child_func = {
-	.dtor[0] = nvkm_fifo_chan_child_del,
-	.init[0] = nvkm_fifo_chan_child_init,
-	.fini[0] = nvkm_fifo_chan_child_fini,
-};
+	CHAN_TRACE(chan, "preempt");
+	chan->func->preempt(chan);
+	if (!wait)
+		return 0;
+
+	return nvkm_runl_preempt_wait(runl);
+}
 
-static int
-nvkm_fifo_chan_child_new(const struct nvkm_oclass *oclass, void *data, u32 size,
-			 struct nvkm_object **pobject)
+int
+nvkm_chan_preempt(struct nvkm_chan *chan, bool wait)
 {
-	struct nvkm_engine *engine = oclass->engine;
-	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(oclass->parent);
-	struct nvkm_fifo_engn *engn = nvkm_fifo_chan_engn(chan, engine);
-	struct nvkm_fifo_chan_object *object;
-	int ret = 0;
+	int ret;
 
-	if (!(object = kzalloc(sizeof(*object), GFP_KERNEL)))
-		return -ENOMEM;
-	nvkm_oproxy_ctor(&nvkm_fifo_chan_child_func, oclass, &object->oproxy);
-	object->chan = chan;
-	*pobject = &object->oproxy.base;
-
-	if (!engn->refcount++) {
-		struct nvkm_oclass cclass = {
-			.client = oclass->client,
-			.engine = oclass->engine,
-		};
-
-		if (chan->vmm)
-			atomic_inc(&chan->vmm->engref[engine->subdev.type]);
-
-		if (engine->func->fifo.cclass) {
-			ret = engine->func->fifo.cclass(chan, &cclass,
-							&engn->object);
-		} else
-		if (engine->func->cclass) {
-			ret = nvkm_object_new_(engine->func->cclass, &cclass,
-					       NULL, 0, &engn->object);
-		}
-		if (ret)
-			return ret;
+	if (!chan->func->preempt)
+		return 0;
 
-		if (chan->func->engine_ctor) {
-			ret = chan->func->engine_ctor(chan, oclass->engine,
-						      engn->object);
-			if (ret)
-				return ret;
-		}
-	}
+	mutex_lock(&chan->cgrp->runl->mutex);
+	ret = nvkm_chan_preempt_locked(chan, wait);
+	mutex_unlock(&chan->cgrp->runl->mutex);
+	return ret;
+}
 
-	ret = oclass->base.ctor(&(const struct nvkm_oclass) {
-					.base = oclass->base,
-					.engn = oclass->engn,
-					.handle = oclass->handle,
-					.object = oclass->object,
-					.client = oclass->client,
-					.parent = engn->object ?
-						  engn->object :
-						  oclass->parent,
-					.engine = engine,
-				}, data, size, &object->oproxy.object);
-	if (ret)
-		return ret;
+void
+nvkm_chan_remove_locked(struct nvkm_chan *chan)
+{
+	struct nvkm_cgrp *cgrp = chan->cgrp;
+	struct nvkm_runl *runl = cgrp->runl;
 
-	if (chan->func->object_ctor) {
-		object->hash =
-			chan->func->object_ctor(chan, object->oproxy.object);
-		if (object->hash < 0)
-			return object->hash;
-	}
+	if (list_empty(&chan->head))
+		return;
 
-	return 0;
+	CHAN_TRACE(chan, "remove");
+	if (!--cgrp->chan_nr) {
+		runl->cgrp_nr--;
+		list_del(&cgrp->head);
+	}
+	runl->chan_nr--;
+	list_del_init(&chan->head);
+	atomic_set(&runl->changed, 1);
 }
 
-static int
-nvkm_fifo_chan_child_get(struct nvkm_object *object, int index,
-			 struct nvkm_oclass *oclass)
+void
+nvkm_chan_remove(struct nvkm_chan *chan, bool preempt)
 {
-	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
-	struct nvkm_fifo *fifo = chan->fifo;
-	struct nvkm_engine *engine;
-	u32 engm = chan->engm;
-	int engi, ret, c;
-
-	for (; c = 0, engi = __ffs(engm), engm; engm &= ~(1ULL << engi)) {
-		if (!(engine = fifo->func->id_engine(fifo, engi)))
-			continue;
-		oclass->engine = engine;
-		oclass->base.oclass = 0;
-
-		if (engine->func->fifo.sclass) {
-			ret = engine->func->fifo.sclass(oclass, index);
-			if (oclass->base.oclass) {
-				if (!oclass->base.ctor)
-					oclass->base.ctor = nvkm_object_new;
-				oclass->ctor = nvkm_fifo_chan_child_new;
-				return 0;
-			}
+	struct nvkm_runl *runl = chan->cgrp->runl;
+
+	mutex_lock(&runl->mutex);
+	if (preempt && chan->func->preempt)
+		nvkm_chan_preempt_locked(chan, true);
+	nvkm_chan_remove_locked(chan);
+	nvkm_runl_update_locked(runl, true);
+	mutex_unlock(&runl->mutex);
+}
 
-			index -= ret;
-			continue;
-		}
+void
+nvkm_chan_insert(struct nvkm_chan *chan)
+{
+	struct nvkm_cgrp *cgrp = chan->cgrp;
+	struct nvkm_runl *runl = cgrp->runl;
 
-		while (engine->func->sclass[c].oclass) {
-			if (c++ == index) {
-				oclass->base = engine->func->sclass[index];
-				if (!oclass->base.ctor)
-					oclass->base.ctor = nvkm_object_new;
-				oclass->ctor = nvkm_fifo_chan_child_new;
-				return 0;
-			}
-		}
-		index -= c;
+	mutex_lock(&runl->mutex);
+	if (WARN_ON(!list_empty(&chan->head))) {
+		mutex_unlock(&runl->mutex);
+		return;
 	}
 
-	return -EINVAL;
+	CHAN_TRACE(chan, "insert");
+	list_add_tail(&chan->head, &cgrp->chans);
+	runl->chan_nr++;
+	if (!cgrp->chan_nr++) {
+		list_add_tail(&cgrp->head, &cgrp->runl->cgrps);
+		runl->cgrp_nr++;
+	}
+	atomic_set(&runl->changed, 1);
+	nvkm_runl_update_locked(runl, true);
+	mutex_unlock(&runl->mutex);
 }
 
-static int
-nvkm_fifo_chan_ntfy(struct nvkm_object *object, u32 type,
-		    struct nvkm_event **pevent)
+static void
+nvkm_chan_block_locked(struct nvkm_chan *chan)
 {
-	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
-	if (chan->func->ntfy)
-		return chan->func->ntfy(chan, type, pevent);
-	return -ENODEV;
+	CHAN_TRACE(chan, "block %d", atomic_read(&chan->blocked));
+	if (atomic_inc_return(&chan->blocked) == 1)
+		chan->func->stop(chan);
 }
 
-static int
-nvkm_fifo_chan_map(struct nvkm_object *object, void *argv, u32 argc,
-		   enum nvkm_object_map *type, u64 *addr, u64 *size)
+void
+nvkm_chan_error(struct nvkm_chan *chan, bool preempt)
 {
-	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
-	*type = NVKM_OBJECT_MAP_IO;
-	*addr = chan->addr;
-	*size = chan->size;
-	return 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chan->lock, flags);
+	if (atomic_inc_return(&chan->errored) == 1) {
+		CHAN_ERROR(chan, "errored - disabling channel");
+		nvkm_chan_block_locked(chan);
+		if (preempt)
+			chan->func->preempt(chan);
+		nvkm_event_ntfy(&chan->cgrp->runl->chid->event, chan->id, NVKM_CHAN_EVENT_ERRORED);
+	}
+	spin_unlock_irqrestore(&chan->lock, flags);
 }
 
-static int
-nvkm_fifo_chan_fini(struct nvkm_object *object, bool suspend)
+void
+nvkm_chan_block(struct nvkm_chan *chan)
 {
-	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
-	chan->func->fini(chan);
-	return 0;
+	spin_lock_irq(&chan->lock);
+	nvkm_chan_block_locked(chan);
+	spin_unlock_irq(&chan->lock);
 }
 
-static int
-nvkm_fifo_chan_init(struct nvkm_object *object)
+void
+nvkm_chan_allow(struct nvkm_chan *chan)
 {
-	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
-	chan->func->init(chan);
-	return 0;
+	spin_lock_irq(&chan->lock);
+	CHAN_TRACE(chan, "allow %d", atomic_read(&chan->blocked));
+	if (atomic_dec_and_test(&chan->blocked))
+		chan->func->start(chan);
+	spin_unlock_irq(&chan->lock);
 }
 
-static void *
-nvkm_fifo_chan_dtor(struct nvkm_object *object)
+void
+nvkm_chan_del(struct nvkm_chan **pchan)
 {
-	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
-	struct nvkm_fifo *fifo = chan->fifo;
-	void *data = chan->func->dtor(chan);
-	unsigned long flags;
+	struct nvkm_chan *chan = *pchan;
+
+	if (!chan)
+		return;
+
+	if (chan->func->ramfc->clear)
+		chan->func->ramfc->clear(chan);
 
-	spin_lock_irqsave(&fifo->lock, flags);
-	if (!list_empty(&chan->head)) {
-		__clear_bit(chan->chid, fifo->mask);
-		list_del(&chan->head);
+	nvkm_ramht_del(&chan->ramht);
+	nvkm_gpuobj_del(&chan->pgd);
+	nvkm_gpuobj_del(&chan->eng);
+	nvkm_gpuobj_del(&chan->cache);
+	nvkm_gpuobj_del(&chan->ramfc);
+
+	nvkm_memory_unref(&chan->userd.mem);
+
+	if (chan->cgrp) {
+		nvkm_chid_put(chan->cgrp->runl->chid, chan->id, &chan->cgrp->lock);
+		nvkm_cgrp_unref(&chan->cgrp);
 	}
-	spin_unlock_irqrestore(&fifo->lock, flags);
 
 	if (chan->vmm) {
 		nvkm_vmm_part(chan->vmm, chan->inst->memory);
@@ -309,85 +289,192 @@ nvkm_fifo_chan_dtor(struct nvkm_object *object)
 
 	nvkm_gpuobj_del(&chan->push);
 	nvkm_gpuobj_del(&chan->inst);
-	return data;
+	kfree(chan);
 }
 
-static const struct nvkm_object_func
-nvkm_fifo_chan_func = {
-	.dtor = nvkm_fifo_chan_dtor,
-	.init = nvkm_fifo_chan_init,
-	.fini = nvkm_fifo_chan_fini,
-	.ntfy = nvkm_fifo_chan_ntfy,
-	.map = nvkm_fifo_chan_map,
-	.sclass = nvkm_fifo_chan_child_get,
-};
+void
+nvkm_chan_put(struct nvkm_chan **pchan, unsigned long irqflags)
+{
+	struct nvkm_chan *chan = *pchan;
+
+	if (!chan)
+		return;
+
+	*pchan = NULL;
+	spin_unlock_irqrestore(&chan->cgrp->lock, irqflags);
+}
+
+struct nvkm_chan *
+nvkm_chan_get_inst(struct nvkm_engine *engine, u64 inst, unsigned long *pirqflags)
+{
+	struct nvkm_fifo *fifo = engine->subdev.device->fifo;
+	struct nvkm_runl *runl;
+	struct nvkm_engn *engn;
+	struct nvkm_chan *chan;
+
+	nvkm_runl_foreach(runl, fifo) {
+		nvkm_runl_foreach_engn(engn, runl) {
+			if (engine == &fifo->engine || engn->engine == engine) {
+				chan = nvkm_runl_chan_get_inst(runl, inst, pirqflags);
+				if (chan || engn->engine == engine)
+					return chan;
+			}
+		}
+	}
+
+	return NULL;
+}
+
+struct nvkm_chan *
+nvkm_chan_get_chid(struct nvkm_engine *engine, int id, unsigned long *pirqflags)
+{
+	struct nvkm_fifo *fifo = engine->subdev.device->fifo;
+	struct nvkm_runl *runl;
+	struct nvkm_engn *engn;
+
+	nvkm_runl_foreach(runl, fifo) {
+		nvkm_runl_foreach_engn(engn, runl) {
+			if (fifo->chid || engn->engine == engine)
+				return nvkm_runl_chan_get_chid(runl, id, pirqflags);
+		}
+	}
+
+	return NULL;
+}
 
 int
-nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
-		    struct nvkm_fifo *fifo, u32 size, u32 align, bool zero,
-		    u64 hvmm, u64 push, u32 engm, int bar, u32 base,
-		    u32 user, const struct nvkm_oclass *oclass,
-		    struct nvkm_fifo_chan *chan)
+nvkm_chan_new_(const struct nvkm_chan_func *func, struct nvkm_runl *runl, int runq,
+	       struct nvkm_cgrp *cgrp, const char *name, bool priv, u32 devm, struct nvkm_vmm *vmm,
+	       struct nvkm_dmaobj *dmaobj, u64 offset, u64 length,
+	       struct nvkm_memory *userd, u64 ouserd, struct nvkm_chan **pchan)
 {
-	struct nvkm_client *client = oclass->client;
+	struct nvkm_fifo *fifo = runl->fifo;
 	struct nvkm_device *device = fifo->engine.subdev.device;
-	struct nvkm_dmaobj *dmaobj;
-	unsigned long flags;
+	struct nvkm_chan *chan;
 	int ret;
 
-	nvkm_object_ctor(&nvkm_fifo_chan_func, oclass, &chan->object);
+	/* Validate arguments against class requirements. */
+	if ((runq && runq >= runl->func->runqs) ||
+	    (!func->inst->vmm != !vmm) ||
+	    ((func->userd->bar < 0) == !userd) ||
+	    (!func->ramfc->ctxdma != !dmaobj) ||
+	    ((func->ramfc->devm < devm) && devm != BIT(0)) ||
+	    (!func->ramfc->priv && priv)) {
+		RUNL_DEBUG(runl, "args runq:%d:%d vmm:%d:%p userd:%d:%p "
+				 "push:%d:%p devm:%08x:%08x priv:%d:%d",
+			   runl->func->runqs, runq, func->inst->vmm, vmm,
+			   func->userd->bar < 0, userd, func->ramfc->ctxdma, dmaobj,
+			   func->ramfc->devm, devm, func->ramfc->priv, priv);
+		return -EINVAL;
+	}
+
+	if (!(chan = *pchan = kzalloc(sizeof(*chan), GFP_KERNEL)))
+		return -ENOMEM;
+
 	chan->func = func;
-	chan->fifo = fifo;
-	chan->engm = engm;
+	strscpy(chan->name, name, sizeof(chan->name));
+	chan->runq = runq;
+	chan->id = -1;
+	spin_lock_init(&chan->lock);
+	atomic_set(&chan->blocked, 1);
+	atomic_set(&chan->errored, 0);
+	INIT_LIST_HEAD(&chan->cctxs);
 	INIT_LIST_HEAD(&chan->head);
 
-	/* instance memory */
-	ret = nvkm_gpuobj_new(device, size, align, zero, NULL, &chan->inst);
-	if (ret)
-		return ret;
+	/* Join channel group.
+	 *
+	 * GK110 and newer support channel groups (aka TSGs), where individual channels
+	 * share a timeslice, and, engine context(s).
+	 *
+	 * As such, engine contexts are tracked in nvkm_cgrp and we need them even when
+	 * channels aren't in an API channel group, and on HW that doesn't support TSGs.
+	 */
+	if (!cgrp) {
+		ret = nvkm_cgrp_new(runl, chan->name, vmm, fifo->func->cgrp.force, &chan->cgrp);
+		if (ret) {
+			RUNL_DEBUG(runl, "cgrp %d", ret);
+			return ret;
+		}
 
-	/* allocate push buffer ctxdma instance */
-	if (push) {
-		dmaobj = nvkm_dmaobj_search(client, push);
-		if (IS_ERR(dmaobj))
-			return PTR_ERR(dmaobj);
+		cgrp = chan->cgrp;
+	} else {
+		if (cgrp->runl != runl || cgrp->vmm != vmm) {
+			RUNL_DEBUG(runl, "cgrp %d %d", cgrp->runl != runl, cgrp->vmm != vmm);
+			return -EINVAL;
+		}
 
-		ret = nvkm_object_bind(&dmaobj->object, chan->inst, -16,
-				       &chan->push);
-		if (ret)
-			return ret;
+		chan->cgrp = nvkm_cgrp_ref(cgrp);
 	}
 
-	/* channel address space */
-	if (hvmm) {
-		struct nvkm_vmm *vmm = nvkm_uvmm_search(client, hvmm);
-		if (IS_ERR(vmm))
-			return PTR_ERR(vmm);
+	/* Allocate instance block. */
+	ret = nvkm_gpuobj_new(device, func->inst->size, 0x1000, func->inst->zero, NULL,
+			      &chan->inst);
+	if (ret) {
+		RUNL_DEBUG(runl, "inst %d", ret);
+		return ret;
+	}
 
-		if (vmm->mmu != device->mmu)
+	/* Initialise virtual address-space. */
+	if (func->inst->vmm) {
+		if (WARN_ON(vmm->mmu != device->mmu))
 			return -EINVAL;
 
 		ret = nvkm_vmm_join(vmm, chan->inst->memory);
-		if (ret)
+		if (ret) {
+			RUNL_DEBUG(runl, "vmm %d", ret);
 			return ret;
+		}
 
 		chan->vmm = nvkm_vmm_ref(vmm);
 	}
 
-	/* allocate channel id */
-	spin_lock_irqsave(&fifo->lock, flags);
-	chan->chid = find_first_zero_bit(fifo->mask, NVKM_FIFO_CHID_NR);
-	if (chan->chid >= NVKM_FIFO_CHID_NR) {
-		spin_unlock_irqrestore(&fifo->lock, flags);
+	/* Allocate HW ctxdma for push buffer. */
+	if (func->ramfc->ctxdma) {
+		ret = nvkm_object_bind(&dmaobj->object, chan->inst, -16, &chan->push);
+		if (ret) {
+			RUNL_DEBUG(runl, "bind %d", ret);
+			return ret;
+		}
+	}
+
+	/* Allocate channel ID. */
+	chan->id = nvkm_chid_get(runl->chid, chan);
+	if (chan->id < 0) {
+		RUNL_ERROR(runl, "!chids");
 		return -ENOSPC;
 	}
-	list_add(&chan->head, &fifo->chan);
-	__set_bit(chan->chid, fifo->mask);
-	spin_unlock_irqrestore(&fifo->lock, flags);
-
-	/* determine address of this channel's user registers */
-	chan->addr = device->func->resource_addr(device, bar) +
-		     base + user * chan->chid;
-	chan->size = user;
+
+	if (cgrp->id < 0)
+		cgrp->id = chan->id;
+
+	/* Initialise USERD. */
+	if (func->userd->bar < 0) {
+		if (ouserd + chan->func->userd->size >= nvkm_memory_size(userd)) {
+			RUNL_DEBUG(runl, "ouserd %llx", ouserd);
+			return -EINVAL;
+		}
+
+		ret = nvkm_memory_kmap(userd, &chan->userd.mem);
+		if (ret) {
+			RUNL_DEBUG(runl, "userd %d", ret);
+			return ret;
+		}
+
+		chan->userd.base = ouserd;
+	} else {
+		chan->userd.mem = nvkm_memory_ref(fifo->userd.mem);
+		chan->userd.base = chan->id * chan->func->userd->size;
+	}
+
+	if (chan->func->userd->clear)
+		chan->func->userd->clear(chan);
+
+	/* Initialise RAMFC. */
+	ret = chan->func->ramfc->write(chan, offset, length, devm, priv);
+	if (ret) {
+		RUNL_DEBUG(runl, "ramfc %d", ret);
+		return ret;
+	}
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
index e53504354841..85b94f699128 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
@@ -1,35 +1,78 @@
 /* SPDX-License-Identifier: MIT */
-#ifndef __NVKM_FIFO_CHAN_H__
-#define __NVKM_FIFO_CHAN_H__
-#define nvkm_fifo_chan(p) container_of((p), struct nvkm_fifo_chan, object)
-#include "priv.h"
-
-struct nvkm_fifo_chan_func {
-	void *(*dtor)(struct nvkm_fifo_chan *);
-	void (*init)(struct nvkm_fifo_chan *);
-	void (*fini)(struct nvkm_fifo_chan *);
-	int (*ntfy)(struct nvkm_fifo_chan *, u32 type, struct nvkm_event **);
-	int  (*engine_ctor)(struct nvkm_fifo_chan *, struct nvkm_engine *,
-			    struct nvkm_object *);
-	void (*engine_dtor)(struct nvkm_fifo_chan *, struct nvkm_engine *);
-	int  (*engine_init)(struct nvkm_fifo_chan *, struct nvkm_engine *);
-	int  (*engine_fini)(struct nvkm_fifo_chan *, struct nvkm_engine *,
-			    bool suspend);
-	int  (*object_ctor)(struct nvkm_fifo_chan *, struct nvkm_object *);
-	void (*object_dtor)(struct nvkm_fifo_chan *, int);
-	u32 (*submit_token)(struct nvkm_fifo_chan *);
+#ifndef __NVKM_CHAN_H__
+#define __NVKM_CHAN_H__
+#include <engine/fifo.h>
+struct nvkm_dmaobj;
+struct nvkm_engn;
+struct nvkm_runl;
+
+extern const struct nvkm_event_func nvkm_chan_event;
+
+struct nvkm_cctx {
+	struct nvkm_vctx *vctx;
+	refcount_t refs;
+	refcount_t uses;
+
+	struct list_head head;
 };
 
-int nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *, struct nvkm_fifo *,
-			u32 size, u32 align, bool zero, u64 vm, u64 push,
-			u32 engm, int bar, u32 base, u32 user,
-			const struct nvkm_oclass *, struct nvkm_fifo_chan *);
+struct nvkm_chan_func {
+	const struct nvkm_chan_func_inst {
+		u32 size;
+		bool zero;
+		bool vmm;
+	} *inst;
 
-struct nvkm_fifo_chan_oclass {
-	int (*ctor)(struct nvkm_fifo *, const struct nvkm_oclass *,
-		    void *data, u32 size, struct nvkm_object **);
-	struct nvkm_sclass base;
+	const struct nvkm_chan_func_userd {
+		int bar;
+		u32 base;
+		u32 size;
+		void (*clear)(struct nvkm_chan *);
+	} *userd;
+
+	const struct nvkm_chan_func_ramfc {
+		const struct nvkm_ramfc_layout {
+			unsigned bits:6;
+			unsigned ctxs:5;
+			unsigned ctxp:8;
+			unsigned regs:5;
+			unsigned regp;
+		} *layout;
+		int (*write)(struct nvkm_chan *, u64 offset, u64 length, u32 devm, bool priv);
+		void (*clear)(struct nvkm_chan *);
+		bool ctxdma;
+		u32 devm;
+		bool priv;
+	} *ramfc;
+
+	void (*bind)(struct nvkm_chan *);
+	void (*unbind)(struct nvkm_chan *);
+	void (*start)(struct nvkm_chan *);
+	void (*stop)(struct nvkm_chan *);
+	void (*preempt)(struct nvkm_chan *);
+	u32 (*doorbell_handle)(struct nvkm_chan *);
 };
 
-int gf100_fifo_chan_ntfy(struct nvkm_fifo_chan *, u32, struct nvkm_event **);
+int nvkm_chan_new_(const struct nvkm_chan_func *, struct nvkm_runl *, int runq, struct nvkm_cgrp *,
+		   const char *name, bool priv, u32 devm, struct nvkm_vmm *, struct nvkm_dmaobj *,
+		   u64 offset, u64 length, struct nvkm_memory *userd, u64 userd_bar1,
+		   struct nvkm_chan **);
+void nvkm_chan_del(struct nvkm_chan **);
+void nvkm_chan_allow(struct nvkm_chan *);
+void nvkm_chan_block(struct nvkm_chan *);
+void nvkm_chan_error(struct nvkm_chan *, bool preempt);
+void nvkm_chan_insert(struct nvkm_chan *);
+void nvkm_chan_remove(struct nvkm_chan *, bool preempt);
+void nvkm_chan_remove_locked(struct nvkm_chan *);
+int nvkm_chan_preempt(struct nvkm_chan *, bool wait);
+int nvkm_chan_preempt_locked(struct nvkm_chan *, bool wait);
+int nvkm_chan_cctx_get(struct nvkm_chan *, struct nvkm_engn *, struct nvkm_cctx **,
+		       struct nvkm_client * /*TODO: remove need for this */);
+void nvkm_chan_cctx_put(struct nvkm_chan *, struct nvkm_cctx **);
+void nvkm_chan_cctx_bind(struct nvkm_chan *, struct nvkm_engn *, struct nvkm_cctx *);
+
+#define CHAN_PRCLI(c,l,p,f,a...) CGRP_PRINT((c)->cgrp, l, p, "%04x:[%s]"f, (c)->id, (c)->name, ##a)
+#define CHAN_PRINT(c,l,p,f,a...) CGRP_PRINT((c)->cgrp, l, p, "%04x:"f, (c)->id, ##a)
+#define CHAN_ERROR(c,f,a...) CHAN_PRCLI((c), ERROR,    err, " "f"\n", ##a)
+#define CHAN_TRACE(c,f,a...) CHAN_PRINT((c), TRACE,   info, " "f"\n", ##a)
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c
deleted file mode 100644
index 3492c561f2cf..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "channv50.h"
-
-#include <core/client.h>
-#include <core/ramht.h>
-#include <subdev/mmu.h>
-#include <subdev/timer.h>
-
-#include <nvif/cl826e.h>
-
-static int
-g84_fifo_chan_ntfy(struct nvkm_fifo_chan *chan, u32 type,
-		   struct nvkm_event **pevent)
-{
-	switch (type) {
-	case NV826E_V0_NTFY_NON_STALL_INTERRUPT:
-		*pevent = &chan->fifo->uevent;
-		return 0;
-	default:
-		break;
-	}
-	return -EINVAL;
-}
-
-static int
-g84_fifo_chan_engine_addr(struct nvkm_engine *engine)
-{
-	switch (engine->subdev.type) {
-	case NVKM_ENGINE_DMAOBJ:
-	case NVKM_ENGINE_SW    : return -1;
-	case NVKM_ENGINE_GR    : return 0x0020;
-	case NVKM_ENGINE_VP    :
-	case NVKM_ENGINE_MSPDEC: return 0x0040;
-	case NVKM_ENGINE_MPEG  :
-	case NVKM_ENGINE_MSPPP : return 0x0060;
-	case NVKM_ENGINE_BSP   :
-	case NVKM_ENGINE_MSVLD : return 0x0080;
-	case NVKM_ENGINE_CIPHER:
-	case NVKM_ENGINE_SEC   : return 0x00a0;
-	case NVKM_ENGINE_CE    : return 0x00c0;
-	default:
-		WARN_ON(1);
-		return -1;
-	}
-}
-
-static int
-g84_fifo_chan_engine_fini(struct nvkm_fifo_chan *base,
-			  struct nvkm_engine *engine, bool suspend)
-{
-	struct nv50_fifo_chan *chan = nv50_fifo_chan(base);
-	struct nv50_fifo *fifo = chan->fifo;
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	u32 engn, save;
-	int offset;
-	bool done;
-
-	offset = g84_fifo_chan_engine_addr(engine);
-	if (offset < 0)
-		return 0;
-
-	engn = fifo->base.func->engine_id(&fifo->base, engine) - 1;
-	save = nvkm_mask(device, 0x002520, 0x0000003f, 1 << engn);
-	nvkm_wr32(device, 0x0032fc, chan->base.inst->addr >> 12);
-	done = nvkm_msec(device, 2000,
-		if (nvkm_rd32(device, 0x0032fc) != 0xffffffff)
-			break;
-	) >= 0;
-	nvkm_wr32(device, 0x002520, save);
-	if (!done) {
-		nvkm_error(subdev, "channel %d [%s] unload timeout\n",
-			   chan->base.chid, chan->base.object.client->name);
-		if (suspend)
-			return -EBUSY;
-	}
-
-	nvkm_kmap(chan->eng);
-	nvkm_wo32(chan->eng, offset + 0x00, 0x00000000);
-	nvkm_wo32(chan->eng, offset + 0x04, 0x00000000);
-	nvkm_wo32(chan->eng, offset + 0x08, 0x00000000);
-	nvkm_wo32(chan->eng, offset + 0x0c, 0x00000000);
-	nvkm_wo32(chan->eng, offset + 0x10, 0x00000000);
-	nvkm_wo32(chan->eng, offset + 0x14, 0x00000000);
-	nvkm_done(chan->eng);
-	return 0;
-}
-
-
-static int
-g84_fifo_chan_engine_init(struct nvkm_fifo_chan *base,
-			  struct nvkm_engine *engine)
-{
-	struct nv50_fifo_chan *chan = nv50_fifo_chan(base);
-	struct nvkm_gpuobj *engn = *nv50_fifo_chan_engine(chan, engine);
-	u64 limit, start;
-	int offset;
-
-	offset = g84_fifo_chan_engine_addr(engine);
-	if (offset < 0)
-		return 0;
-	limit = engn->addr + engn->size - 1;
-	start = engn->addr;
-
-	nvkm_kmap(chan->eng);
-	nvkm_wo32(chan->eng, offset + 0x00, 0x00190000);
-	nvkm_wo32(chan->eng, offset + 0x04, lower_32_bits(limit));
-	nvkm_wo32(chan->eng, offset + 0x08, lower_32_bits(start));
-	nvkm_wo32(chan->eng, offset + 0x0c, upper_32_bits(limit) << 24 |
-					    upper_32_bits(start));
-	nvkm_wo32(chan->eng, offset + 0x10, 0x00000000);
-	nvkm_wo32(chan->eng, offset + 0x14, 0x00000000);
-	nvkm_done(chan->eng);
-	return 0;
-}
-
-static int
-g84_fifo_chan_engine_ctor(struct nvkm_fifo_chan *base,
-			  struct nvkm_engine *engine,
-			  struct nvkm_object *object)
-{
-	struct nv50_fifo_chan *chan = nv50_fifo_chan(base);
-
-	if (g84_fifo_chan_engine_addr(engine) < 0)
-		return 0;
-
-	return nvkm_object_bind(object, NULL, 0, nv50_fifo_chan_engine(chan, engine));
-}
-
-static int
-g84_fifo_chan_object_ctor(struct nvkm_fifo_chan *base,
-			  struct nvkm_object *object)
-{
-	struct nv50_fifo_chan *chan = nv50_fifo_chan(base);
-	u32 handle = object->handle;
-	u32 context;
-
-	switch (object->engine->subdev.type) {
-	case NVKM_ENGINE_DMAOBJ:
-	case NVKM_ENGINE_SW    : context = 0x00000000; break;
-	case NVKM_ENGINE_GR    : context = 0x00100000; break;
-	case NVKM_ENGINE_MPEG  :
-	case NVKM_ENGINE_MSPPP : context = 0x00200000; break;
-	case NVKM_ENGINE_ME    :
-	case NVKM_ENGINE_CE    : context = 0x00300000; break;
-	case NVKM_ENGINE_VP    :
-	case NVKM_ENGINE_MSPDEC: context = 0x00400000; break;
-	case NVKM_ENGINE_CIPHER:
-	case NVKM_ENGINE_SEC   :
-	case NVKM_ENGINE_VIC   : context = 0x00500000; break;
-	case NVKM_ENGINE_BSP   :
-	case NVKM_ENGINE_MSVLD : context = 0x00600000; break;
-	default:
-		WARN_ON(1);
-		return -EINVAL;
-	}
-
-	return nvkm_ramht_insert(chan->ramht, object, 0, 4, handle, context);
-}
-
-static void
-g84_fifo_chan_init(struct nvkm_fifo_chan *base)
-{
-	struct nv50_fifo_chan *chan = nv50_fifo_chan(base);
-	struct nv50_fifo *fifo = chan->fifo;
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	u64 addr = chan->ramfc->addr >> 8;
-	u32 chid = chan->base.chid;
-
-	nvkm_wr32(device, 0x002600 + (chid * 4), 0x80000000 | addr);
-	nv50_fifo_runlist_update(fifo);
-}
-
-static const struct nvkm_fifo_chan_func
-g84_fifo_chan_func = {
-	.dtor = nv50_fifo_chan_dtor,
-	.init = g84_fifo_chan_init,
-	.fini = nv50_fifo_chan_fini,
-	.ntfy = g84_fifo_chan_ntfy,
-	.engine_ctor = g84_fifo_chan_engine_ctor,
-	.engine_dtor = nv50_fifo_chan_engine_dtor,
-	.engine_init = g84_fifo_chan_engine_init,
-	.engine_fini = g84_fifo_chan_engine_fini,
-	.object_ctor = g84_fifo_chan_object_ctor,
-	.object_dtor = nv50_fifo_chan_object_dtor,
-};
-
-int
-g84_fifo_chan_ctor(struct nv50_fifo *fifo, u64 vmm, u64 push,
-		   const struct nvkm_oclass *oclass,
-		   struct nv50_fifo_chan *chan)
-{
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	int ret;
-
-	if (!vmm)
-		return -EINVAL;
-
-	ret = nvkm_fifo_chan_ctor(&g84_fifo_chan_func, &fifo->base,
-				  0x10000, 0x1000, false, vmm, push,
-				  BIT(G84_FIFO_ENGN_SW) |
-				  BIT(G84_FIFO_ENGN_GR) |
-				  BIT(G84_FIFO_ENGN_MPEG) |
-				  BIT(G84_FIFO_ENGN_MSPPP) |
-				  BIT(G84_FIFO_ENGN_ME) |
-				  BIT(G84_FIFO_ENGN_CE0) |
-				  BIT(G84_FIFO_ENGN_VP) |
-				  BIT(G84_FIFO_ENGN_MSPDEC) |
-				  BIT(G84_FIFO_ENGN_CIPHER) |
-				  BIT(G84_FIFO_ENGN_SEC) |
-				  BIT(G84_FIFO_ENGN_VIC) |
-				  BIT(G84_FIFO_ENGN_BSP) |
-				  BIT(G84_FIFO_ENGN_MSVLD) |
-				  BIT(G84_FIFO_ENGN_DMA),
-				  0, 0xc00000, 0x2000, oclass, &chan->base);
-	chan->fifo = fifo;
-	if (ret)
-		return ret;
-
-	ret = nvkm_gpuobj_new(device, 0x0200, 0, true, chan->base.inst,
-			      &chan->eng);
-	if (ret)
-		return ret;
-
-	ret = nvkm_gpuobj_new(device, 0x4000, 0, false, chan->base.inst,
-			      &chan->pgd);
-	if (ret)
-		return ret;
-
-	ret = nvkm_gpuobj_new(device, 0x1000, 0x400, true, chan->base.inst,
-			      &chan->cache);
-	if (ret)
-		return ret;
-
-	ret = nvkm_gpuobj_new(device, 0x100, 0x100, true, chan->base.inst,
-			      &chan->ramfc);
-	if (ret)
-		return ret;
-
-	return nvkm_ramht_new(device, 0x8000, 16, chan->base.inst, &chan->ramht);
-}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changf100.h
deleted file mode 100644
index f7ac1061fa84..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changf100.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __GF100_FIFO_CHAN_H__
-#define __GF100_FIFO_CHAN_H__
-#define gf100_fifo_chan(p) container_of((p), struct gf100_fifo_chan, base)
-#include "chan.h"
-#include "gf100.h"
-
-struct gf100_fifo_chan {
-	struct nvkm_fifo_chan base;
-	struct gf100_fifo *fifo;
-
-	struct list_head head;
-	bool killed;
-
-#define GF100_FIFO_ENGN_GR     0
-#define GF100_FIFO_ENGN_MSPDEC 1
-#define GF100_FIFO_ENGN_MSPPP  2
-#define GF100_FIFO_ENGN_MSVLD  3
-#define GF100_FIFO_ENGN_CE0    4
-#define GF100_FIFO_ENGN_CE1    5
-#define GF100_FIFO_ENGN_SW     15
-	struct gf100_fifo_engn {
-		struct nvkm_gpuobj *inst;
-		struct nvkm_vma *vma;
-	} engn[NVKM_FIFO_ENGN_NR];
-};
-
-extern const struct nvkm_fifo_chan_oclass gf100_fifo_gpfifo_oclass;
-#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
deleted file mode 100644
index 9713daee6c76..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
+++ /dev/null
@@ -1,52 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __GK104_FIFO_CHAN_H__
-#define __GK104_FIFO_CHAN_H__
-#define gk104_fifo_chan(p) container_of((p), struct gk104_fifo_chan, base)
-#include "chan.h"
-#include "gk104.h"
-
-struct gk104_fifo_chan {
-	struct nvkm_fifo_chan base;
-	struct gk104_fifo *fifo;
-	int runl;
-
-	struct nvkm_fifo_cgrp *cgrp;
-	struct list_head head;
-	bool killed;
-
-#define GK104_FIFO_ENGN_SW 15
-	struct gk104_fifo_engn {
-		struct nvkm_gpuobj *inst;
-		struct nvkm_vma *vma;
-	} engn[NVKM_FIFO_ENGN_NR];
-};
-
-extern const struct nvkm_fifo_chan_func gk104_fifo_gpfifo_func;
-
-int gk104_fifo_gpfifo_new(struct gk104_fifo *, const struct nvkm_oclass *,
-			  void *data, u32 size, struct nvkm_object **);
-void *gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *);
-void gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *);
-void gk104_fifo_gpfifo_fini(struct nvkm_fifo_chan *);
-struct gk104_fifo_engn *gk104_fifo_gpfifo_engine(struct gk104_fifo_chan *, struct nvkm_engine *);
-int gk104_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *, struct nvkm_engine *,
-				  struct nvkm_object *);
-void gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *,
-				   struct nvkm_engine *);
-int gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *);
-int gk104_fifo_gpfifo_kick_locked(struct gk104_fifo_chan *);
-
-int gv100_fifo_gpfifo_new(struct gk104_fifo *, const struct nvkm_oclass *,
-			  void *data, u32 size, struct nvkm_object **);
-int gv100_fifo_gpfifo_new_(const struct nvkm_fifo_chan_func *,
-			   struct gk104_fifo *, u64 *, u16 *, u64, u64, u64,
-			   u64 *, bool, u32 *, const struct nvkm_oclass *,
-			   struct nvkm_object **);
-int gv100_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *,
-				  struct nvkm_engine *);
-int gv100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *,
-				  struct nvkm_engine *, bool);
-
-int tu102_fifo_gpfifo_new(struct gk104_fifo *, const struct nvkm_oclass *,
-			  void *data, u32 size, struct nvkm_object **);
-#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv04.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv04.h
deleted file mode 100644
index 727bc8976b40..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv04.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __NV04_FIFO_CHAN_H__
-#define __NV04_FIFO_CHAN_H__
-#define nv04_fifo_chan(p) container_of((p), struct nv04_fifo_chan, base)
-#include "chan.h"
-#include "nv04.h"
-
-struct nv04_fifo_chan {
-	struct nvkm_fifo_chan base;
-	struct nv04_fifo *fifo;
-	u32 ramfc;
-#define NV04_FIFO_ENGN_SW   0
-#define NV04_FIFO_ENGN_GR   1
-#define NV04_FIFO_ENGN_MPEG 2
-#define NV04_FIFO_ENGN_DMA  3
-	struct nvkm_gpuobj *engn[NVKM_FIFO_ENGN_NR];
-};
-
-extern const struct nvkm_fifo_chan_func nv04_fifo_dma_func;
-void *nv04_fifo_dma_dtor(struct nvkm_fifo_chan *);
-void nv04_fifo_dma_init(struct nvkm_fifo_chan *);
-void nv04_fifo_dma_fini(struct nvkm_fifo_chan *);
-void nv04_fifo_dma_object_dtor(struct nvkm_fifo_chan *, int);
-
-extern const struct nvkm_fifo_chan_oclass nv04_fifo_dma_oclass;
-extern const struct nvkm_fifo_chan_oclass nv10_fifo_dma_oclass;
-extern const struct nvkm_fifo_chan_oclass nv17_fifo_dma_oclass;
-extern const struct nvkm_fifo_chan_oclass nv40_fifo_dma_oclass;
-#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.c
deleted file mode 100644
index c44d7c81dd52..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.c
+++ /dev/null
@@ -1,276 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "channv50.h"
-
-#include <core/client.h>
-#include <core/ramht.h>
-#include <subdev/mmu.h>
-#include <subdev/timer.h>
-
-static int
-nv50_fifo_chan_engine_addr(struct nvkm_engine *engine)
-{
-	switch (engine->subdev.type) {
-	case NVKM_ENGINE_DMAOBJ:
-	case NVKM_ENGINE_SW    : return -1;
-	case NVKM_ENGINE_GR    : return 0x0000;
-	case NVKM_ENGINE_MPEG  : return 0x0060;
-	default:
-		WARN_ON(1);
-		return -1;
-	}
-}
-
-struct nvkm_gpuobj **
-nv50_fifo_chan_engine(struct nv50_fifo_chan *chan, struct nvkm_engine *engine)
-{
-	int engi = chan->base.fifo->func->engine_id(chan->base.fifo, engine);
-	if (engi >= 0)
-		return &chan->engn[engi];
-	return NULL;
-}
-
-static int
-nv50_fifo_chan_engine_fini(struct nvkm_fifo_chan *base,
-			   struct nvkm_engine *engine, bool suspend)
-{
-	struct nv50_fifo_chan *chan = nv50_fifo_chan(base);
-	struct nv50_fifo *fifo = chan->fifo;
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	int offset, ret = 0;
-	u32 me;
-
-	offset = nv50_fifo_chan_engine_addr(engine);
-	if (offset < 0)
-		return 0;
-
-	/* HW bug workaround:
-	 *
-	 * PFIFO will hang forever if the connected engines don't report
-	 * that they've processed the context switch request.
-	 *
-	 * In order for the kickoff to work, we need to ensure all the
-	 * connected engines are in a state where they can answer.
-	 *
-	 * Newer chipsets don't seem to suffer from this issue, and well,
-	 * there's also a "ignore these engines" bitmask reg we can use
-	 * if we hit the issue there..
-	 */
-	me = nvkm_mask(device, 0x00b860, 0x00000001, 0x00000001);
-
-	/* do the kickoff... */
-	nvkm_wr32(device, 0x0032fc, chan->base.inst->addr >> 12);
-	if (nvkm_msec(device, 2000,
-		if (nvkm_rd32(device, 0x0032fc) != 0xffffffff)
-			break;
-	) < 0) {
-		nvkm_error(subdev, "channel %d [%s] unload timeout\n",
-			   chan->base.chid, chan->base.object.client->name);
-		if (suspend)
-			ret = -EBUSY;
-	}
-	nvkm_wr32(device, 0x00b860, me);
-
-	if (ret == 0) {
-		nvkm_kmap(chan->eng);
-		nvkm_wo32(chan->eng, offset + 0x00, 0x00000000);
-		nvkm_wo32(chan->eng, offset + 0x04, 0x00000000);
-		nvkm_wo32(chan->eng, offset + 0x08, 0x00000000);
-		nvkm_wo32(chan->eng, offset + 0x0c, 0x00000000);
-		nvkm_wo32(chan->eng, offset + 0x10, 0x00000000);
-		nvkm_wo32(chan->eng, offset + 0x14, 0x00000000);
-		nvkm_done(chan->eng);
-	}
-
-	return ret;
-}
-
-static int
-nv50_fifo_chan_engine_init(struct nvkm_fifo_chan *base,
-			   struct nvkm_engine *engine)
-{
-	struct nv50_fifo_chan *chan = nv50_fifo_chan(base);
-	struct nvkm_gpuobj *engn = *nv50_fifo_chan_engine(chan, engine);
-	u64 limit, start;
-	int offset;
-
-	offset = nv50_fifo_chan_engine_addr(engine);
-	if (offset < 0)
-		return 0;
-	limit = engn->addr + engn->size - 1;
-	start = engn->addr;
-
-	nvkm_kmap(chan->eng);
-	nvkm_wo32(chan->eng, offset + 0x00, 0x00190000);
-	nvkm_wo32(chan->eng, offset + 0x04, lower_32_bits(limit));
-	nvkm_wo32(chan->eng, offset + 0x08, lower_32_bits(start));
-	nvkm_wo32(chan->eng, offset + 0x0c, upper_32_bits(limit) << 24 |
-					    upper_32_bits(start));
-	nvkm_wo32(chan->eng, offset + 0x10, 0x00000000);
-	nvkm_wo32(chan->eng, offset + 0x14, 0x00000000);
-	nvkm_done(chan->eng);
-	return 0;
-}
-
-void
-nv50_fifo_chan_engine_dtor(struct nvkm_fifo_chan *base,
-			   struct nvkm_engine *engine)
-{
-	struct nv50_fifo_chan *chan = nv50_fifo_chan(base);
-	nvkm_gpuobj_del(nv50_fifo_chan_engine(chan, engine));
-}
-
-static int
-nv50_fifo_chan_engine_ctor(struct nvkm_fifo_chan *base,
-			   struct nvkm_engine *engine,
-			   struct nvkm_object *object)
-{
-	struct nv50_fifo_chan *chan = nv50_fifo_chan(base);
-
-	if (nv50_fifo_chan_engine_addr(engine) < 0)
-		return 0;
-
-	return nvkm_object_bind(object, NULL, 0, nv50_fifo_chan_engine(chan, engine));
-}
-
-void
-nv50_fifo_chan_object_dtor(struct nvkm_fifo_chan *base, int cookie)
-{
-	struct nv50_fifo_chan *chan = nv50_fifo_chan(base);
-	nvkm_ramht_remove(chan->ramht, cookie);
-}
-
-static int
-nv50_fifo_chan_object_ctor(struct nvkm_fifo_chan *base,
-			   struct nvkm_object *object)
-{
-	struct nv50_fifo_chan *chan = nv50_fifo_chan(base);
-	u32 handle = object->handle;
-	u32 context;
-
-	switch (object->engine->subdev.type) {
-	case NVKM_ENGINE_DMAOBJ:
-	case NVKM_ENGINE_SW    : context = 0x00000000; break;
-	case NVKM_ENGINE_GR    : context = 0x00100000; break;
-	case NVKM_ENGINE_MPEG  : context = 0x00200000; break;
-	default:
-		WARN_ON(1);
-		return -EINVAL;
-	}
-
-	return nvkm_ramht_insert(chan->ramht, object, 0, 4, handle, context);
-}
-
-void
-nv50_fifo_chan_fini(struct nvkm_fifo_chan *base)
-{
-	struct nv50_fifo_chan *chan = nv50_fifo_chan(base);
-	struct nv50_fifo *fifo = chan->fifo;
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	u32 chid = chan->base.chid;
-
-	/* remove channel from runlist, fifo will unload context */
-	nvkm_mask(device, 0x002600 + (chid * 4), 0x80000000, 0x00000000);
-	nv50_fifo_runlist_update(fifo);
-	nvkm_wr32(device, 0x002600 + (chid * 4), 0x00000000);
-}
-
-static void
-nv50_fifo_chan_init(struct nvkm_fifo_chan *base)
-{
-	struct nv50_fifo_chan *chan = nv50_fifo_chan(base);
-	struct nv50_fifo *fifo = chan->fifo;
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	u64 addr = chan->ramfc->addr >> 12;
-	u32 chid = chan->base.chid;
-
-	nvkm_wr32(device, 0x002600 + (chid * 4), 0x80000000 | addr);
-	nv50_fifo_runlist_update(fifo);
-}
-
-void *
-nv50_fifo_chan_dtor(struct nvkm_fifo_chan *base)
-{
-	struct nv50_fifo_chan *chan = nv50_fifo_chan(base);
-	nvkm_ramht_del(&chan->ramht);
-	nvkm_gpuobj_del(&chan->pgd);
-	nvkm_gpuobj_del(&chan->eng);
-	nvkm_gpuobj_del(&chan->cache);
-	nvkm_gpuobj_del(&chan->ramfc);
-	return chan;
-}
-
-static const struct nvkm_fifo_chan_func
-nv50_fifo_chan_func = {
-	.dtor = nv50_fifo_chan_dtor,
-	.init = nv50_fifo_chan_init,
-	.fini = nv50_fifo_chan_fini,
-	.engine_ctor = nv50_fifo_chan_engine_ctor,
-	.engine_dtor = nv50_fifo_chan_engine_dtor,
-	.engine_init = nv50_fifo_chan_engine_init,
-	.engine_fini = nv50_fifo_chan_engine_fini,
-	.object_ctor = nv50_fifo_chan_object_ctor,
-	.object_dtor = nv50_fifo_chan_object_dtor,
-};
-
-int
-nv50_fifo_chan_ctor(struct nv50_fifo *fifo, u64 vmm, u64 push,
-		    const struct nvkm_oclass *oclass,
-		    struct nv50_fifo_chan *chan)
-{
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	int ret;
-
-	if (!vmm)
-		return -EINVAL;
-
-	ret = nvkm_fifo_chan_ctor(&nv50_fifo_chan_func, &fifo->base,
-				  0x10000, 0x1000, false, vmm, push,
-				  BIT(NV50_FIFO_ENGN_SW) |
-				  BIT(NV50_FIFO_ENGN_GR) |
-				  BIT(NV50_FIFO_ENGN_MPEG) |
-				  BIT(NV50_FIFO_ENGN_DMA),
-				  0, 0xc00000, 0x2000, oclass, &chan->base);
-	chan->fifo = fifo;
-	if (ret)
-		return ret;
-
-	ret = nvkm_gpuobj_new(device, 0x0200, 0x1000, true, chan->base.inst,
-			      &chan->ramfc);
-	if (ret)
-		return ret;
-
-	ret = nvkm_gpuobj_new(device, 0x1200, 0, true, chan->base.inst,
-			      &chan->eng);
-	if (ret)
-		return ret;
-
-	ret = nvkm_gpuobj_new(device, 0x4000, 0, false, chan->base.inst,
-			      &chan->pgd);
-	if (ret)
-		return ret;
-
-	return nvkm_ramht_new(device, 0x8000, 16, chan->base.inst, &chan->ramht);
-}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h
deleted file mode 100644
index 3a95730d7ff5..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/channv50.h
+++ /dev/null
@@ -1,53 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __NV50_FIFO_CHAN_H__
-#define __NV50_FIFO_CHAN_H__
-#define nv50_fifo_chan(p) container_of((p), struct nv50_fifo_chan, base)
-#include "chan.h"
-#include "nv50.h"
-
-struct nv50_fifo_chan {
-	struct nv50_fifo *fifo;
-	struct nvkm_fifo_chan base;
-
-	struct nvkm_gpuobj *ramfc;
-	struct nvkm_gpuobj *cache;
-	struct nvkm_gpuobj *eng;
-	struct nvkm_gpuobj *pgd;
-	struct nvkm_ramht *ramht;
-
-#define NV50_FIFO_ENGN_SW   0
-#define NV50_FIFO_ENGN_GR   1
-#define NV50_FIFO_ENGN_MPEG 2
-#define NV50_FIFO_ENGN_DMA  3
-
-#define G84_FIFO_ENGN_SW     0
-#define G84_FIFO_ENGN_GR     1
-#define G84_FIFO_ENGN_MPEG   2
-#define G84_FIFO_ENGN_MSPPP  2
-#define G84_FIFO_ENGN_ME     3
-#define G84_FIFO_ENGN_CE0    3
-#define G84_FIFO_ENGN_VP     4
-#define G84_FIFO_ENGN_MSPDEC 4
-#define G84_FIFO_ENGN_CIPHER 5
-#define G84_FIFO_ENGN_SEC    5
-#define G84_FIFO_ENGN_VIC    5
-#define G84_FIFO_ENGN_BSP    6
-#define G84_FIFO_ENGN_MSVLD  6
-#define G84_FIFO_ENGN_DMA    7
-	struct nvkm_gpuobj *engn[NVKM_FIFO_ENGN_NR];
-};
-
-int nv50_fifo_chan_ctor(struct nv50_fifo *, u64 vmm, u64 push,
-			const struct nvkm_oclass *, struct nv50_fifo_chan *);
-void *nv50_fifo_chan_dtor(struct nvkm_fifo_chan *);
-void nv50_fifo_chan_fini(struct nvkm_fifo_chan *);
-struct nvkm_gpuobj **nv50_fifo_chan_engine(struct nv50_fifo_chan *, struct nvkm_engine *);
-void nv50_fifo_chan_engine_dtor(struct nvkm_fifo_chan *, struct nvkm_engine *);
-void nv50_fifo_chan_object_dtor(struct nvkm_fifo_chan *, int);
-
-int g84_fifo_chan_ctor(struct nv50_fifo *, u64 vmm, u64 push,
-		       const struct nvkm_oclass *, struct nv50_fifo_chan *);
-
-extern const struct nvkm_fifo_chan_oclass nv50_fifo_gpfifo_oclass;
-extern const struct nvkm_fifo_chan_oclass g84_fifo_gpfifo_oclass;
-#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.c
new file mode 100644
index 000000000000..23944d95efd5
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.c
@@ -0,0 +1,111 @@
+/*
+ * Copyright 2020 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "chid.h"
+
+void
+nvkm_chid_put(struct nvkm_chid *chid, int id, spinlock_t *data_lock)
+{
+	if (id >= 0) {
+		spin_lock_irq(&chid->lock);
+		spin_lock(data_lock);
+		chid->data[id] = NULL;
+		spin_unlock(data_lock);
+		clear_bit(id, chid->used);
+		spin_unlock_irq(&chid->lock);
+	}
+}
+
+int
+nvkm_chid_get(struct nvkm_chid *chid, void *data)
+{
+	int id = -1, cid;
+
+	spin_lock_irq(&chid->lock);
+	cid = find_first_zero_bit(chid->used, chid->nr);
+	if (cid < chid->nr) {
+		set_bit(cid, chid->used);
+		chid->data[cid] = data;
+		id = cid;
+	}
+	spin_unlock_irq(&chid->lock);
+	return id;
+}
+
+static void
+nvkm_chid_del(struct kref *kref)
+{
+	struct nvkm_chid *chid = container_of(kref, typeof(*chid), kref);
+
+	nvkm_event_fini(&chid->event);
+
+	kvfree(chid->data);
+	kfree(chid);
+}
+
+void
+nvkm_chid_unref(struct nvkm_chid **pchid)
+{
+	struct nvkm_chid *chid = *pchid;
+
+	if (!chid)
+		return;
+
+	kref_put(&chid->kref, nvkm_chid_del);
+	*pchid = NULL;
+}
+
+struct nvkm_chid *
+nvkm_chid_ref(struct nvkm_chid *chid)
+{
+	if (chid)
+		kref_get(&chid->kref);
+
+	return chid;
+}
+
+int
+nvkm_chid_new(const struct nvkm_event_func *func, struct nvkm_subdev *subdev,
+	      int nr, int first, int count, struct nvkm_chid **pchid)
+{
+	struct nvkm_chid *chid;
+	int id;
+
+	if (!(chid = *pchid = kzalloc(struct_size(chid, used, nr), GFP_KERNEL)))
+		return -ENOMEM;
+
+	kref_init(&chid->kref);
+	chid->nr = nr;
+	chid->mask = chid->nr - 1;
+	spin_lock_init(&chid->lock);
+
+	if (!(chid->data = kvzalloc(sizeof(*chid->data) * nr, GFP_KERNEL))) {
+		nvkm_chid_unref(pchid);
+		return -ENOMEM;
+	}
+
+	for (id = 0; id < first; id++)
+		__set_bit(id, chid->used);
+	for (id = first + count; id < nr; id++)
+		__set_bit(id, chid->used);
+
+	return nvkm_event_init(func, subdev, 1, nr, &chid->event);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.h
new file mode 100644
index 000000000000..2a42efb18401
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chid.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef __NVKM_CHID_H__
+#define __NVKM_CHID_H__
+#include <core/event.h>
+
+struct nvkm_chid {
+	struct kref kref;
+	int nr;
+	u32 mask;
+
+	struct nvkm_event event;
+
+	void **data;
+
+	spinlock_t lock;
+	unsigned long used[];
+};
+
+int nvkm_chid_new(const struct nvkm_event_func *, struct nvkm_subdev *,
+		  int nr, int first, int count, struct nvkm_chid **pchid);
+struct nvkm_chid *nvkm_chid_ref(struct nvkm_chid *);
+void nvkm_chid_unref(struct nvkm_chid **);
+int nvkm_chid_get(struct nvkm_chid *, void *data);
+void nvkm_chid_put(struct nvkm_chid *, int id, spinlock_t *data_lock);
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c
deleted file mode 100644
index dbcdc5fab990..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv04.c
+++ /dev/null
@@ -1,226 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "channv04.h"
-#include "regsnv04.h"
-
-#include <core/client.h>
-#include <core/ramht.h>
-#include <subdev/instmem.h>
-
-#include <nvif/class.h>
-#include <nvif/cl006b.h>
-#include <nvif/unpack.h>
-
-void
-nv04_fifo_dma_object_dtor(struct nvkm_fifo_chan *base, int cookie)
-{
-	struct nv04_fifo_chan *chan = nv04_fifo_chan(base);
-	struct nvkm_instmem *imem = chan->fifo->base.engine.subdev.device->imem;
-
-	mutex_lock(&chan->fifo->base.mutex);
-	nvkm_ramht_remove(imem->ramht, cookie);
-	mutex_unlock(&chan->fifo->base.mutex);
-}
-
-static int
-nv04_fifo_dma_object_ctor(struct nvkm_fifo_chan *base,
-			  struct nvkm_object *object)
-{
-	struct nv04_fifo_chan *chan = nv04_fifo_chan(base);
-	struct nvkm_instmem *imem = chan->fifo->base.engine.subdev.device->imem;
-	u32 context = 0x80000000 | chan->base.chid << 24;
-	u32 handle  = object->handle;
-	int hash;
-
-	switch (object->engine->subdev.type) {
-	case NVKM_ENGINE_DMAOBJ:
-	case NVKM_ENGINE_SW    : context |= 0x00000000; break;
-	case NVKM_ENGINE_GR    : context |= 0x00010000; break;
-	case NVKM_ENGINE_MPEG  : context |= 0x00020000; break;
-	default:
-		WARN_ON(1);
-		return -EINVAL;
-	}
-
-	mutex_lock(&chan->fifo->base.mutex);
-	hash = nvkm_ramht_insert(imem->ramht, object, chan->base.chid, 4,
-				 handle, context);
-	mutex_unlock(&chan->fifo->base.mutex);
-	return hash;
-}
-
-void
-nv04_fifo_dma_fini(struct nvkm_fifo_chan *base)
-{
-	struct nv04_fifo_chan *chan = nv04_fifo_chan(base);
-	struct nv04_fifo *fifo = chan->fifo;
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct nvkm_memory *fctx = device->imem->ramfc;
-	const struct nv04_fifo_ramfc *c;
-	unsigned long flags;
-	u32 mask = fifo->base.nr - 1;
-	u32 data = chan->ramfc;
-	u32 chid;
-
-	/* prevent fifo context switches */
-	spin_lock_irqsave(&fifo->base.lock, flags);
-	nvkm_wr32(device, NV03_PFIFO_CACHES, 0);
-
-	/* if this channel is active, replace it with a null context */
-	chid = nvkm_rd32(device, NV03_PFIFO_CACHE1_PUSH1) & mask;
-	if (chid == chan->base.chid) {
-		nvkm_mask(device, NV04_PFIFO_CACHE1_DMA_PUSH, 0x00000001, 0);
-		nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH0, 0);
-		nvkm_mask(device, NV04_PFIFO_CACHE1_PULL0, 0x00000001, 0);
-
-		c = fifo->ramfc;
-		nvkm_kmap(fctx);
-		do {
-			u32 rm = ((1ULL << c->bits) - 1) << c->regs;
-			u32 cm = ((1ULL << c->bits) - 1) << c->ctxs;
-			u32 rv = (nvkm_rd32(device, c->regp) &  rm) >> c->regs;
-			u32 cv = (nvkm_ro32(fctx, c->ctxp + data) & ~cm);
-			nvkm_wo32(fctx, c->ctxp + data, cv | (rv << c->ctxs));
-		} while ((++c)->bits);
-		nvkm_done(fctx);
-
-		c = fifo->ramfc;
-		do {
-			nvkm_wr32(device, c->regp, 0x00000000);
-		} while ((++c)->bits);
-
-		nvkm_wr32(device, NV03_PFIFO_CACHE1_GET, 0);
-		nvkm_wr32(device, NV03_PFIFO_CACHE1_PUT, 0);
-		nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH1, mask);
-		nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH0, 1);
-		nvkm_wr32(device, NV04_PFIFO_CACHE1_PULL0, 1);
-	}
-
-	/* restore normal operation, after disabling dma mode */
-	nvkm_mask(device, NV04_PFIFO_MODE, 1 << chan->base.chid, 0);
-	nvkm_wr32(device, NV03_PFIFO_CACHES, 1);
-	spin_unlock_irqrestore(&fifo->base.lock, flags);
-}
-
-void
-nv04_fifo_dma_init(struct nvkm_fifo_chan *base)
-{
-	struct nv04_fifo_chan *chan = nv04_fifo_chan(base);
-	struct nv04_fifo *fifo = chan->fifo;
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	u32 mask = 1 << chan->base.chid;
-	unsigned long flags;
-	spin_lock_irqsave(&fifo->base.lock, flags);
-	nvkm_mask(device, NV04_PFIFO_MODE, mask, mask);
-	spin_unlock_irqrestore(&fifo->base.lock, flags);
-}
-
-void *
-nv04_fifo_dma_dtor(struct nvkm_fifo_chan *base)
-{
-	struct nv04_fifo_chan *chan = nv04_fifo_chan(base);
-	struct nv04_fifo *fifo = chan->fifo;
-	struct nvkm_instmem *imem = fifo->base.engine.subdev.device->imem;
-	const struct nv04_fifo_ramfc *c = fifo->ramfc;
-
-	nvkm_kmap(imem->ramfc);
-	do {
-		nvkm_wo32(imem->ramfc, chan->ramfc + c->ctxp, 0x00000000);
-	} while ((++c)->bits);
-	nvkm_done(imem->ramfc);
-	return chan;
-}
-
-const struct nvkm_fifo_chan_func
-nv04_fifo_dma_func = {
-	.dtor = nv04_fifo_dma_dtor,
-	.init = nv04_fifo_dma_init,
-	.fini = nv04_fifo_dma_fini,
-	.object_ctor = nv04_fifo_dma_object_ctor,
-	.object_dtor = nv04_fifo_dma_object_dtor,
-};
-
-static int
-nv04_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
-		  void *data, u32 size, struct nvkm_object **pobject)
-{
-	struct nvkm_object *parent = oclass->parent;
-	union {
-		struct nv03_channel_dma_v0 v0;
-	} *args = data;
-	struct nv04_fifo *fifo = nv04_fifo(base);
-	struct nv04_fifo_chan *chan = NULL;
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct nvkm_instmem *imem = device->imem;
-	int ret = -ENOSYS;
-
-	nvif_ioctl(parent, "create channel dma size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-		nvif_ioctl(parent, "create channel dma vers %d pushbuf %llx "
-				   "offset %08x\n", args->v0.version,
-			   args->v0.pushbuf, args->v0.offset);
-		if (!args->v0.pushbuf)
-			return -EINVAL;
-	} else
-		return ret;
-
-	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
-		return -ENOMEM;
-	*pobject = &chan->base.object;
-
-	ret = nvkm_fifo_chan_ctor(&nv04_fifo_dma_func, &fifo->base,
-				  0x1000, 0x1000, false, 0, args->v0.pushbuf,
-				  BIT(NV04_FIFO_ENGN_SW) |
-				  BIT(NV04_FIFO_ENGN_GR) |
-				  BIT(NV04_FIFO_ENGN_DMA),
-				  0, 0x800000, 0x10000, oclass, &chan->base);
-	chan->fifo = fifo;
-	if (ret)
-		return ret;
-
-	args->v0.chid = chan->base.chid;
-	chan->ramfc = chan->base.chid * 32;
-
-	nvkm_kmap(imem->ramfc);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x00, args->v0.offset);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x04, args->v0.offset);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x08, chan->base.push->addr >> 4);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x10,
-			       NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
-			       NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
-#ifdef __BIG_ENDIAN
-			       NV_PFIFO_CACHE1_BIG_ENDIAN |
-#endif
-			       NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8);
-	nvkm_done(imem->ramfc);
-	return 0;
-}
-
-const struct nvkm_fifo_chan_oclass
-nv04_fifo_dma_oclass = {
-	.base.oclass = NV03_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv04_fifo_dma_new,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv10.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv10.c
deleted file mode 100644
index 07d80d54a07c..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv10.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "channv04.h"
-#include "regsnv04.h"
-
-#include <core/client.h>
-#include <core/gpuobj.h>
-#include <subdev/instmem.h>
-
-#include <nvif/class.h>
-#include <nvif/cl006b.h>
-#include <nvif/unpack.h>
-
-static int
-nv10_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
-		  void *data, u32 size, struct nvkm_object **pobject)
-{
-	struct nvkm_object *parent = oclass->parent;
-	union {
-		struct nv03_channel_dma_v0 v0;
-	} *args = data;
-	struct nv04_fifo *fifo = nv04_fifo(base);
-	struct nv04_fifo_chan *chan = NULL;
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct nvkm_instmem *imem = device->imem;
-	int ret = -ENOSYS;
-
-	nvif_ioctl(parent, "create channel dma size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-		nvif_ioctl(parent, "create channel dma vers %d pushbuf %llx "
-				   "offset %08x\n", args->v0.version,
-			   args->v0.pushbuf, args->v0.offset);
-		if (!args->v0.pushbuf)
-			return -EINVAL;
-	} else
-		return ret;
-
-	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
-		return -ENOMEM;
-	*pobject = &chan->base.object;
-
-	ret = nvkm_fifo_chan_ctor(&nv04_fifo_dma_func, &fifo->base,
-				  0x1000, 0x1000, false, 0, args->v0.pushbuf,
-				  BIT(NV04_FIFO_ENGN_SW) |
-				  BIT(NV04_FIFO_ENGN_GR) |
-				  BIT(NV04_FIFO_ENGN_DMA),
-				  0, 0x800000, 0x10000, oclass, &chan->base);
-	chan->fifo = fifo;
-	if (ret)
-		return ret;
-
-	args->v0.chid = chan->base.chid;
-	chan->ramfc = chan->base.chid * 32;
-
-	nvkm_kmap(imem->ramfc);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x00, args->v0.offset);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x04, args->v0.offset);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x0c, chan->base.push->addr >> 4);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x14,
-			       NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
-			       NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
-#ifdef __BIG_ENDIAN
-			       NV_PFIFO_CACHE1_BIG_ENDIAN |
-#endif
-			       NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8);
-	nvkm_done(imem->ramfc);
-	return 0;
-}
-
-const struct nvkm_fifo_chan_oclass
-nv10_fifo_dma_oclass = {
-	.base.oclass = NV10_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv10_fifo_dma_new,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv17.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv17.c
deleted file mode 100644
index edd70a114218..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv17.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "channv04.h"
-#include "regsnv04.h"
-
-#include <core/client.h>
-#include <core/gpuobj.h>
-#include <subdev/instmem.h>
-
-#include <nvif/class.h>
-#include <nvif/cl006b.h>
-#include <nvif/unpack.h>
-
-static int
-nv17_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
-		  void *data, u32 size, struct nvkm_object **pobject)
-{
-	struct nvkm_object *parent = oclass->parent;
-	union {
-		struct nv03_channel_dma_v0 v0;
-	} *args = data;
-	struct nv04_fifo *fifo = nv04_fifo(base);
-	struct nv04_fifo_chan *chan = NULL;
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct nvkm_instmem *imem = device->imem;
-	int ret = -ENOSYS;
-
-	nvif_ioctl(parent, "create channel dma size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-		nvif_ioctl(parent, "create channel dma vers %d pushbuf %llx "
-				   "offset %08x\n", args->v0.version,
-			   args->v0.pushbuf, args->v0.offset);
-		if (!args->v0.pushbuf)
-			return -EINVAL;
-	} else
-		return ret;
-
-	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
-		return -ENOMEM;
-	*pobject = &chan->base.object;
-
-	ret = nvkm_fifo_chan_ctor(&nv04_fifo_dma_func, &fifo->base,
-				  0x1000, 0x1000, false, 0, args->v0.pushbuf,
-				  BIT(NV04_FIFO_ENGN_SW) |
-				  BIT(NV04_FIFO_ENGN_GR) |
-				  BIT(NV04_FIFO_ENGN_MPEG) | /* NV31- */
-				  BIT(NV04_FIFO_ENGN_DMA),
-				  0, 0x800000, 0x10000, oclass, &chan->base);
-	chan->fifo = fifo;
-	if (ret)
-		return ret;
-
-	args->v0.chid = chan->base.chid;
-	chan->ramfc = chan->base.chid * 64;
-
-	nvkm_kmap(imem->ramfc);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x00, args->v0.offset);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x04, args->v0.offset);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x0c, chan->base.push->addr >> 4);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x14,
-			       NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
-			       NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
-#ifdef __BIG_ENDIAN
-			       NV_PFIFO_CACHE1_BIG_ENDIAN |
-#endif
-			       NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8);
-	nvkm_done(imem->ramfc);
-	return 0;
-}
-
-const struct nvkm_fifo_chan_oclass
-nv17_fifo_dma_oclass = {
-	.base.oclass = NV17_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv17_fifo_dma_new,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv40.c
deleted file mode 100644
index 0411fb908457..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/dmanv40.c
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "channv04.h"
-#include "regsnv04.h"
-
-#include <core/client.h>
-#include <core/ramht.h>
-#include <subdev/instmem.h>
-
-#include <nvif/class.h>
-#include <nvif/cl006b.h>
-#include <nvif/unpack.h>
-
-static bool
-nv40_fifo_dma_engine(struct nvkm_engine *engine, u32 *reg, u32 *ctx)
-{
-	switch (engine->subdev.type) {
-	case NVKM_ENGINE_DMAOBJ:
-	case NVKM_ENGINE_SW:
-		return false;
-	case NVKM_ENGINE_GR:
-		*reg = 0x0032e0;
-		*ctx = 0x38;
-		return true;
-	case NVKM_ENGINE_MPEG:
-		if (engine->subdev.device->chipset < 0x44)
-			return false;
-		*reg = 0x00330c;
-		*ctx = 0x54;
-		return true;
-	default:
-		WARN_ON(1);
-		return false;
-	}
-}
-
-static struct nvkm_gpuobj **
-nv40_fifo_dma_engn(struct nv04_fifo_chan *chan, struct nvkm_engine *engine)
-{
-	int engi = chan->base.fifo->func->engine_id(chan->base.fifo, engine);
-	if (engi >= 0)
-		return &chan->engn[engi];
-	return NULL;
-}
-
-static int
-nv40_fifo_dma_engine_fini(struct nvkm_fifo_chan *base,
-			  struct nvkm_engine *engine, bool suspend)
-{
-	struct nv04_fifo_chan *chan = nv04_fifo_chan(base);
-	struct nv04_fifo *fifo = chan->fifo;
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct nvkm_instmem *imem = device->imem;
-	unsigned long flags;
-	u32 reg, ctx;
-	int chid;
-
-	if (!nv40_fifo_dma_engine(engine, &reg, &ctx))
-		return 0;
-
-	spin_lock_irqsave(&fifo->base.lock, flags);
-	nvkm_mask(device, 0x002500, 0x00000001, 0x00000000);
-
-	chid = nvkm_rd32(device, 0x003204) & (fifo->base.nr - 1);
-	if (chid == chan->base.chid)
-		nvkm_wr32(device, reg, 0x00000000);
-	nvkm_kmap(imem->ramfc);
-	nvkm_wo32(imem->ramfc, chan->ramfc + ctx, 0x00000000);
-	nvkm_done(imem->ramfc);
-
-	nvkm_mask(device, 0x002500, 0x00000001, 0x00000001);
-	spin_unlock_irqrestore(&fifo->base.lock, flags);
-	return 0;
-}
-
-static int
-nv40_fifo_dma_engine_init(struct nvkm_fifo_chan *base,
-			  struct nvkm_engine *engine)
-{
-	struct nv04_fifo_chan *chan = nv04_fifo_chan(base);
-	struct nv04_fifo *fifo = chan->fifo;
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct nvkm_instmem *imem = device->imem;
-	unsigned long flags;
-	u32 inst, reg, ctx;
-	int chid;
-
-	if (!nv40_fifo_dma_engine(engine, &reg, &ctx))
-		return 0;
-	inst = (*nv40_fifo_dma_engn(chan, engine))->addr >> 4;
-
-	spin_lock_irqsave(&fifo->base.lock, flags);
-	nvkm_mask(device, 0x002500, 0x00000001, 0x00000000);
-
-	chid = nvkm_rd32(device, 0x003204) & (fifo->base.nr - 1);
-	if (chid == chan->base.chid)
-		nvkm_wr32(device, reg, inst);
-	nvkm_kmap(imem->ramfc);
-	nvkm_wo32(imem->ramfc, chan->ramfc + ctx, inst);
-	nvkm_done(imem->ramfc);
-
-	nvkm_mask(device, 0x002500, 0x00000001, 0x00000001);
-	spin_unlock_irqrestore(&fifo->base.lock, flags);
-	return 0;
-}
-
-static void
-nv40_fifo_dma_engine_dtor(struct nvkm_fifo_chan *base,
-			  struct nvkm_engine *engine)
-{
-	struct nv04_fifo_chan *chan = nv04_fifo_chan(base);
-	nvkm_gpuobj_del(nv40_fifo_dma_engn(chan, engine));
-}
-
-static int
-nv40_fifo_dma_engine_ctor(struct nvkm_fifo_chan *base,
-			  struct nvkm_engine *engine,
-			  struct nvkm_object *object)
-{
-	struct nv04_fifo_chan *chan = nv04_fifo_chan(base);
-	u32 reg, ctx;
-
-	if (!nv40_fifo_dma_engine(engine, &reg, &ctx))
-		return 0;
-
-	return nvkm_object_bind(object, NULL, 0, nv40_fifo_dma_engn(chan, engine));
-}
-
-static int
-nv40_fifo_dma_object_ctor(struct nvkm_fifo_chan *base,
-			  struct nvkm_object *object)
-{
-	struct nv04_fifo_chan *chan = nv04_fifo_chan(base);
-	struct nvkm_instmem *imem = chan->fifo->base.engine.subdev.device->imem;
-	u32 context = chan->base.chid << 23;
-	u32 handle  = object->handle;
-	int hash;
-
-	switch (object->engine->subdev.type) {
-	case NVKM_ENGINE_DMAOBJ:
-	case NVKM_ENGINE_SW    : context |= 0x00000000; break;
-	case NVKM_ENGINE_GR    : context |= 0x00100000; break;
-	case NVKM_ENGINE_MPEG  : context |= 0x00200000; break;
-	default:
-		WARN_ON(1);
-		return -EINVAL;
-	}
-
-	mutex_lock(&chan->fifo->base.mutex);
-	hash = nvkm_ramht_insert(imem->ramht, object, chan->base.chid, 4,
-				 handle, context);
-	mutex_unlock(&chan->fifo->base.mutex);
-	return hash;
-}
-
-static const struct nvkm_fifo_chan_func
-nv40_fifo_dma_func = {
-	.dtor = nv04_fifo_dma_dtor,
-	.init = nv04_fifo_dma_init,
-	.fini = nv04_fifo_dma_fini,
-	.engine_ctor = nv40_fifo_dma_engine_ctor,
-	.engine_dtor = nv40_fifo_dma_engine_dtor,
-	.engine_init = nv40_fifo_dma_engine_init,
-	.engine_fini = nv40_fifo_dma_engine_fini,
-	.object_ctor = nv40_fifo_dma_object_ctor,
-	.object_dtor = nv04_fifo_dma_object_dtor,
-};
-
-static int
-nv40_fifo_dma_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
-		  void *data, u32 size, struct nvkm_object **pobject)
-{
-	struct nvkm_object *parent = oclass->parent;
-	union {
-		struct nv03_channel_dma_v0 v0;
-	} *args = data;
-	struct nv04_fifo *fifo = nv04_fifo(base);
-	struct nv04_fifo_chan *chan = NULL;
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct nvkm_instmem *imem = device->imem;
-	int ret = -ENOSYS;
-
-	nvif_ioctl(parent, "create channel dma size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-		nvif_ioctl(parent, "create channel dma vers %d pushbuf %llx "
-				   "offset %08x\n", args->v0.version,
-			   args->v0.pushbuf, args->v0.offset);
-		if (!args->v0.pushbuf)
-			return -EINVAL;
-	} else
-		return ret;
-
-	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
-		return -ENOMEM;
-	*pobject = &chan->base.object;
-
-	ret = nvkm_fifo_chan_ctor(&nv40_fifo_dma_func, &fifo->base,
-				  0x1000, 0x1000, false, 0, args->v0.pushbuf,
-				  BIT(NV04_FIFO_ENGN_SW) |
-				  BIT(NV04_FIFO_ENGN_GR) |
-				  BIT(NV04_FIFO_ENGN_MPEG) |
-				  BIT(NV04_FIFO_ENGN_DMA),
-				  0, 0xc00000, 0x1000, oclass, &chan->base);
-	chan->fifo = fifo;
-	if (ret)
-		return ret;
-
-	args->v0.chid = chan->base.chid;
-	chan->ramfc = chan->base.chid * 128;
-
-	nvkm_kmap(imem->ramfc);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x00, args->v0.offset);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x04, args->v0.offset);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x0c, chan->base.push->addr >> 4);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x18, 0x30000000 |
-			       NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
-			       NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
-#ifdef __BIG_ENDIAN
-			       NV_PFIFO_CACHE1_BIG_ENDIAN |
-#endif
-			       NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8);
-	nvkm_wo32(imem->ramfc, chan->ramfc + 0x3c, 0x0001ffff);
-	nvkm_done(imem->ramfc);
-	return 0;
-}
-
-const struct nvkm_fifo_chan_oclass
-nv40_fifo_dma_oclass = {
-	.base.oclass = NV40_CHANNEL_DMA,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv40_fifo_dma_new,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c
index 3885c3830b94..6b229a3fbd97 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g84.c
@@ -21,112 +21,211 @@
  *
  * Authors: Ben Skeggs
  */
-#include "nv50.h"
-#include "channv50.h"
+#include "priv.h"
+#include "cgrp.h"
+#include "chan.h"
+#include "runl.h"
+
+#include <core/ramht.h>
+#include <subdev/timer.h>
+
+#include <nvif/class.h>
 
 static void
-g84_fifo_uevent_fini(struct nvkm_fifo *fifo)
+g84_chan_bind(struct nvkm_chan *chan)
 {
-	struct nvkm_device *device = fifo->engine.subdev.device;
-	nvkm_mask(device, 0x002140, 0x40000000, 0x00000000);
+	struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device;
+
+	nvkm_wr32(device, 0x002600 + (chan->id * 4), chan->ramfc->addr >> 8);
 }
 
-static void
-g84_fifo_uevent_init(struct nvkm_fifo *fifo)
+static int
+g84_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv)
 {
-	struct nvkm_device *device = fifo->engine.subdev.device;
-	nvkm_mask(device, 0x002140, 0x40000000, 0x40000000);
+	struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device;
+	const u32 limit2 = ilog2(length / 8);
+	int ret;
+
+	ret = nvkm_gpuobj_new(device, 0x0200, 0, true, chan->inst, &chan->eng);
+	if (ret)
+		return ret;
+
+	ret = nvkm_gpuobj_new(device, 0x4000, 0, false, chan->inst, &chan->pgd);
+	if (ret)
+		return ret;
+
+	ret = nvkm_gpuobj_new(device, 0x1000, 0x400, true, chan->inst, &chan->cache);
+	if (ret)
+		return ret;
+
+	ret = nvkm_gpuobj_new(device, 0x100, 0x100, true, chan->inst, &chan->ramfc);
+	if (ret)
+		return ret;
+
+	ret = nvkm_ramht_new(device, 0x8000, 16, chan->inst, &chan->ramht);
+	if (ret)
+		return ret;
+
+	nvkm_kmap(chan->ramfc);
+	nvkm_wo32(chan->ramfc, 0x3c, 0x403f6078);
+	nvkm_wo32(chan->ramfc, 0x44, 0x01003fff);
+	nvkm_wo32(chan->ramfc, 0x48, chan->push->node->offset >> 4);
+	nvkm_wo32(chan->ramfc, 0x50, lower_32_bits(offset));
+	nvkm_wo32(chan->ramfc, 0x54, upper_32_bits(offset) | (limit2 << 16));
+	nvkm_wo32(chan->ramfc, 0x60, 0x7fffffff);
+	nvkm_wo32(chan->ramfc, 0x78, 0x00000000);
+	nvkm_wo32(chan->ramfc, 0x7c, 0x30000000 | devm);
+	nvkm_wo32(chan->ramfc, 0x80, ((chan->ramht->bits - 9) << 27) |
+				     (4 << 24) /* SEARCH_FULL */ |
+				     (chan->ramht->gpuobj->node->offset >> 4));
+	nvkm_wo32(chan->ramfc, 0x88, chan->cache->addr >> 10);
+	nvkm_wo32(chan->ramfc, 0x98, chan->inst->addr >> 12);
+	nvkm_done(chan->ramfc);
+	return 0;
 }
 
-static struct nvkm_engine *
-g84_fifo_id_engine(struct nvkm_fifo *fifo, int engi)
+static const struct nvkm_chan_func_ramfc
+g84_chan_ramfc = {
+	.write = g84_chan_ramfc_write,
+	.ctxdma = true,
+	.devm = 0xfff,
+};
+
+const struct nvkm_chan_func
+g84_chan = {
+	.inst = &nv50_chan_inst,
+	.userd = &nv50_chan_userd,
+	.ramfc = &g84_chan_ramfc,
+	.bind = g84_chan_bind,
+	.unbind = nv50_chan_unbind,
+	.start = nv50_chan_start,
+	.stop = nv50_chan_stop,
+};
+
+static void
+g84_ectx_bind(struct nvkm_engn *engn, struct nvkm_cctx *cctx, struct nvkm_chan *chan)
 {
-	struct nvkm_device *device = fifo->engine.subdev.device;
-	struct nvkm_engine *engine;
-	enum nvkm_subdev_type type;
-
-	switch (engi) {
-	case G84_FIFO_ENGN_SW    : type = NVKM_ENGINE_SW; break;
-	case G84_FIFO_ENGN_GR    : type = NVKM_ENGINE_GR; break;
-	case G84_FIFO_ENGN_MPEG  :
-		if ((engine = nvkm_device_engine(device, NVKM_ENGINE_MSPPP, 0)))
-			return engine;
-		type = NVKM_ENGINE_MPEG;
-		break;
-	case G84_FIFO_ENGN_ME    :
-		if ((engine = nvkm_device_engine(device, NVKM_ENGINE_CE, 0)))
-			return engine;
-		type = NVKM_ENGINE_ME;
-		break;
-	case G84_FIFO_ENGN_VP    :
-		if ((engine = nvkm_device_engine(device, NVKM_ENGINE_MSPDEC, 0)))
-			return engine;
-		type = NVKM_ENGINE_VP;
-		break;
-	case G84_FIFO_ENGN_CIPHER:
-		if ((engine = nvkm_device_engine(device, NVKM_ENGINE_VIC, 0)))
-			return engine;
-		if ((engine = nvkm_device_engine(device, NVKM_ENGINE_SEC, 0)))
-			return engine;
-		type = NVKM_ENGINE_CIPHER;
-		break;
-	case G84_FIFO_ENGN_BSP   :
-		if ((engine = nvkm_device_engine(device, NVKM_ENGINE_MSVLD, 0)))
-			return engine;
-		type = NVKM_ENGINE_BSP;
-		break;
-	case G84_FIFO_ENGN_DMA   : type = NVKM_ENGINE_DMAOBJ; break;
+	struct nvkm_subdev *subdev = &chan->cgrp->runl->fifo->engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u64 start = 0, limit = 0;
+	u32 flags = 0, ptr0, save;
+
+	switch (engn->engine->subdev.type) {
+	case NVKM_ENGINE_GR    : ptr0 = 0x0020; break;
+	case NVKM_ENGINE_VP    :
+	case NVKM_ENGINE_MSPDEC: ptr0 = 0x0040; break;
+	case NVKM_ENGINE_MPEG  :
+	case NVKM_ENGINE_MSPPP : ptr0 = 0x0060; break;
+	case NVKM_ENGINE_BSP   :
+	case NVKM_ENGINE_MSVLD : ptr0 = 0x0080; break;
+	case NVKM_ENGINE_CIPHER:
+	case NVKM_ENGINE_SEC   : ptr0 = 0x00a0; break;
+	case NVKM_ENGINE_CE    : ptr0 = 0x00c0; break;
 	default:
 		WARN_ON(1);
-		return NULL;
+		return;
+	}
+
+	if (!cctx) {
+		save = nvkm_mask(device, 0x002520, 0x0000003f, BIT(engn->id - 1));
+		nvkm_wr32(device, 0x0032fc, chan->inst->addr >> 12);
+		nvkm_msec(device, 2000,
+			if (nvkm_rd32(device, 0x0032fc) != 0xffffffff)
+				break;
+		);
+		nvkm_wr32(device, 0x002520, save);
+	} else {
+		flags = 0x00190000;
+		start = cctx->vctx->inst->addr;
+		limit = start + cctx->vctx->inst->size - 1;
 	}
 
-	return nvkm_device_engine(fifo->engine.subdev.device, type, 0);
+	nvkm_kmap(chan->eng);
+	nvkm_wo32(chan->eng, ptr0 + 0x00, flags);
+	nvkm_wo32(chan->eng, ptr0 + 0x04, lower_32_bits(limit));
+	nvkm_wo32(chan->eng, ptr0 + 0x08, lower_32_bits(start));
+	nvkm_wo32(chan->eng, ptr0 + 0x0c, upper_32_bits(limit) << 24 |
+					  lower_32_bits(start));
+	nvkm_wo32(chan->eng, ptr0 + 0x10, 0x00000000);
+	nvkm_wo32(chan->eng, ptr0 + 0x14, 0x00000000);
+	nvkm_done(chan->eng);
 }
 
+const struct nvkm_engn_func
+g84_engn = {
+	.bind = g84_ectx_bind,
+	.ramht_add = nv50_eobj_ramht_add,
+	.ramht_del = nv50_eobj_ramht_del,
+};
+
+static void
+g84_fifo_nonstall_block(struct nvkm_event *event, int type, int index)
+{
+	struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event);
+	unsigned long flags;
+
+	spin_lock_irqsave(&fifo->lock, flags);
+	nvkm_mask(fifo->engine.subdev.device, 0x002140, 0x40000000, 0x00000000);
+	spin_unlock_irqrestore(&fifo->lock, flags);
+}
+
+static void
+g84_fifo_nonstall_allow(struct nvkm_event *event, int type, int index)
+{
+	struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event);
+	unsigned long flags;
+
+	spin_lock_irqsave(&fifo->lock, flags);
+	nvkm_mask(fifo->engine.subdev.device, 0x002140, 0x40000000, 0x40000000);
+	spin_unlock_irqrestore(&fifo->lock, flags);
+}
+
+const struct nvkm_event_func
+g84_fifo_nonstall = {
+	.init = g84_fifo_nonstall_allow,
+	.fini = g84_fifo_nonstall_block,
+};
+
 static int
-g84_fifo_engine_id(struct nvkm_fifo *base, struct nvkm_engine *engine)
+g84_fifo_runl_ctor(struct nvkm_fifo *fifo)
 {
-	switch (engine->subdev.type) {
-	case NVKM_ENGINE_SW    : return G84_FIFO_ENGN_SW;
-	case NVKM_ENGINE_GR    : return G84_FIFO_ENGN_GR;
-	case NVKM_ENGINE_MPEG  :
-	case NVKM_ENGINE_MSPPP : return G84_FIFO_ENGN_MPEG;
-	case NVKM_ENGINE_CE    : return G84_FIFO_ENGN_CE0;
-	case NVKM_ENGINE_VP    :
-	case NVKM_ENGINE_MSPDEC: return G84_FIFO_ENGN_VP;
-	case NVKM_ENGINE_CIPHER:
-	case NVKM_ENGINE_SEC   : return G84_FIFO_ENGN_CIPHER;
-	case NVKM_ENGINE_BSP   :
-	case NVKM_ENGINE_MSVLD : return G84_FIFO_ENGN_BSP;
-	case NVKM_ENGINE_DMAOBJ: return G84_FIFO_ENGN_DMA;
-	default:
-		WARN_ON(1);
-		return -1;
-	}
+	struct nvkm_runl *runl;
+
+	runl = nvkm_runl_new(fifo, 0, 0, 0);
+	if (IS_ERR(runl))
+		return PTR_ERR(runl);
+
+	nvkm_runl_add(runl, 0, fifo->func->engn_sw, NVKM_ENGINE_SW, 0);
+	nvkm_runl_add(runl, 0, fifo->func->engn_sw, NVKM_ENGINE_DMAOBJ, 0);
+	nvkm_runl_add(runl, 1, fifo->func->engn, NVKM_ENGINE_GR, 0);
+	nvkm_runl_add(runl, 2, fifo->func->engn, NVKM_ENGINE_MPEG, 0);
+	nvkm_runl_add(runl, 3, fifo->func->engn, NVKM_ENGINE_ME, 0);
+	nvkm_runl_add(runl, 4, fifo->func->engn, NVKM_ENGINE_VP, 0);
+	nvkm_runl_add(runl, 5, fifo->func->engn, NVKM_ENGINE_CIPHER, 0);
+	nvkm_runl_add(runl, 6, fifo->func->engn, NVKM_ENGINE_BSP, 0);
+	return 0;
 }
 
 static const struct nvkm_fifo_func
 g84_fifo = {
-	.dtor = nv50_fifo_dtor,
-	.oneinit = nv50_fifo_oneinit,
+	.chid_nr = nv50_fifo_chid_nr,
+	.chid_ctor = nv50_fifo_chid_ctor,
+	.runl_ctor = g84_fifo_runl_ctor,
 	.init = nv50_fifo_init,
 	.intr = nv04_fifo_intr,
-	.engine_id = g84_fifo_engine_id,
-	.id_engine = g84_fifo_id_engine,
 	.pause = nv04_fifo_pause,
 	.start = nv04_fifo_start,
-	.uevent_init = g84_fifo_uevent_init,
-	.uevent_fini = g84_fifo_uevent_fini,
-	.chan = {
-		&g84_fifo_gpfifo_oclass,
-		NULL
-	},
+	.nonstall = &g84_fifo_nonstall,
+	.runl = &nv50_runl,
+	.engn = &g84_engn,
+	.engn_sw = &nv50_engn_sw,
+	.cgrp = {{                          }, &nv04_cgrp },
+	.chan = {{ 0, 0, G82_CHANNEL_GPFIFO }, &g84_chan },
 };
 
 int
 g84_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	     struct nvkm_fifo **pfifo)
 {
-	return nv50_fifo_new_(&g84_fifo, device, type, inst, pfifo);
+	return nvkm_fifo_new_(&g84_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g98.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g98.c
new file mode 100644
index 000000000000..c6ca050c38bf
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/g98.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+#include "chan.h"
+#include "runl.h"
+
+#include <nvif/class.h>
+
+static int
+g98_fifo_runl_ctor(struct nvkm_fifo *fifo)
+{
+	struct nvkm_runl *runl;
+
+	runl = nvkm_runl_new(fifo, 0, 0, 0);
+	if (IS_ERR(runl))
+		return PTR_ERR(runl);
+
+	nvkm_runl_add(runl, 0, fifo->func->engn_sw, NVKM_ENGINE_SW, 0);
+	nvkm_runl_add(runl, 0, fifo->func->engn_sw, NVKM_ENGINE_DMAOBJ, 0);
+	nvkm_runl_add(runl, 1, fifo->func->engn, NVKM_ENGINE_GR, 0);
+	nvkm_runl_add(runl, 2, fifo->func->engn, NVKM_ENGINE_MSPPP, 0);
+	nvkm_runl_add(runl, 3, fifo->func->engn, NVKM_ENGINE_CE, 0);
+	nvkm_runl_add(runl, 4, fifo->func->engn, NVKM_ENGINE_MSPDEC, 0);
+	nvkm_runl_add(runl, 5, fifo->func->engn, NVKM_ENGINE_SEC, 0);
+	nvkm_runl_add(runl, 6, fifo->func->engn, NVKM_ENGINE_MSVLD, 0);
+	return 0;
+}
+
+static const struct nvkm_fifo_func
+g98_fifo = {
+	.chid_nr = nv50_fifo_chid_nr,
+	.chid_ctor = nv50_fifo_chid_ctor,
+	.runl_ctor = g98_fifo_runl_ctor,
+	.init = nv50_fifo_init,
+	.intr = nv04_fifo_intr,
+	.pause = nv04_fifo_pause,
+	.start = nv04_fifo_start,
+	.nonstall = &g84_fifo_nonstall,
+	.runl = &nv50_runl,
+	.engn = &g84_engn,
+	.engn_sw = &nv50_engn_sw,
+	.cgrp = {{                          }, &nv04_cgrp },
+	.chan = {{ 0, 0, G82_CHANNEL_GPFIFO }, &g84_chan },
+};
+
+int
+g98_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
+	       struct nvkm_fifo **pfifo)
+{
+	return nvkm_fifo_new_(&g98_fifo, device, type, inst, pfifo);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
new file mode 100644
index 000000000000..12a5d99d5e77
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga100.c
@@ -0,0 +1,550 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+#include "cgrp.h"
+#include "chan.h"
+#include "chid.h"
+#include "runl.h"
+#include "runq.h"
+
+#include <core/gpuobj.h>
+#include <subdev/top.h>
+#include <subdev/vfn.h>
+
+#include <nvif/class.h>
+
+/*TODO: allocate? */
+#define GA100_FIFO_NONSTALL_VECTOR 0
+
+static u32
+ga100_chan_doorbell_handle(struct nvkm_chan *chan)
+{
+	return (chan->cgrp->runl->doorbell << 16) | chan->id;
+}
+
+static void
+ga100_chan_stop(struct nvkm_chan *chan)
+{
+	struct nvkm_runl *runl = chan->cgrp->runl;
+
+	nvkm_wr32(runl->fifo->engine.subdev.device, runl->chan + (chan->id * 4), 0x00000003);
+}
+
+static void
+ga100_chan_start(struct nvkm_chan *chan)
+{
+	struct nvkm_runl *runl = chan->cgrp->runl;
+	struct nvkm_device *device = runl->fifo->engine.subdev.device;
+	const int gfid = 0;
+
+	nvkm_wr32(device, runl->chan + (chan->id * 4), 0x00000002);
+	nvkm_wr32(device, runl->addr + 0x0090, (gfid << 16) | chan->id); /* INTERNAL_DOORBELL. */
+}
+
+static void
+ga100_chan_unbind(struct nvkm_chan *chan)
+{
+	struct nvkm_runl *runl = chan->cgrp->runl;
+
+	nvkm_wr32(runl->fifo->engine.subdev.device, runl->chan + (chan->id * 4), 0xffffffff);
+}
+
+static int
+ga100_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv)
+{
+	const u32 limit2 = ilog2(length / 8);
+
+	nvkm_kmap(chan->inst);
+	nvkm_wo32(chan->inst, 0x010, 0x0000face);
+	nvkm_wo32(chan->inst, 0x030, 0x7ffff902);
+	nvkm_wo32(chan->inst, 0x048, lower_32_bits(offset));
+	nvkm_wo32(chan->inst, 0x04c, upper_32_bits(offset) | (limit2 << 16));
+	nvkm_wo32(chan->inst, 0x084, 0x20400000);
+	nvkm_wo32(chan->inst, 0x094, 0x30000000 | devm);
+	nvkm_wo32(chan->inst, 0x0e4, priv ? 0x00000020 : 0x00000000);
+	nvkm_wo32(chan->inst, 0x0e8, chan->id);
+	nvkm_wo32(chan->inst, 0x0f4, 0x00001000 | (priv ? 0x00000100 : 0x00000000));
+	nvkm_wo32(chan->inst, 0x0f8, 0x80000000 | GA100_FIFO_NONSTALL_VECTOR);
+	nvkm_mo32(chan->inst, 0x218, 0x00000000, 0x00000000);
+	nvkm_done(chan->inst);
+	return 0;
+}
+
+static const struct nvkm_chan_func_ramfc
+ga100_chan_ramfc = {
+	.write = ga100_chan_ramfc_write,
+	.devm = 0xfff,
+	.priv = true,
+};
+
+const struct nvkm_chan_func
+ga100_chan = {
+	.inst = &gf100_chan_inst,
+	.userd = &gv100_chan_userd,
+	.ramfc = &ga100_chan_ramfc,
+	.unbind = ga100_chan_unbind,
+	.start = ga100_chan_start,
+	.stop = ga100_chan_stop,
+	.preempt = gk110_chan_preempt,
+	.doorbell_handle = ga100_chan_doorbell_handle,
+};
+
+static void
+ga100_cgrp_preempt(struct nvkm_cgrp *cgrp)
+{
+	struct nvkm_runl *runl = cgrp->runl;
+
+	nvkm_wr32(runl->fifo->engine.subdev.device, runl->addr + 0x098, 0x01000000 | cgrp->id);
+}
+
+const struct nvkm_cgrp_func
+ga100_cgrp = {
+	.preempt = ga100_cgrp_preempt,
+};
+
+static int
+ga100_engn_cxid(struct nvkm_engn *engn, bool *cgid)
+{
+	struct nvkm_runl *runl = engn->runl;
+	struct nvkm_device *device = runl->fifo->engine.subdev.device;
+	u32 stat = nvkm_rd32(device, runl->addr + 0x200 + engn->id * 0x40);
+
+	ENGN_DEBUG(engn, "status %08x", stat);
+	*cgid = true;
+
+	switch ((stat & 0x0000e000) >> 13) {
+	case 0 /* INVALID */: return -ENODEV;
+	case 1 /*   VALID */:
+	case 5 /*    SAVE */: return (stat & 0x00000fff);
+	case 6 /*    LOAD */: return (stat & 0x0fff0000) >> 16;
+	case 7 /*  SWITCH */:
+		if (nvkm_engine_chsw_load(engn->engine))
+			return (stat & 0x0fff0000) >> 16;
+		return (stat & 0x00000fff);
+	default:
+		WARN_ON(1);
+		break;
+	}
+
+	return -ENODEV;
+}
+
+const struct nvkm_engn_func
+ga100_engn = {
+	.cxid = ga100_engn_cxid,
+	.ctor = gk104_ectx_ctor,
+	.bind = gv100_ectx_bind,
+};
+
+const struct nvkm_engn_func
+ga100_engn_ce = {
+	.cxid = ga100_engn_cxid,
+	.ctor = gv100_ectx_ce_ctor,
+	.bind = gv100_ectx_ce_bind,
+};
+
+static bool
+ga100_runq_idle(struct nvkm_runq *runq)
+{
+	struct nvkm_device *device = runq->fifo->engine.subdev.device;
+
+	return !(nvkm_rd32(device, 0x04015c + (runq->id * 0x800)) & 0x0000e000);
+}
+
+static bool
+ga100_runq_intr_1(struct nvkm_runq *runq, struct nvkm_runl *runl)
+{
+	struct nvkm_device *device = runq->fifo->engine.subdev.device;
+	u32 inte = nvkm_rd32(device, 0x040180 + (runq->id * 0x800));
+	u32 intr = nvkm_rd32(device, 0x040148 + (runq->id * 0x800));
+	u32 stat = intr & inte;
+
+	if (!stat) {
+		RUNQ_DEBUG(runq, "inte1 %08x %08x", intr, inte);
+		return false;
+	}
+
+	if (stat & 0x80000000) {
+		u32 chid = nvkm_rd32(device, 0x040120 + (runq->id * 0x0800)) & runl->chid->mask;
+		struct nvkm_chan *chan;
+		unsigned long flags;
+
+		RUNQ_ERROR(runq, "CTXNOTVALID chid:%d", chid);
+		chan = nvkm_runl_chan_get_chid(runl, chid, &flags);
+		if (chan) {
+			nvkm_chan_error(chan, true);
+			nvkm_chan_put(&chan, flags);
+		}
+
+		nvkm_mask(device, 0x0400ac + (runq->id * 0x800), 0x00030000, 0x00030000);
+		stat &= ~0x80000000;
+	}
+
+	if (stat) {
+		RUNQ_ERROR(runq, "intr1 %08x", stat);
+		nvkm_wr32(device, 0x0401a0 + (runq->id * 0x800), stat);
+	}
+
+	nvkm_wr32(device, 0x040148 + (runq->id * 0x800), intr);
+	return true;
+}
+
+static bool
+ga100_runq_intr_0(struct nvkm_runq *runq, struct nvkm_runl *runl)
+{
+	struct nvkm_device *device = runq->fifo->engine.subdev.device;
+	u32 inte = nvkm_rd32(device, 0x040170 + (runq->id * 0x800));
+	u32 intr = nvkm_rd32(device, 0x040108 + (runq->id * 0x800));
+	u32 stat = intr & inte;
+
+	if (!stat) {
+		RUNQ_DEBUG(runq, "inte0 %08x %08x", intr, inte);
+		return false;
+	}
+
+	/*TODO: expand on this when fixing up gf100's version. */
+	if (stat & 0xc6afe000) {
+		u32 chid = nvkm_rd32(device, 0x040120 + (runq->id * 0x0800)) & runl->chid->mask;
+		struct nvkm_chan *chan;
+		unsigned long flags;
+
+		RUNQ_ERROR(runq, "intr0 %08x", stat);
+		chan = nvkm_runl_chan_get_chid(runl, chid, &flags);
+		if (chan) {
+			nvkm_chan_error(chan, true);
+			nvkm_chan_put(&chan, flags);
+		}
+
+		stat &= ~0xc6afe000;
+	}
+
+	if (stat) {
+		RUNQ_ERROR(runq, "intr0 %08x", stat);
+		nvkm_wr32(device, 0x040190 + (runq->id * 0x800), stat);
+	}
+
+	nvkm_wr32(device, 0x040108 + (runq->id * 0x800), intr);
+	return true;
+}
+
+static bool
+ga100_runq_intr(struct nvkm_runq *runq, struct nvkm_runl *runl)
+{
+	bool intr0 = ga100_runq_intr_0(runq, runl);
+	bool intr1 = ga100_runq_intr_1(runq, runl);
+
+	return intr0 || intr1;
+}
+
+static void
+ga100_runq_init(struct nvkm_runq *runq)
+{
+	struct nvkm_device *device = runq->fifo->engine.subdev.device;
+
+	nvkm_wr32(device, 0x040108 + (runq->id * 0x800), 0xffffffff); /* INTR_0 */
+	nvkm_wr32(device, 0x040148 + (runq->id * 0x800), 0xffffffff); /* INTR_1 */
+	nvkm_wr32(device, 0x040170 + (runq->id * 0x800), 0xffffffff); /* INTR_0_EN_SET_TREE */
+	nvkm_wr32(device, 0x040180 + (runq->id * 0x800), 0xffffffff); /* INTR_1_EN_SET_TREE */
+}
+
+const struct nvkm_runq_func
+ga100_runq = {
+	.init = ga100_runq_init,
+	.intr = ga100_runq_intr,
+	.idle = ga100_runq_idle,
+};
+
+static bool
+ga100_runl_preempt_pending(struct nvkm_runl *runl)
+{
+	return nvkm_rd32(runl->fifo->engine.subdev.device, runl->addr + 0x098) & 0x00100000;
+}
+
+static void
+ga100_runl_preempt(struct nvkm_runl *runl)
+{
+	nvkm_wr32(runl->fifo->engine.subdev.device, runl->addr + 0x098, 0x00000000);
+}
+
+static void
+ga100_runl_allow(struct nvkm_runl *runl, u32 engm)
+{
+	nvkm_mask(runl->fifo->engine.subdev.device, runl->addr + 0x094, 0x00000001, 0x00000000);
+}
+
+static void
+ga100_runl_block(struct nvkm_runl *runl, u32 engm)
+{
+	nvkm_mask(runl->fifo->engine.subdev.device, runl->addr + 0x094, 0x00000001, 0x00000001);
+}
+
+static bool
+ga100_runl_pending(struct nvkm_runl *runl)
+{
+	struct nvkm_device *device = runl->fifo->engine.subdev.device;
+
+	return nvkm_rd32(device, runl->addr + 0x08c) & 0x00008000;
+}
+
+static void
+ga100_runl_commit(struct nvkm_runl *runl, struct nvkm_memory *memory, u32 start, int count)
+{
+	struct nvkm_device *device = runl->fifo->engine.subdev.device;
+	u64 addr = nvkm_memory_addr(memory) + start;
+
+	nvkm_wr32(device, runl->addr + 0x080, lower_32_bits(addr));
+	nvkm_wr32(device, runl->addr + 0x084, upper_32_bits(addr));
+	nvkm_wr32(device, runl->addr + 0x088, count);
+}
+
+static irqreturn_t
+ga100_runl_intr(struct nvkm_inth *inth)
+{
+	struct nvkm_runl *runl = container_of(inth, typeof(*runl), inth);
+	struct nvkm_engn *engn;
+	struct nvkm_device *device = runl->fifo->engine.subdev.device;
+	u32 inte = nvkm_rd32(device, runl->addr + 0x120);
+	u32 intr = nvkm_rd32(device, runl->addr + 0x100);
+	u32 stat = intr & inte;
+	u32 info;
+
+	if (!stat) {
+		RUNL_DEBUG(runl, "inte %08x %08x", intr, inte);
+		return IRQ_NONE;
+	}
+
+	if (stat & 0x00000007) {
+		nvkm_runl_foreach_engn_cond(engn, runl, stat & BIT(engn->id)) {
+			info = nvkm_rd32(device, runl->addr + 0x224 + (engn->id * 0x40));
+
+			tu102_fifo_intr_ctxsw_timeout_info(engn, info);
+
+			nvkm_wr32(device, runl->addr + 0x100, BIT(engn->id));
+			stat &= ~BIT(engn->id);
+		}
+	}
+
+	if (stat & 0x00000300) {
+		nvkm_wr32(device, runl->addr + 0x100, stat & 0x00000300);
+		stat &= ~0x00000300;
+	}
+
+	if (stat & 0x00010000) {
+		if (runl->runq[0]) {
+			if (runl->runq[0]->func->intr(runl->runq[0], runl))
+				stat &= ~0x00010000;
+		}
+	}
+
+	if (stat & 0x00020000) {
+		if (runl->runq[1]) {
+			if (runl->runq[1]->func->intr(runl->runq[1], runl))
+				stat &= ~0x00020000;
+		}
+	}
+
+	if (stat) {
+		RUNL_ERROR(runl, "intr %08x", stat);
+		nvkm_wr32(device, runl->addr + 0x140, stat);
+	}
+
+	nvkm_wr32(device, runl->addr + 0x180, 0x00000001);
+	return IRQ_HANDLED;
+}
+
+static void
+ga100_runl_fini(struct nvkm_runl *runl)
+{
+	nvkm_mask(runl->fifo->engine.subdev.device, runl->addr + 0x300, 0x80000000, 0x00000000);
+	nvkm_inth_block(&runl->inth);
+}
+
+static void
+ga100_runl_init(struct nvkm_runl *runl)
+{
+	struct nvkm_fifo *fifo = runl->fifo;
+	struct nvkm_runq *runq;
+	struct nvkm_device *device = fifo->engine.subdev.device;
+	int i;
+
+	/* Submit NULL runlist and preempt. */
+	nvkm_wr32(device, runl->addr + 0x088, 0x00000000);
+	runl->func->preempt(runl);
+
+	/* Enable doorbell. */
+	nvkm_mask(device, runl->addr + 0x300, 0x80000000, 0x80000000);
+
+	nvkm_wr32(device, runl->addr + 0x100, 0xffffffff); /* INTR_0 */
+	nvkm_wr32(device, runl->addr + 0x140, 0xffffffff); /* INTR_0_EN_CLEAR_TREE(0) */
+	nvkm_wr32(device, runl->addr + 0x120, 0x000f1307); /* INTR_0_EN_SET_TREE(0) */
+	nvkm_wr32(device, runl->addr + 0x148, 0xffffffff); /* INTR_0_EN_CLEAR_TREE(1) */
+	nvkm_wr32(device, runl->addr + 0x128, 0x00000000); /* INTR_0_EN_SET_TREE(1) */
+
+	/* Init PBDMA(s). */
+	for (i = 0; i < runl->runq_nr; i++) {
+		runq = runl->runq[i];
+		runq->func->init(runq);
+	}
+
+	nvkm_inth_allow(&runl->inth);
+}
+
+const struct nvkm_runl_func
+ga100_runl = {
+	.init = ga100_runl_init,
+	.fini = ga100_runl_fini,
+	.size = 16,
+	.update = nv50_runl_update,
+	.insert_cgrp = gv100_runl_insert_cgrp,
+	.insert_chan = gv100_runl_insert_chan,
+	.commit = ga100_runl_commit,
+	.wait = nv50_runl_wait,
+	.pending = ga100_runl_pending,
+	.block = ga100_runl_block,
+	.allow = ga100_runl_allow,
+	.preempt = ga100_runl_preempt,
+	.preempt_pending = ga100_runl_preempt_pending,
+};
+
+static int
+ga100_runl_new(struct nvkm_fifo *fifo, int id, u32 addr, struct nvkm_runl **prunl)
+{
+	struct nvkm_device *device = fifo->engine.subdev.device;
+	struct nvkm_runl *runl;
+	u32 chcfg  = nvkm_rd32(device, addr + 0x004);
+	u32 chnum  = 1 << (chcfg & 0x0000000f);
+	u32 chaddr = (chcfg & 0xfffffff0);
+	u32 dbcfg  = nvkm_rd32(device, addr + 0x008);
+	u32 vector = nvkm_rd32(device, addr + 0x160);
+	int i, ret;
+
+	runl = *prunl = nvkm_runl_new(fifo, id, addr, chnum);
+	if (IS_ERR(runl))
+		return PTR_ERR(runl);
+
+	for (i = 0; i < 2; i++) {
+		u32 pbcfg = nvkm_rd32(device, addr + 0x010 + (i * 0x04));
+		if (pbcfg & 0x80000000) {
+			runl->runq[runl->runq_nr] =
+				nvkm_runq_new(fifo, ((pbcfg & 0x03fffc00) - 0x040000) / 0x800);
+			if (!runl->runq[runl->runq_nr])
+				return -ENOMEM;
+
+			runl->runq_nr++;
+		}
+	}
+
+	ret = nvkm_inth_add(&device->vfn->intr, vector & 0x00000fff, NVKM_INTR_PRIO_NORMAL,
+			    &fifo->engine.subdev, ga100_runl_intr, &runl->inth);
+	if (ret)
+		return ret;
+
+	runl->chan = chaddr;
+	runl->doorbell = dbcfg >> 16;
+	return 0;
+}
+
+static irqreturn_t
+ga100_fifo_nonstall_intr(struct nvkm_inth *inth)
+{
+	struct nvkm_fifo *fifo = container_of(inth, typeof(*fifo), nonstall.intr);
+
+	nvkm_event_ntfy(&fifo->nonstall.event, 0, NVKM_FIFO_NONSTALL_EVENT);
+	return IRQ_HANDLED;
+}
+
+static void
+ga100_fifo_nonstall_block(struct nvkm_event *event, int type, int index)
+{
+	struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event);
+
+	nvkm_inth_block(&fifo->nonstall.intr);
+}
+
+static void
+ga100_fifo_nonstall_allow(struct nvkm_event *event, int type, int index)
+{
+	struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event);
+
+	nvkm_inth_allow(&fifo->nonstall.intr);
+}
+
+const struct nvkm_event_func
+ga100_fifo_nonstall = {
+	.init = ga100_fifo_nonstall_allow,
+	.fini = ga100_fifo_nonstall_block,
+};
+
+int
+ga100_fifo_nonstall_ctor(struct nvkm_fifo *fifo)
+{
+	return nvkm_inth_add(&fifo->engine.subdev.device->vfn->intr, GA100_FIFO_NONSTALL_VECTOR,
+			     NVKM_INTR_PRIO_NORMAL, &fifo->engine.subdev, ga100_fifo_nonstall_intr,
+			     &fifo->nonstall.intr);
+}
+
+int
+ga100_fifo_runl_ctor(struct nvkm_fifo *fifo)
+{
+	struct nvkm_device *device = fifo->engine.subdev.device;
+	struct nvkm_top_device *tdev;
+	struct nvkm_runl *runl;
+	int id = 0, ret;
+
+	nvkm_list_foreach(tdev, &device->top->device, head, tdev->runlist >= 0) {
+		runl = nvkm_runl_get(fifo, -1, tdev->runlist);
+		if (!runl) {
+			ret = ga100_runl_new(fifo, id++, tdev->runlist, &runl);
+			if (ret)
+				return ret;
+		}
+
+		if (tdev->engine < 0)
+			continue;
+
+		nvkm_runl_add(runl, tdev->engine, (tdev->type == NVKM_ENGINE_CE) ?
+			      fifo->func->engn_ce : fifo->func->engn, tdev->type, tdev->inst);
+	}
+
+	return 0;
+}
+
+static const struct nvkm_fifo_func
+ga100_fifo = {
+	.runl_ctor = ga100_fifo_runl_ctor,
+	.mmu_fault = &tu102_fifo_mmu_fault,
+	.nonstall_ctor = ga100_fifo_nonstall_ctor,
+	.nonstall = &ga100_fifo_nonstall,
+	.runl = &ga100_runl,
+	.runq = &ga100_runq,
+	.engn = &ga100_engn,
+	.engn_ce = &ga100_engn_ce,
+	.cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A  }, &ga100_cgrp, .force = true },
+	.chan = {{ 0, 0, AMPERE_CHANNEL_GPFIFO_A }, &ga100_chan },
+};
+
+int
+ga100_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
+	       struct nvkm_fifo **pfifo)
+{
+	return nvkm_fifo_new_(&ga100_fifo, device, type, inst, pfifo);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c
index c630dbd2911a..2cdf5da339b6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ga102.c
@@ -19,293 +19,27 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-#define ga102_fifo(p) container_of((p), struct ga102_fifo, base.engine)
-#define ga102_chan(p) container_of((p), struct ga102_chan, object)
-#include <engine/fifo.h>
-#include "user.h"
+#include "priv.h"
 
-#include <core/memory.h>
-#include <subdev/mmu.h>
-#include <subdev/timer.h>
-#include <subdev/top.h>
-
-#include <nvif/cl0080.h>
-#include <nvif/clc36f.h>
 #include <nvif/class.h>
 
-struct ga102_fifo {
-	struct nvkm_fifo base;
-};
-
-struct ga102_chan {
-	struct nvkm_object object;
-
-	struct {
-		u32 runl;
-		u32 chan;
-	} ctrl;
-
-	struct nvkm_memory *mthd;
-	struct nvkm_memory *inst;
-	struct nvkm_memory *user;
-	struct nvkm_memory *runl;
-
-	struct nvkm_vmm *vmm;
-};
-
-static int
-ga102_chan_sclass(struct nvkm_object *object, int index, struct nvkm_oclass *oclass)
-{
-	if (index == 0) {
-		oclass->ctor = nvkm_object_new;
-		oclass->base = (struct nvkm_sclass) { -1, -1, AMPERE_DMA_COPY_B };
-		return 0;
-	}
-
-	return -EINVAL;
-}
-
-static int
-ga102_chan_map(struct nvkm_object *object, void *argv, u32 argc,
-	       enum nvkm_object_map *type, u64 *addr, u64 *size)
-{
-	struct ga102_chan *chan = ga102_chan(object);
-	struct nvkm_device *device = chan->object.engine->subdev.device;
-	u64 bar2 = nvkm_memory_bar2(chan->user);
-
-	if (bar2 == ~0ULL)
-		return -EFAULT;
-
-	*type = NVKM_OBJECT_MAP_IO;
-	*addr = device->func->resource_addr(device, 3) + bar2;
-	*size = 0x1000;
-	return 0;
-}
-
-static int
-ga102_chan_fini(struct nvkm_object *object, bool suspend)
-{
-	struct ga102_chan *chan = ga102_chan(object);
-	struct nvkm_device *device = chan->object.engine->subdev.device;
-
-	nvkm_wr32(device, chan->ctrl.chan, 0x00000003);
-
-	nvkm_wr32(device, chan->ctrl.runl + 0x098, 0x01000000);
-	nvkm_msec(device, 2000,
-		if (!(nvkm_rd32(device, chan->ctrl.runl + 0x098) & 0x00100000))
-			break;
-	);
-
-	nvkm_wr32(device, chan->ctrl.runl + 0x088, 0);
-
-	nvkm_wr32(device, chan->ctrl.chan, 0xffffffff);
-	return 0;
-}
-
-static int
-ga102_chan_init(struct nvkm_object *object)
-{
-	struct ga102_chan *chan = ga102_chan(object);
-	struct nvkm_device *device = chan->object.engine->subdev.device;
-
-	nvkm_mask(device, chan->ctrl.runl + 0x300, 0x80000000, 0x80000000);
-
-	nvkm_wr32(device, chan->ctrl.runl + 0x080, lower_32_bits(nvkm_memory_addr(chan->runl)));
-	nvkm_wr32(device, chan->ctrl.runl + 0x084, upper_32_bits(nvkm_memory_addr(chan->runl)));
-	nvkm_wr32(device, chan->ctrl.runl + 0x088, 2);
-
-	nvkm_wr32(device, chan->ctrl.chan, 0x00000002);
-	nvkm_wr32(device, chan->ctrl.runl + 0x0090, 0);
-	return 0;
-}
-
-static void *
-ga102_chan_dtor(struct nvkm_object *object)
-{
-	struct ga102_chan *chan = ga102_chan(object);
-
-	if (chan->vmm) {
-		nvkm_vmm_part(chan->vmm, chan->inst);
-		nvkm_vmm_unref(&chan->vmm);
-	}
-
-	nvkm_memory_unref(&chan->runl);
-	nvkm_memory_unref(&chan->user);
-	nvkm_memory_unref(&chan->inst);
-	nvkm_memory_unref(&chan->mthd);
-	return chan;
-}
-
-static const struct nvkm_object_func
-ga102_chan = {
-	.dtor = ga102_chan_dtor,
-	.init = ga102_chan_init,
-	.fini = ga102_chan_fini,
-	.map = ga102_chan_map,
-	.sclass = ga102_chan_sclass,
-};
-
-static int
-ga102_chan_new(struct nvkm_device *device,
-	       const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nvkm_object **pobject)
-{
-	struct volta_channel_gpfifo_a_v0 *args = argv;
-	struct nvkm_top_device *tdev;
-	struct nvkm_vmm *vmm;
-	struct ga102_chan *chan;
-	int ret;
-
-	if (argc != sizeof(*args))
-		return -ENOSYS;
-
-	vmm = nvkm_uvmm_search(oclass->client, args->vmm);
-	if (IS_ERR(vmm))
-		return PTR_ERR(vmm);
-
-	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
-		return -ENOMEM;
-
-	nvkm_object_ctor(&ga102_chan, oclass, &chan->object);
-	*pobject = &chan->object;
-
-	list_for_each_entry(tdev, &device->top->device, head) {
-		if (tdev->type == NVKM_ENGINE_CE) {
-			chan->ctrl.runl = tdev->runlist;
-			break;
-		}
-	}
-
-	if (!chan->ctrl.runl)
-		return -ENODEV;
-
-	chan->ctrl.chan = nvkm_rd32(device, chan->ctrl.runl + 0x004) & 0xfffffff0;
-
-	args->chid = 0;
-	args->inst = 0;
-	args->token = nvkm_rd32(device, chan->ctrl.runl + 0x008) & 0xffff0000;
-
-	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->mthd);
-	if (ret)
-		return ret;
-
-	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->inst);
-	if (ret)
-		return ret;
-
-	nvkm_kmap(chan->inst);
-	nvkm_wo32(chan->inst, 0x010, 0x0000face);
-	nvkm_wo32(chan->inst, 0x030, 0x7ffff902);
-	nvkm_wo32(chan->inst, 0x048, lower_32_bits(args->ioffset));
-	nvkm_wo32(chan->inst, 0x04c, upper_32_bits(args->ioffset) |
-				     (order_base_2(args->ilength / 8) << 16));
-	nvkm_wo32(chan->inst, 0x084, 0x20400000);
-	nvkm_wo32(chan->inst, 0x094, 0x30000001);
-	nvkm_wo32(chan->inst, 0x0ac, 0x00020000);
-	nvkm_wo32(chan->inst, 0x0e4, 0x00000000);
-	nvkm_wo32(chan->inst, 0x0e8, 0);
-	nvkm_wo32(chan->inst, 0x0f4, 0x00001000);
-	nvkm_wo32(chan->inst, 0x0f8, 0x10003080);
-	nvkm_mo32(chan->inst, 0x218, 0x00000000, 0x00000000);
-	nvkm_wo32(chan->inst, 0x220, lower_32_bits(nvkm_memory_bar2(chan->mthd)));
-	nvkm_wo32(chan->inst, 0x224, upper_32_bits(nvkm_memory_bar2(chan->mthd)));
-	nvkm_done(chan->inst);
-
-	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->user);
-	if (ret)
-		return ret;
-
-	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000, true, &chan->runl);
-	if (ret)
-		return ret;
-
-	nvkm_kmap(chan->runl);
-	nvkm_wo32(chan->runl, 0x00, 0x80030001);
-	nvkm_wo32(chan->runl, 0x04, 1);
-	nvkm_wo32(chan->runl, 0x08, 0);
-	nvkm_wo32(chan->runl, 0x0c, 0x00000000);
-	nvkm_wo32(chan->runl, 0x10, lower_32_bits(nvkm_memory_addr(chan->user)));
-	nvkm_wo32(chan->runl, 0x14, upper_32_bits(nvkm_memory_addr(chan->user)));
-	nvkm_wo32(chan->runl, 0x18, lower_32_bits(nvkm_memory_addr(chan->inst)));
-	nvkm_wo32(chan->runl, 0x1c, upper_32_bits(nvkm_memory_addr(chan->inst)));
-	nvkm_done(chan->runl);
-
-	ret = nvkm_vmm_join(vmm, chan->inst);
-	if (ret)
-		return ret;
-
-	chan->vmm = nvkm_vmm_ref(vmm);
-	return 0;
-}
-
-static const struct nvkm_device_oclass
-ga102_chan_oclass = {
-	.ctor = ga102_chan_new,
-};
-
-static int
-ga102_user_new(struct nvkm_device *device,
-	       const struct nvkm_oclass *oclass, void *argv, u32 argc, struct nvkm_object **pobject)
-{
-	return tu102_fifo_user_new(oclass, argv, argc, pobject);
-}
-
-static const struct nvkm_device_oclass
-ga102_user_oclass = {
-	.ctor = ga102_user_new,
-};
-
-static int
-ga102_fifo_sclass(struct nvkm_oclass *oclass, int index, const struct nvkm_device_oclass **class)
-{
-	if (index == 0) {
-		oclass->base = (struct nvkm_sclass) { -1, -1, VOLTA_USERMODE_A };
-		*class = &ga102_user_oclass;
-		return 0;
-	} else
-	if (index == 1) {
-		oclass->base = (struct nvkm_sclass) { 0, 0, AMPERE_CHANNEL_GPFIFO_B };
-		*class = &ga102_chan_oclass;
-		return 0;
-	}
-
-	return 2;
-}
-
-static int
-ga102_fifo_info(struct nvkm_engine *engine, u64 mthd, u64 *data)
-{
-	switch (mthd) {
-	case NV_DEVICE_HOST_CHANNELS: *data = 1; return 0;
-	default:
-		break;
-	}
-
-	return -ENOSYS;
-}
-
-static void *
-ga102_fifo_dtor(struct nvkm_engine *engine)
-{
-	return ga102_fifo(engine);
-}
-
-static const struct nvkm_engine_func
+static const struct nvkm_fifo_func
 ga102_fifo = {
-	.dtor = ga102_fifo_dtor,
-	.info = ga102_fifo_info,
-	.base.sclass = ga102_fifo_sclass,
+	.runl_ctor = ga100_fifo_runl_ctor,
+	.mmu_fault = &tu102_fifo_mmu_fault,
+	.nonstall_ctor = ga100_fifo_nonstall_ctor,
+	.nonstall = &ga100_fifo_nonstall,
+	.runl = &ga100_runl,
+	.runq = &ga100_runq,
+	.engn = &ga100_engn,
+	.engn_ce = &ga100_engn_ce,
+	.cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A  }, &ga100_cgrp, .force = true },
+	.chan = {{ 0, 0, AMPERE_CHANNEL_GPFIFO_B }, &ga100_chan },
 };
 
 int
 ga102_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	       struct nvkm_fifo **pfifo)
 {
-	struct ga102_fifo *fifo;
-
-	if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL)))
-		return -ENOMEM;
-
-	nvkm_engine_ctor(&ga102_fifo, device, type, inst, true, &fifo->base.engine);
-	*pfifo = &fifo->base;
-	return 0;
+	return nvkm_fifo_new_(&ga102_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
index 8b4f36b3e34b..5bb65258c36d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
@@ -21,186 +21,456 @@
  *
  * Authors: Ben Skeggs
  */
-#include "gf100.h"
-#include "changf100.h"
+#include "priv.h"
+#include "cgrp.h"
+#include "chan.h"
+#include "chid.h"
+#include "runl.h"
+#include "runq.h"
 
-#include <core/client.h>
-#include <core/enum.h>
 #include <core/gpuobj.h>
 #include <subdev/bar.h>
 #include <subdev/fault.h>
+#include <subdev/mc.h>
+#include <subdev/mmu.h>
 #include <engine/sw.h>
 
 #include <nvif/class.h>
 
+void
+gf100_chan_preempt(struct nvkm_chan *chan)
+{
+	nvkm_wr32(chan->cgrp->runl->fifo->engine.subdev.device, 0x002634, chan->id);
+}
+
 static void
-gf100_fifo_uevent_init(struct nvkm_fifo *fifo)
+gf100_chan_stop(struct nvkm_chan *chan)
 {
-	struct nvkm_device *device = fifo->engine.subdev.device;
-	nvkm_mask(device, 0x002140, 0x80000000, 0x80000000);
+	struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device;
+
+	nvkm_mask(device, 0x003004 + (chan->id * 8), 0x00000001, 0x00000000);
 }
 
 static void
-gf100_fifo_uevent_fini(struct nvkm_fifo *fifo)
+gf100_chan_start(struct nvkm_chan *chan)
 {
+	struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device;
+
+	nvkm_wr32(device, 0x003004 + (chan->id * 8), 0x001f0001);
+}
+
+static void gf100_fifo_intr_engine(struct nvkm_fifo *);
+
+static void
+gf100_chan_unbind(struct nvkm_chan *chan)
+{
+	struct nvkm_fifo *fifo = chan->cgrp->runl->fifo;
 	struct nvkm_device *device = fifo->engine.subdev.device;
-	nvkm_mask(device, 0x002140, 0x80000000, 0x00000000);
+
+	/*TODO: Is this cargo-culted, or necessary? RM does *something* here... Why? */
+	gf100_fifo_intr_engine(fifo);
+
+	nvkm_wr32(device, 0x003000 + (chan->id * 8), 0x00000000);
 }
 
+static void
+gf100_chan_bind(struct nvkm_chan *chan)
+{
+	struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device;
+
+	nvkm_wr32(device, 0x003000 + (chan->id * 8), 0xc0000000 | chan->inst->addr >> 12);
+}
+
+static int
+gf100_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv)
+{
+	const u64 userd = nvkm_memory_addr(chan->userd.mem) + chan->userd.base;
+	const u32 limit2 = ilog2(length / 8);
+
+	nvkm_kmap(chan->inst);
+	nvkm_wo32(chan->inst, 0x08, lower_32_bits(userd));
+	nvkm_wo32(chan->inst, 0x0c, upper_32_bits(userd));
+	nvkm_wo32(chan->inst, 0x10, 0x0000face);
+	nvkm_wo32(chan->inst, 0x30, 0xfffff902);
+	nvkm_wo32(chan->inst, 0x48, lower_32_bits(offset));
+	nvkm_wo32(chan->inst, 0x4c, upper_32_bits(offset) | (limit2 << 16));
+	nvkm_wo32(chan->inst, 0x54, 0x00000002);
+	nvkm_wo32(chan->inst, 0x84, 0x20400000);
+	nvkm_wo32(chan->inst, 0x94, 0x30000000 | devm);
+	nvkm_wo32(chan->inst, 0x9c, 0x00000100);
+	nvkm_wo32(chan->inst, 0xa4, 0x1f1f1f1f);
+	nvkm_wo32(chan->inst, 0xa8, 0x1f1f1f1f);
+	nvkm_wo32(chan->inst, 0xac, 0x0000001f);
+	nvkm_wo32(chan->inst, 0xb8, 0xf8000000);
+	nvkm_wo32(chan->inst, 0xf8, 0x10003080); /* 0x002310 */
+	nvkm_wo32(chan->inst, 0xfc, 0x10000010); /* 0x002350 */
+	nvkm_done(chan->inst);
+	return 0;
+}
+
+static const struct nvkm_chan_func_ramfc
+gf100_chan_ramfc = {
+	.write = gf100_chan_ramfc_write,
+	.devm = 0xfff,
+};
+
 void
-gf100_fifo_runlist_commit(struct gf100_fifo *fifo)
+gf100_chan_userd_clear(struct nvkm_chan *chan)
 {
-	struct gf100_fifo_chan *chan;
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	struct nvkm_memory *cur;
-	int nr = 0;
-	int target;
+	nvkm_kmap(chan->userd.mem);
+	nvkm_wo32(chan->userd.mem, chan->userd.base + 0x040, 0x00000000);
+	nvkm_wo32(chan->userd.mem, chan->userd.base + 0x044, 0x00000000);
+	nvkm_wo32(chan->userd.mem, chan->userd.base + 0x048, 0x00000000);
+	nvkm_wo32(chan->userd.mem, chan->userd.base + 0x04c, 0x00000000);
+	nvkm_wo32(chan->userd.mem, chan->userd.base + 0x050, 0x00000000);
+	nvkm_wo32(chan->userd.mem, chan->userd.base + 0x058, 0x00000000);
+	nvkm_wo32(chan->userd.mem, chan->userd.base + 0x05c, 0x00000000);
+	nvkm_wo32(chan->userd.mem, chan->userd.base + 0x060, 0x00000000);
+	nvkm_wo32(chan->userd.mem, chan->userd.base + 0x088, 0x00000000);
+	nvkm_wo32(chan->userd.mem, chan->userd.base + 0x08c, 0x00000000);
+	nvkm_done(chan->userd.mem);
+}
 
-	mutex_lock(&fifo->base.mutex);
-	cur = fifo->runlist.mem[fifo->runlist.active];
-	fifo->runlist.active = !fifo->runlist.active;
+static const struct nvkm_chan_func_userd
+gf100_chan_userd = {
+	.bar = 1,
+	.size = 0x1000,
+	.clear = gf100_chan_userd_clear,
+};
 
-	nvkm_kmap(cur);
-	list_for_each_entry(chan, &fifo->chan, head) {
-		nvkm_wo32(cur, (nr * 8) + 0, chan->base.chid);
-		nvkm_wo32(cur, (nr * 8) + 4, 0x00000004);
-		nr++;
-	}
-	nvkm_done(cur);
+const struct nvkm_chan_func_inst
+gf100_chan_inst = {
+	.size = 0x1000,
+	.zero = true,
+	.vmm = true,
+};
 
-	switch (nvkm_memory_target(cur)) {
-	case NVKM_MEM_TARGET_VRAM: target = 0; break;
-	case NVKM_MEM_TARGET_NCOH: target = 3; break;
+static const struct nvkm_chan_func
+gf100_chan = {
+	.inst = &gf100_chan_inst,
+	.userd = &gf100_chan_userd,
+	.ramfc = &gf100_chan_ramfc,
+	.bind = gf100_chan_bind,
+	.unbind = gf100_chan_unbind,
+	.start = gf100_chan_start,
+	.stop = gf100_chan_stop,
+	.preempt = gf100_chan_preempt,
+};
+
+static void
+gf100_ectx_bind(struct nvkm_engn *engn, struct nvkm_cctx *cctx, struct nvkm_chan *chan)
+{
+	u64 addr = 0ULL;
+	u32 ptr0;
+
+	switch (engn->engine->subdev.type) {
+	case NVKM_ENGINE_SW    : return;
+	case NVKM_ENGINE_GR    : ptr0 = 0x0210; break;
+	case NVKM_ENGINE_CE    : ptr0 = 0x0230 + (engn->engine->subdev.inst * 0x10); break;
+	case NVKM_ENGINE_MSPDEC: ptr0 = 0x0250; break;
+	case NVKM_ENGINE_MSPPP : ptr0 = 0x0260; break;
+	case NVKM_ENGINE_MSVLD : ptr0 = 0x0270; break;
 	default:
-		mutex_unlock(&fifo->base.mutex);
 		WARN_ON(1);
 		return;
 	}
 
-	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(cur) >> 12) |
-				    (target << 28));
-	nvkm_wr32(device, 0x002274, 0x01f00000 | nr);
+	if (cctx) {
+		addr  = cctx->vctx->vma->addr;
+		addr |= 4ULL;
+	}
 
-	if (wait_event_timeout(fifo->runlist.wait,
-			       !(nvkm_rd32(device, 0x00227c) & 0x00100000),
-			       msecs_to_jiffies(2000)) == 0)
-		nvkm_error(subdev, "runlist update timeout\n");
-	mutex_unlock(&fifo->base.mutex);
+	nvkm_kmap(chan->inst);
+	nvkm_wo32(chan->inst, ptr0 + 0, lower_32_bits(addr));
+	nvkm_wo32(chan->inst, ptr0 + 4, upper_32_bits(addr));
+	nvkm_done(chan->inst);
 }
 
-void
-gf100_fifo_runlist_remove(struct gf100_fifo *fifo, struct gf100_fifo_chan *chan)
+static int
+gf100_ectx_ctor(struct nvkm_engn *engn, struct nvkm_vctx *vctx)
+{
+	int ret;
+
+	ret = nvkm_vmm_get(vctx->vmm, 12, vctx->inst->size, &vctx->vma);
+	if (ret)
+		return ret;
+
+	return nvkm_memory_map(vctx->inst, 0, vctx->vmm, vctx->vma, NULL, 0);
+}
+
+bool
+gf100_engn_mmu_fault_triggered(struct nvkm_engn *engn)
 {
-	mutex_lock(&fifo->base.mutex);
-	list_del_init(&chan->head);
-	mutex_unlock(&fifo->base.mutex);
+	struct nvkm_runl *runl = engn->runl;
+	struct nvkm_fifo *fifo = runl->fifo;
+	struct nvkm_device *device = fifo->engine.subdev.device;
+	u32 data = nvkm_rd32(device, 0x002a30 + (engn->id * 4));
+
+	ENGN_DEBUG(engn, "%08x: mmu fault triggered", data);
+	if (!(data & 0x00000100))
+		return false;
+
+	spin_lock(&fifo->lock);
+	nvkm_mask(device, 0x002a30 + (engn->id * 4), 0x00000100, 0x00000000);
+	if (atomic_dec_and_test(&runl->rc_triggered))
+		nvkm_mask(device, 0x002140, 0x00000100, 0x00000100);
+	spin_unlock(&fifo->lock);
+	return true;
 }
 
 void
-gf100_fifo_runlist_insert(struct gf100_fifo *fifo, struct gf100_fifo_chan *chan)
+gf100_engn_mmu_fault_trigger(struct nvkm_engn *engn)
 {
-	mutex_lock(&fifo->base.mutex);
-	list_add_tail(&chan->head, &fifo->chan);
-	mutex_unlock(&fifo->base.mutex);
+	struct nvkm_runl *runl = engn->runl;
+	struct nvkm_fifo *fifo = runl->fifo;
+	struct nvkm_device *device = fifo->engine.subdev.device;
+
+	ENGN_DEBUG(engn, "triggering mmu fault on 0x%02x", engn->fault);
+	spin_lock(&fifo->lock);
+	if (atomic_inc_return(&runl->rc_triggered) == 1)
+		nvkm_mask(device, 0x002140, 0x00000100, 0x00000000);
+	nvkm_wr32(device, 0x002100, 0x00000100);
+	nvkm_wr32(device, 0x002a30 + (engn->id * 4), 0x00000100 | engn->fault);
+	spin_unlock(&fifo->lock);
 }
 
-static struct nvkm_engine *
-gf100_fifo_id_engine(struct nvkm_fifo *fifo, int engi)
+/*TODO: clean all this up. */
+struct gf100_engn_status {
+	bool busy;
+	bool save;
+	bool unk0;
+	bool unk1;
+	u8   chid;
+};
+
+static void
+gf100_engn_status(struct nvkm_engn *engn, struct gf100_engn_status *status)
 {
-	enum nvkm_subdev_type type;
-	int inst;
+	u32 stat = nvkm_rd32(engn->engine->subdev.device, 0x002640 + (engn->id * 4));
 
-	switch (engi) {
-	case GF100_FIFO_ENGN_GR    : type = NVKM_ENGINE_GR    ; inst = 0; break;
-	case GF100_FIFO_ENGN_MSPDEC: type = NVKM_ENGINE_MSPDEC; inst = 0; break;
-	case GF100_FIFO_ENGN_MSPPP : type = NVKM_ENGINE_MSPPP ; inst = 0; break;
-	case GF100_FIFO_ENGN_MSVLD : type = NVKM_ENGINE_MSVLD ; inst = 0; break;
-	case GF100_FIFO_ENGN_CE0   : type = NVKM_ENGINE_CE    ; inst = 0; break;
-	case GF100_FIFO_ENGN_CE1   : type = NVKM_ENGINE_CE    ; inst = 1; break;
-	case GF100_FIFO_ENGN_SW    : type = NVKM_ENGINE_SW    ; inst = 0; break;
-	default:
-		WARN_ON(1);
-		return NULL;
-	}
+	status->busy = (stat & 0x10000000);
+	status->save = (stat & 0x00100000);
+	status->unk0 = (stat & 0x00004000);
+	status->unk1 = (stat & 0x00001000);
+	status->chid = (stat & 0x0000007f);
 
-	return nvkm_device_engine(fifo->engine.subdev.device, type, inst);
+	ENGN_DEBUG(engn, "%08x: busy %d save %d unk0 %d unk1 %d chid %d",
+		   stat, status->busy, status->save, status->unk0, status->unk1, status->chid);
 }
 
 static int
-gf100_fifo_engine_id(struct nvkm_fifo *base, struct nvkm_engine *engine)
-{
-	switch (engine->subdev.type) {
-	case NVKM_ENGINE_GR    : return GF100_FIFO_ENGN_GR;
-	case NVKM_ENGINE_MSPDEC: return GF100_FIFO_ENGN_MSPDEC;
-	case NVKM_ENGINE_MSPPP : return GF100_FIFO_ENGN_MSPPP;
-	case NVKM_ENGINE_MSVLD : return GF100_FIFO_ENGN_MSVLD;
-	case NVKM_ENGINE_CE    : return GF100_FIFO_ENGN_CE0 + engine->subdev.inst;
-	case NVKM_ENGINE_SW    : return GF100_FIFO_ENGN_SW;
-	default:
-		WARN_ON(1);
-		return -1;
+gf100_engn_cxid(struct nvkm_engn *engn, bool *cgid)
+{
+	struct gf100_engn_status status;
+
+	gf100_engn_status(engn, &status);
+	if (status.busy) {
+		*cgid = false;
+		return status.chid;
 	}
+
+	return -ENODEV;
 }
 
-static void
-gf100_fifo_recover_work(struct work_struct *w)
+static bool
+gf100_engn_chsw(struct nvkm_engn *engn)
 {
-	struct gf100_fifo *fifo = container_of(w, typeof(*fifo), recover.work);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct nvkm_engine *engine;
-	unsigned long flags;
-	u32 engm, engn, todo;
+	struct gf100_engn_status status;
+
+	gf100_engn_status(engn, &status);
+	if (status.busy && (status.unk0 || status.unk1))
+		return true;
 
-	spin_lock_irqsave(&fifo->base.lock, flags);
-	engm = fifo->recover.mask;
-	fifo->recover.mask = 0ULL;
-	spin_unlock_irqrestore(&fifo->base.lock, flags);
+	return false;
+}
 
-	nvkm_mask(device, 0x002630, engm, engm);
+static const struct nvkm_engn_func
+gf100_engn = {
+	.chsw = gf100_engn_chsw,
+	.cxid = gf100_engn_cxid,
+	.mmu_fault_trigger = gf100_engn_mmu_fault_trigger,
+	.mmu_fault_triggered = gf100_engn_mmu_fault_triggered,
+	.ctor = gf100_ectx_ctor,
+	.bind = gf100_ectx_bind,
+};
 
-	for (todo = engm; engn = __ffs(todo), todo; todo &= ~BIT_ULL(engn)) {
-		if ((engine = gf100_fifo_id_engine(&fifo->base, engn))) {
-			nvkm_subdev_fini(&engine->subdev, false);
-			WARN_ON(nvkm_subdev_init(&engine->subdev));
+const struct nvkm_engn_func
+gf100_engn_sw = {
+};
+
+static const struct nvkm_bitfield
+gf100_runq_intr_0_names[] = {
+/*	{ 0x00008000, "" }	seen with null ib push */
+	{ 0x00200000, "ILLEGAL_MTHD" },
+	{ 0x00800000, "EMPTY_SUBC" },
+	{}
+};
+
+bool
+gf100_runq_intr(struct nvkm_runq *runq, struct nvkm_runl *null)
+{
+	struct nvkm_subdev *subdev = &runq->fifo->engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 mask = nvkm_rd32(device, 0x04010c + (runq->id * 0x2000));
+	u32 stat = nvkm_rd32(device, 0x040108 + (runq->id * 0x2000)) & mask;
+	u32 addr = nvkm_rd32(device, 0x0400c0 + (runq->id * 0x2000));
+	u32 data = nvkm_rd32(device, 0x0400c4 + (runq->id * 0x2000));
+	u32 chid = nvkm_rd32(device, 0x040120 + (runq->id * 0x2000)) & runq->fifo->chid->mask;
+	u32 subc = (addr & 0x00070000) >> 16;
+	u32 mthd = (addr & 0x00003ffc);
+	u32 show = stat;
+	struct nvkm_chan *chan;
+	unsigned long flags;
+	char msg[128];
+
+	if (stat & 0x00800000) {
+		if (device->sw) {
+			if (nvkm_sw_mthd(device->sw, chid, subc, mthd, data))
+				show &= ~0x00800000;
 		}
 	}
 
-	gf100_fifo_runlist_commit(fifo);
-	nvkm_wr32(device, 0x00262c, engm);
-	nvkm_mask(device, 0x002630, engm, 0x00000000);
+	if (show) {
+		nvkm_snprintbf(msg, sizeof(msg), runq->func->intr_0_names, show);
+		chan = nvkm_chan_get_chid(&runq->fifo->engine, chid, &flags);
+		nvkm_error(subdev, "PBDMA%d: %08x [%s] ch %d [%010llx %s] "
+				   "subc %d mthd %04x data %08x\n",
+			   runq->id, show, msg, chid, chan ? chan->inst->addr : 0,
+			   chan ? chan->name : "unknown", subc, mthd, data);
+
+		/*TODO: use proper procedure for clearing each exception / debug output */
+		if ((stat & 0xc67fe000) && chan)
+			nvkm_chan_error(chan, true);
+		nvkm_chan_put(&chan, flags);
+	}
+
+	nvkm_wr32(device, 0x0400c0 + (runq->id * 0x2000), 0x80600008);
+	nvkm_wr32(device, 0x040108 + (runq->id * 0x2000), stat);
+	return true;
+}
+
+void
+gf100_runq_init(struct nvkm_runq *runq)
+{
+	struct nvkm_device *device = runq->fifo->engine.subdev.device;
+
+	nvkm_mask(device, 0x04013c + (runq->id * 0x2000), 0x10000100, 0x00000000);
+	nvkm_wr32(device, 0x040108 + (runq->id * 0x2000), 0xffffffff); /* INTR */
+	nvkm_wr32(device, 0x04010c + (runq->id * 0x2000), 0xfffffeff); /* INTREN */
+}
+
+static const struct nvkm_runq_func
+gf100_runq = {
+	.init = gf100_runq_init,
+	.intr = gf100_runq_intr,
+	.intr_0_names = gf100_runq_intr_0_names,
+};
+
+bool
+gf100_runl_preempt_pending(struct nvkm_runl *runl)
+{
+	return nvkm_rd32(runl->fifo->engine.subdev.device, 0x002634) & 0x00100000;
 }
 
 static void
-gf100_fifo_recover(struct gf100_fifo *fifo, struct nvkm_engine *engine,
-		   struct gf100_fifo_chan *chan)
+gf100_runl_fault_clear(struct nvkm_runl *runl)
 {
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	u32 chid = chan->base.chid;
-	int engi = gf100_fifo_engine_id(&fifo->base, engine);
+	nvkm_mask(runl->fifo->engine.subdev.device, 0x00262c, 0x00000000, 0x00000000);
+}
+
+static void
+gf100_runl_allow(struct nvkm_runl *runl, u32 engm)
+{
+	nvkm_mask(runl->fifo->engine.subdev.device, 0x002630, engm, 0x00000000);
+}
+
+static void
+gf100_runl_block(struct nvkm_runl *runl, u32 engm)
+{
+	nvkm_mask(runl->fifo->engine.subdev.device, 0x002630, engm, engm);
+}
+
+static bool
+gf100_runl_pending(struct nvkm_runl *runl)
+{
+	return nvkm_rd32(runl->fifo->engine.subdev.device, 0x00227c) & 0x00100000;
+}
+
+static void
+gf100_runl_commit(struct nvkm_runl *runl, struct nvkm_memory *memory, u32 start, int count)
+{
+	struct nvkm_device *device = runl->fifo->engine.subdev.device;
+	u64 addr = nvkm_memory_addr(memory) + start;
+	int target;
+
+	switch (nvkm_memory_target(memory)) {
+	case NVKM_MEM_TARGET_VRAM: target = 0; break;
+	case NVKM_MEM_TARGET_NCOH: target = 3; break;
+	default:
+		WARN_ON(1);
+		return;
+	}
+
+	nvkm_wr32(device, 0x002270, (target << 28) | (addr >> 12));
+	nvkm_wr32(device, 0x002274, 0x01f00000 | count);
+}
+
+static void
+gf100_runl_insert_chan(struct nvkm_chan *chan, struct nvkm_memory *memory, u64 offset)
+{
+	nvkm_wo32(memory, offset + 0, chan->id);
+	nvkm_wo32(memory, offset + 4, 0x00000004);
+}
 
-	nvkm_error(subdev, "%s engine fault on channel %d, recovering...\n",
-		   engine->subdev.name, chid);
-	assert_spin_locked(&fifo->base.lock);
+static const struct nvkm_runl_func
+gf100_runl = {
+	.size = 8,
+	.update = nv50_runl_update,
+	.insert_chan = gf100_runl_insert_chan,
+	.commit = gf100_runl_commit,
+	.wait = nv50_runl_wait,
+	.pending = gf100_runl_pending,
+	.block = gf100_runl_block,
+	.allow = gf100_runl_allow,
+	.fault_clear = gf100_runl_fault_clear,
+	.preempt_pending = gf100_runl_preempt_pending,
+};
 
-	nvkm_mask(device, 0x003004 + (chid * 0x08), 0x00000001, 0x00000000);
-	list_del_init(&chan->head);
-	chan->killed = true;
+static void
+gf100_fifo_nonstall_allow(struct nvkm_event *event, int type, int index)
+{
+	struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event);
+	unsigned long flags;
 
-	if (engi >= 0 && engi != GF100_FIFO_ENGN_SW)
-		fifo->recover.mask |= BIT(engi);
-	schedule_work(&fifo->recover.work);
-	nvkm_fifo_kevent(&fifo->base, chid);
+	spin_lock_irqsave(&fifo->lock, flags);
+	nvkm_mask(fifo->engine.subdev.device, 0x002140, 0x80000000, 0x80000000);
+	spin_unlock_irqrestore(&fifo->lock, flags);
 }
 
+void
+gf100_fifo_nonstall_block(struct nvkm_event *event, int type, int index)
+{
+	struct nvkm_fifo *fifo = container_of(event, typeof(*fifo), nonstall.event);
+	unsigned long flags;
+
+	spin_lock_irqsave(&fifo->lock, flags);
+	nvkm_mask(fifo->engine.subdev.device, 0x002140, 0x80000000, 0x00000000);
+	spin_unlock_irqrestore(&fifo->lock, flags);
+}
+
+const struct nvkm_event_func
+gf100_fifo_nonstall = {
+	.init = gf100_fifo_nonstall_allow,
+	.fini = gf100_fifo_nonstall_block,
+};
+
 static const struct nvkm_enum
-gf100_fifo_fault_engine[] = {
+gf100_fifo_mmu_fault_engine[] = {
 	{ 0x00, "PGRAPH", NULL, NVKM_ENGINE_GR },
 	{ 0x03, "PEEPHOLE", NULL, NVKM_ENGINE_IFB },
 	{ 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR },
 	{ 0x05, "BAR3", NULL, NVKM_SUBDEV_INSTMEM },
-	{ 0x07, "PFIFO", NULL, NVKM_ENGINE_FIFO },
+	{ 0x07, "PFIFO" },
 	{ 0x10, "PMSVLD", NULL, NVKM_ENGINE_MSVLD },
 	{ 0x11, "PMSPPP", NULL, NVKM_ENGINE_MSPPP },
 	{ 0x13, "PCOUNTER" },
@@ -212,7 +482,7 @@ gf100_fifo_fault_engine[] = {
 };
 
 static const struct nvkm_enum
-gf100_fifo_fault_reason[] = {
+gf100_fifo_mmu_fault_reason[] = {
 	{ 0x00, "PT_NOT_PRESENT" },
 	{ 0x01, "PT_TOO_SHORT" },
 	{ 0x02, "PAGE_NOT_PRESENT" },
@@ -226,7 +496,7 @@ gf100_fifo_fault_reason[] = {
 };
 
 static const struct nvkm_enum
-gf100_fifo_fault_hubclient[] = {
+gf100_fifo_mmu_fault_hubclient[] = {
 	{ 0x01, "PCOPY0" },
 	{ 0x02, "PCOPY1" },
 	{ 0x04, "DISPATCH" },
@@ -245,7 +515,7 @@ gf100_fifo_fault_hubclient[] = {
 };
 
 static const struct nvkm_enum
-gf100_fifo_fault_gpcclient[] = {
+gf100_fifo_mmu_fault_gpcclient[] = {
 	{ 0x01, "TEX" },
 	{ 0x0c, "ESETUP" },
 	{ 0x0e, "CTXCTL" },
@@ -253,29 +523,55 @@ gf100_fifo_fault_gpcclient[] = {
 	{}
 };
 
-static void
-gf100_fifo_fault(struct nvkm_fifo *base, struct nvkm_fault_data *info)
+const struct nvkm_enum
+gf100_fifo_mmu_fault_access[] = {
+	{ 0x00, "READ" },
+	{ 0x01, "WRITE" },
+	{}
+};
+
+void
+gf100_fifo_mmu_fault_recover(struct nvkm_fifo *fifo, struct nvkm_fault_data *info)
 {
-	struct gf100_fifo *fifo = gf100_fifo(base);
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_subdev *subdev = &fifo->engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	const struct nvkm_enum *er, *eu, *ec;
+	const struct nvkm_enum *er, *ee, *ec, *ea;
 	struct nvkm_engine *engine = NULL;
-	struct nvkm_fifo_chan *chan;
+	struct nvkm_runl *runl;
+	struct nvkm_engn *engn;
+	struct nvkm_chan *chan;
 	unsigned long flags;
-	char gpcid[8] = "";
+	char ct[8] = "HUB/";
+
+	/* Lookup engine by MMU fault ID. */
+	nvkm_runl_foreach(runl, fifo) {
+		engn = nvkm_runl_find_engn(engn, runl, engn->fault == info->engine);
+		if (engn) {
+			/* Fault triggered by CTXSW_TIMEOUT recovery procedure. */
+			if (engn->func->mmu_fault_triggered &&
+			    engn->func->mmu_fault_triggered(engn)) {
+				nvkm_runl_rc_engn(runl, engn);
+				return;
+			}
+
+			engine = engn->engine;
+			break;
+		}
+	}
 
-	er = nvkm_enum_find(gf100_fifo_fault_reason, info->reason);
-	eu = nvkm_enum_find(gf100_fifo_fault_engine, info->engine);
+	er = nvkm_enum_find(fifo->func->mmu_fault->reason, info->reason);
+	ee = nvkm_enum_find(fifo->func->mmu_fault->engine, info->engine);
 	if (info->hub) {
-		ec = nvkm_enum_find(gf100_fifo_fault_hubclient, info->client);
+		ec = nvkm_enum_find(fifo->func->mmu_fault->hubclient, info->client);
 	} else {
-		ec = nvkm_enum_find(gf100_fifo_fault_gpcclient, info->client);
-		snprintf(gpcid, sizeof(gpcid), "GPC%d/", info->gpc);
+		ec = nvkm_enum_find(fifo->func->mmu_fault->gpcclient, info->client);
+		snprintf(ct, sizeof(ct), "GPC%d/", info->gpc);
 	}
+	ea = nvkm_enum_find(fifo->func->mmu_fault->access, info->access);
 
-	if (eu && eu->data2) {
-		switch (eu->data2) {
+	/* Handle BAR faults. */
+	if (ee && ee->data2) {
+		switch (ee->data2) {
 		case NVKM_SUBDEV_BAR:
 			nvkm_bar_bar1_reset(device);
 			break;
@@ -286,77 +582,104 @@ gf100_fifo_fault(struct nvkm_fifo *base, struct nvkm_fault_data *info)
 			nvkm_mask(device, 0x001718, 0x00000000, 0x00000000);
 			break;
 		default:
-			engine = nvkm_device_engine(device, eu->data2, eu->inst);
 			break;
 		}
 	}
 
-	chan = nvkm_fifo_chan_inst(&fifo->base, info->inst, &flags);
+	chan = nvkm_chan_get_inst(&fifo->engine, info->inst, &flags);
 
 	nvkm_error(subdev,
-		   "%s fault at %010llx engine %02x [%s] client %02x [%s%s] "
-		   "reason %02x [%s] on channel %d [%010llx %s]\n",
-		   info->access ? "write" : "read", info->addr,
-		   info->engine, eu ? eu->name : "",
-		   info->client, gpcid, ec ? ec->name : "",
-		   info->reason, er ? er->name : "", chan ? chan->chid : -1,
-		   info->inst, chan ? chan->object.client->name : "unknown");
-
-	if (engine && chan)
-		gf100_fifo_recover(fifo, engine, (void *)chan);
-	nvkm_fifo_chan_put(&fifo->base, flags, &chan);
+		   "fault %02x [%s] at %016llx engine %02x [%s] client %02x "
+		   "[%s%s] reason %02x [%s] on channel %d [%010llx %s]\n",
+		   info->access, ea ? ea->name : "", info->addr,
+		   info->engine, ee ? ee->name : engine ? engine->subdev.name : "",
+		   info->client, ct, ec ? ec->name : "",
+		   info->reason, er ? er->name : "",
+		   chan ? chan->id : -1, info->inst, chan ? chan->name : "unknown");
+
+	/* Handle host/engine faults. */
+	if (chan)
+		nvkm_runl_rc_cgrp(chan->cgrp);
+
+	nvkm_chan_put(&chan, flags);
 }
 
-static const struct nvkm_enum
-gf100_fifo_sched_reason[] = {
-	{ 0x0a, "CTXSW_TIMEOUT" },
-	{}
+static const struct nvkm_fifo_func_mmu_fault
+gf100_fifo_mmu_fault = {
+	.recover = gf100_fifo_mmu_fault_recover,
+	.access = gf100_fifo_mmu_fault_access,
+	.engine = gf100_fifo_mmu_fault_engine,
+	.reason = gf100_fifo_mmu_fault_reason,
+	.hubclient = gf100_fifo_mmu_fault_hubclient,
+	.gpcclient = gf100_fifo_mmu_fault_gpcclient,
 };
 
-static void
-gf100_fifo_intr_sched_ctxsw(struct gf100_fifo *fifo)
+void
+gf100_fifo_intr_ctxsw_timeout(struct nvkm_fifo *fifo, u32 engm)
 {
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct nvkm_engine *engine;
-	struct gf100_fifo_chan *chan;
-	unsigned long flags;
-	u32 engn;
-
-	spin_lock_irqsave(&fifo->base.lock, flags);
-	for (engn = 0; engn < 6; engn++) {
-		u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x04));
-		u32 busy = (stat & 0x80000000);
-		u32 save = (stat & 0x00100000); /* maybe? */
-		u32 unk0 = (stat & 0x00040000);
-		u32 unk1 = (stat & 0x00001000);
-		u32 chid = (stat & 0x0000007f);
-		(void)save;
-
-		if (busy && unk0 && unk1) {
-			list_for_each_entry(chan, &fifo->chan, head) {
-				if (chan->base.chid == chid) {
-					engine = gf100_fifo_id_engine(&fifo->base, engn);
-					if (!engine)
-						break;
-					gf100_fifo_recover(fifo, engine, chan);
-					break;
+	struct nvkm_runl *runl;
+	struct nvkm_engn *engn, *engn2;
+	bool cgid, cgid2;
+	int id, id2;
+
+	nvkm_runl_foreach(runl, fifo) {
+		/* Stop the runlist, and go through all engines serving it. */
+		nvkm_runl_block(runl);
+		nvkm_runl_foreach_engn_cond(engn, runl, engm & BIT(engn->id)) {
+			/* Determine what channel (group) the engine is on. */
+			id = engn->func->cxid(engn, &cgid);
+			if (id >= 0) {
+				/* Trigger MMU fault on any engine(s) on that channel (group). */
+				nvkm_runl_foreach_engn_cond(engn2, runl, engn2->func->cxid) {
+					id2 = engn2->func->cxid(engn2, &cgid2);
+					if (cgid2 == cgid && id2 == id)
+						engn2->func->mmu_fault_trigger(engn2);
 				}
 			}
 		}
+		nvkm_runl_allow(runl); /* HW will keep runlist blocked via ERROR_SCHED_DISABLE. */
 	}
-	spin_unlock_irqrestore(&fifo->base.lock, flags);
 }
 
 static void
-gf100_fifo_intr_sched(struct gf100_fifo *fifo)
+gf100_fifo_intr_sched_ctxsw(struct nvkm_fifo *fifo)
 {
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_runl *runl;
+	struct nvkm_engn *engn;
+	u32 engm = 0;
+
+	/* Look for any engines that are busy, and awaiting chsw ack. */
+	nvkm_runl_foreach(runl, fifo) {
+		nvkm_runl_foreach_engn_cond(engn, runl, engn->func->chsw) {
+			if (WARN_ON(engn->fault < 0) || !engn->func->chsw(engn))
+				continue;
+
+			engm |= BIT(engn->id);
+		}
+	}
+
+	if (!engm)
+		return;
+
+	fifo->func->intr_ctxsw_timeout(fifo, engm);
+}
+
+static const struct nvkm_enum
+gf100_fifo_intr_sched_names[] = {
+	{ 0x0a, "CTXSW_TIMEOUT" },
+	{}
+};
+
+void
+gf100_fifo_intr_sched(struct nvkm_fifo *fifo)
+{
+	struct nvkm_subdev *subdev = &fifo->engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	u32 intr = nvkm_rd32(device, 0x00254c);
 	u32 code = intr & 0x000000ff;
 	const struct nvkm_enum *en;
 
-	en = nvkm_enum_find(gf100_fifo_sched_reason, code);
+	en = nvkm_enum_find(gf100_fifo_intr_sched_names, code);
 
 	nvkm_error(subdev, "SCHED_ERROR %02x [%s]\n", code, en ? en->name : "");
 
@@ -370,7 +693,7 @@ gf100_fifo_intr_sched(struct gf100_fifo *fifo)
 }
 
 void
-gf100_fifo_intr_fault(struct nvkm_fifo *fifo, int unit)
+gf100_fifo_intr_mmu_fault_unit(struct nvkm_fifo *fifo, int unit)
 {
 	struct nvkm_device *device = fifo->engine.subdev.device;
 	u32 inst = nvkm_rd32(device, 0x002800 + (unit * 0x10));
@@ -393,61 +716,45 @@ gf100_fifo_intr_fault(struct nvkm_fifo *fifo, int unit)
 	nvkm_fifo_fault(fifo, &info);
 }
 
-static const struct nvkm_bitfield
-gf100_fifo_pbdma_intr[] = {
-/*	{ 0x00008000, "" }	seen with null ib push */
-	{ 0x00200000, "ILLEGAL_MTHD" },
-	{ 0x00800000, "EMPTY_SUBC" },
-	{}
-};
-
-static void
-gf100_fifo_intr_pbdma(struct gf100_fifo *fifo, int unit)
+void
+gf100_fifo_intr_mmu_fault(struct nvkm_fifo *fifo)
 {
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	u32 stat = nvkm_rd32(device, 0x040108 + (unit * 0x2000));
-	u32 addr = nvkm_rd32(device, 0x0400c0 + (unit * 0x2000));
-	u32 data = nvkm_rd32(device, 0x0400c4 + (unit * 0x2000));
-	u32 chid = nvkm_rd32(device, 0x040120 + (unit * 0x2000)) & 0x7f;
-	u32 subc = (addr & 0x00070000) >> 16;
-	u32 mthd = (addr & 0x00003ffc);
-	struct nvkm_fifo_chan *chan;
-	unsigned long flags;
-	u32 show= stat;
-	char msg[128];
+	struct nvkm_device *device = fifo->engine.subdev.device;
+	unsigned long mask = nvkm_rd32(device, 0x00259c);
+	int unit;
 
-	if (stat & 0x00800000) {
-		if (device->sw) {
-			if (nvkm_sw_mthd(device->sw, chid, subc, mthd, data))
-				show &= ~0x00800000;
-		}
+	for_each_set_bit(unit, &mask, 32) {
+		fifo->func->intr_mmu_fault_unit(fifo, unit);
+		nvkm_wr32(device, 0x00259c, BIT(unit));
 	}
+}
 
-	if (show) {
-		nvkm_snprintbf(msg, sizeof(msg), gf100_fifo_pbdma_intr, show);
-		chan = nvkm_fifo_chan_chid(&fifo->base, chid, &flags);
-		nvkm_error(subdev, "PBDMA%d: %08x [%s] ch %d [%010llx %s] "
-				   "subc %d mthd %04x data %08x\n",
-			   unit, show, msg, chid, chan ? chan->inst->addr : 0,
-			   chan ? chan->object.client->name : "unknown",
-			   subc, mthd, data);
-		nvkm_fifo_chan_put(&fifo->base, flags, &chan);
+bool
+gf100_fifo_intr_pbdma(struct nvkm_fifo *fifo)
+{
+	struct nvkm_device *device = fifo->engine.subdev.device;
+	struct nvkm_runq *runq;
+	u32 mask = nvkm_rd32(device, 0x0025a0);
+	bool handled = false;
+
+	nvkm_runq_foreach_cond(runq, fifo, mask & BIT(runq->id)) {
+		if (runq->func->intr(runq, NULL))
+			handled = true;
+
+		nvkm_wr32(device, 0x0025a0, BIT(runq->id));
 	}
 
-	nvkm_wr32(device, 0x0400c0 + (unit * 0x2000), 0x80600008);
-	nvkm_wr32(device, 0x040108 + (unit * 0x2000), stat);
+	return handled;
 }
 
 static void
-gf100_fifo_intr_runlist(struct gf100_fifo *fifo)
+gf100_fifo_intr_runlist(struct nvkm_fifo *fifo)
 {
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_subdev *subdev = &fifo->engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	u32 intr = nvkm_rd32(device, 0x002a00);
 
 	if (intr & 0x10000000) {
-		wake_up(&fifo->runlist.wait);
 		nvkm_wr32(device, 0x002a00, 0x10000000);
 		intr &= ~0x10000000;
 	}
@@ -459,9 +766,9 @@ gf100_fifo_intr_runlist(struct gf100_fifo *fifo)
 }
 
 static void
-gf100_fifo_intr_engine_unit(struct gf100_fifo *fifo, int engn)
+gf100_fifo_intr_engine_unit(struct nvkm_fifo *fifo, int engn)
 {
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_subdev *subdev = &fifo->engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	u32 intr = nvkm_rd32(device, 0x0025a8 + (engn * 0x04));
 	u32 inte = nvkm_rd32(device, 0x002628);
@@ -472,22 +779,22 @@ gf100_fifo_intr_engine_unit(struct gf100_fifo *fifo, int engn)
 	for (unkn = 0; unkn < 8; unkn++) {
 		u32 ints = (intr >> (unkn * 0x04)) & inte;
 		if (ints & 0x1) {
-			nvkm_fifo_uevent(&fifo->base);
+			nvkm_event_ntfy(&fifo->nonstall.event, 0, NVKM_FIFO_NONSTALL_EVENT);
 			ints &= ~1;
 		}
 		if (ints) {
-			nvkm_error(subdev, "ENGINE %d %d %01x",
-				   engn, unkn, ints);
+			nvkm_error(subdev, "ENGINE %d %d %01x", engn, unkn, ints);
 			nvkm_mask(device, 0x002628, ints, 0);
 		}
 	}
 }
 
-void
-gf100_fifo_intr_engine(struct gf100_fifo *fifo)
+static void
+gf100_fifo_intr_engine(struct nvkm_fifo *fifo)
 {
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	struct nvkm_device *device = fifo->engine.subdev.device;
 	u32 mask = nvkm_rd32(device, 0x0025a4);
+
 	while (mask) {
 		u32 unit = __ffs(mask);
 		gf100_fifo_intr_engine_unit(fifo, unit);
@@ -495,11 +802,11 @@ gf100_fifo_intr_engine(struct gf100_fifo *fifo)
 	}
 }
 
-static void
-gf100_fifo_intr(struct nvkm_fifo *base)
+static irqreturn_t
+gf100_fifo_intr(struct nvkm_inth *inth)
 {
-	struct gf100_fifo *fifo = gf100_fifo(base);
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_fifo *fifo = container_of(inth, typeof(*fifo), engine.subdev.inth);
+	struct nvkm_subdev *subdev = &fifo->engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	u32 mask = nvkm_rd32(device, 0x002140);
 	u32 stat = nvkm_rd32(device, 0x002100) & mask;
@@ -532,25 +839,13 @@ gf100_fifo_intr(struct nvkm_fifo *base)
 	}
 
 	if (stat & 0x10000000) {
-		u32 mask = nvkm_rd32(device, 0x00259c);
-		while (mask) {
-			u32 unit = __ffs(mask);
-			gf100_fifo_intr_fault(&fifo->base, unit);
-			nvkm_wr32(device, 0x00259c, (1 << unit));
-			mask &= ~(1 << unit);
-		}
+		gf100_fifo_intr_mmu_fault(fifo);
 		stat &= ~0x10000000;
 	}
 
 	if (stat & 0x20000000) {
-		u32 mask = nvkm_rd32(device, 0x0025a0);
-		while (mask) {
-			u32 unit = __ffs(mask);
-			gf100_fifo_intr_pbdma(fifo, unit);
-			nvkm_wr32(device, 0x0025a0, (1 << unit));
-			mask &= ~(1 << unit);
-		}
-		stat &= ~0x20000000;
+		if (gf100_fifo_intr_pbdma(fifo))
+			stat &= ~0x20000000;
 	}
 
 	if (stat & 0x40000000) {
@@ -565,71 +860,26 @@ gf100_fifo_intr(struct nvkm_fifo *base)
 
 	if (stat) {
 		nvkm_error(subdev, "INTR %08x\n", stat);
+		spin_lock(&fifo->lock);
 		nvkm_mask(device, 0x002140, stat, 0x00000000);
+		spin_unlock(&fifo->lock);
 		nvkm_wr32(device, 0x002100, stat);
 	}
-}
-
-static int
-gf100_fifo_oneinit(struct nvkm_fifo *base)
-{
-	struct gf100_fifo *fifo = gf100_fifo(base);
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	struct nvkm_vmm *bar = nvkm_bar_bar1_vmm(device);
-	int ret;
-
-	/* Determine number of PBDMAs by checking valid enable bits. */
-	nvkm_wr32(device, 0x002204, 0xffffffff);
-	fifo->pbdma_nr = hweight32(nvkm_rd32(device, 0x002204));
-	nvkm_debug(subdev, "%d PBDMA(s)\n", fifo->pbdma_nr);
-
-
-	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000,
-			      false, &fifo->runlist.mem[0]);
-	if (ret)
-		return ret;
-
-	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x1000,
-			      false, &fifo->runlist.mem[1]);
-	if (ret)
-		return ret;
 
-	init_waitqueue_head(&fifo->runlist.wait);
-
-	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 128 * 0x1000,
-			      0x1000, false, &fifo->user.mem);
-	if (ret)
-		return ret;
-
-	ret = nvkm_vmm_get(bar, 12, nvkm_memory_size(fifo->user.mem),
-			   &fifo->user.bar);
-	if (ret)
-		return ret;
-
-	return nvkm_memory_map(fifo->user.mem, 0, bar, fifo->user.bar, NULL, 0);
-}
-
-static void
-gf100_fifo_fini(struct nvkm_fifo *base)
-{
-	struct gf100_fifo *fifo = gf100_fifo(base);
-	flush_work(&fifo->recover.work);
+	return IRQ_HANDLED;
 }
 
 static void
-gf100_fifo_init(struct nvkm_fifo *base)
+gf100_fifo_init_pbdmas(struct nvkm_fifo *fifo, u32 mask)
 {
-	struct gf100_fifo *fifo = gf100_fifo(base);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	int i;
+	struct nvkm_device *device = fifo->engine.subdev.device;
 
 	/* Enable PBDMAs. */
-	nvkm_wr32(device, 0x000204, (1 << fifo->pbdma_nr) - 1);
-	nvkm_wr32(device, 0x002204, (1 << fifo->pbdma_nr) - 1);
+	nvkm_wr32(device, 0x000204, mask);
+	nvkm_wr32(device, 0x002204, mask);
 
 	/* Assign engines to PBDMAs. */
-	if (fifo->pbdma_nr >= 3) {
+	if ((mask & 7) == 7) {
 		nvkm_wr32(device, 0x002208, ~(1 << 0)); /* PGRAPH */
 		nvkm_wr32(device, 0x00220c, ~(1 << 1)); /* PVP */
 		nvkm_wr32(device, 0x002210, ~(1 << 1)); /* PMSPP */
@@ -638,62 +888,82 @@ gf100_fifo_init(struct nvkm_fifo *base)
 		nvkm_wr32(device, 0x00221c, ~(1 << 1)); /* PCE1 */
 	}
 
-	/* PBDMA[n] */
-	for (i = 0; i < fifo->pbdma_nr; i++) {
-		nvkm_mask(device, 0x04013c + (i * 0x2000), 0x10000100, 0x00000000);
-		nvkm_wr32(device, 0x040108 + (i * 0x2000), 0xffffffff); /* INTR */
-		nvkm_wr32(device, 0x04010c + (i * 0x2000), 0xfffffeff); /* INTREN */
-	}
+	nvkm_mask(device, 0x002a04, 0xbfffffff, 0xbfffffff);
+}
+
+static void
+gf100_fifo_init(struct nvkm_fifo *fifo)
+{
+	struct nvkm_device *device = fifo->engine.subdev.device;
 
 	nvkm_mask(device, 0x002200, 0x00000001, 0x00000001);
-	nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar->addr >> 12);
+	nvkm_wr32(device, 0x002254, 0x10000000 | fifo->userd.bar1->addr >> 12);
 
 	nvkm_wr32(device, 0x002100, 0xffffffff);
 	nvkm_wr32(device, 0x002140, 0x7fffffff);
 	nvkm_wr32(device, 0x002628, 0x00000001); /* ENGINE_INTR_EN */
 }
 
-static void *
-gf100_fifo_dtor(struct nvkm_fifo *base)
+static int
+gf100_fifo_runl_ctor(struct nvkm_fifo *fifo)
+{
+	struct nvkm_runl *runl;
+
+	runl = nvkm_runl_new(fifo, 0, 0, 0);
+	if (IS_ERR(runl))
+		return PTR_ERR(runl);
+
+	nvkm_runl_add(runl,  0, fifo->func->engn, NVKM_ENGINE_GR, 0);
+	nvkm_runl_add(runl,  1, fifo->func->engn, NVKM_ENGINE_MSPDEC, 0);
+	nvkm_runl_add(runl,  2, fifo->func->engn, NVKM_ENGINE_MSPPP, 0);
+	nvkm_runl_add(runl,  3, fifo->func->engn, NVKM_ENGINE_MSVLD, 0);
+	nvkm_runl_add(runl,  4, fifo->func->engn, NVKM_ENGINE_CE, 0);
+	nvkm_runl_add(runl,  5, fifo->func->engn, NVKM_ENGINE_CE, 1);
+	nvkm_runl_add(runl, 15,   &gf100_engn_sw, NVKM_ENGINE_SW, 0);
+	return 0;
+}
+
+int
+gf100_fifo_runq_nr(struct nvkm_fifo *fifo)
+{
+	struct nvkm_device *device = fifo->engine.subdev.device;
+	u32 save;
+
+	/* Determine number of PBDMAs by checking valid enable bits. */
+	save = nvkm_mask(device, 0x000204, 0xffffffff, 0xffffffff);
+	save = nvkm_mask(device, 0x000204, 0xffffffff, save);
+	return hweight32(save);
+}
+
+int
+gf100_fifo_chid_ctor(struct nvkm_fifo *fifo, int nr)
 {
-	struct gf100_fifo *fifo = gf100_fifo(base);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	nvkm_vmm_put(nvkm_bar_bar1_vmm(device), &fifo->user.bar);
-	nvkm_memory_unref(&fifo->user.mem);
-	nvkm_memory_unref(&fifo->runlist.mem[0]);
-	nvkm_memory_unref(&fifo->runlist.mem[1]);
-	return fifo;
+	return nvkm_chid_new(&nvkm_chan_event, &fifo->engine.subdev, nr, 0, nr, &fifo->chid);
 }
 
 static const struct nvkm_fifo_func
 gf100_fifo = {
-	.dtor = gf100_fifo_dtor,
-	.oneinit = gf100_fifo_oneinit,
+	.chid_nr = nv50_fifo_chid_nr,
+	.chid_ctor = gf100_fifo_chid_ctor,
+	.runq_nr = gf100_fifo_runq_nr,
+	.runl_ctor = gf100_fifo_runl_ctor,
 	.init = gf100_fifo_init,
-	.fini = gf100_fifo_fini,
+	.init_pbdmas = gf100_fifo_init_pbdmas,
 	.intr = gf100_fifo_intr,
-	.fault = gf100_fifo_fault,
-	.engine_id = gf100_fifo_engine_id,
-	.id_engine = gf100_fifo_id_engine,
-	.uevent_init = gf100_fifo_uevent_init,
-	.uevent_fini = gf100_fifo_uevent_fini,
-	.chan = {
-		&gf100_fifo_gpfifo_oclass,
-		NULL
-	},
+	.intr_mmu_fault_unit = gf100_fifo_intr_mmu_fault_unit,
+	.intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout,
+	.mmu_fault = &gf100_fifo_mmu_fault,
+	.nonstall = &gf100_fifo_nonstall,
+	.runl = &gf100_runl,
+	.runq = &gf100_runq,
+	.engn = &gf100_engn,
+	.cgrp = {{                            }, &nv04_cgrp },
+	.chan = {{ 0, 0, FERMI_CHANNEL_GPFIFO }, &gf100_chan },
 };
 
 int
 gf100_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	       struct nvkm_fifo **pfifo)
 {
-	struct gf100_fifo *fifo;
-
-	if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL)))
-		return -ENOMEM;
-	INIT_LIST_HEAD(&fifo->chan);
-	INIT_WORK(&fifo->recover.work, gf100_fifo_recover_work);
-	*pfifo = &fifo->base;
-
-	return nvkm_fifo_ctor(&gf100_fifo, device, type, inst, 128, &fifo->base);
+	return nvkm_fifo_new_(&gf100_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h
deleted file mode 100644
index b8642490eb2f..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __GF100_FIFO_H__
-#define __GF100_FIFO_H__
-#define gf100_fifo(p) container_of((p), struct gf100_fifo, base)
-#include "priv.h"
-
-#include <subdev/mmu.h>
-
-struct gf100_fifo_chan;
-struct gf100_fifo {
-	struct nvkm_fifo base;
-
-	struct list_head chan;
-
-	struct {
-		struct work_struct work;
-		u64 mask;
-	} recover;
-
-	int pbdma_nr;
-
-	struct {
-		struct nvkm_memory *mem[2];
-		int active;
-		wait_queue_head_t wait;
-	} runlist;
-
-	struct {
-		struct nvkm_memory *mem;
-		struct nvkm_vma *bar;
-	} user;
-};
-
-void gf100_fifo_intr_engine(struct gf100_fifo *);
-void gf100_fifo_runlist_insert(struct gf100_fifo *, struct gf100_fifo_chan *);
-void gf100_fifo_runlist_remove(struct gf100_fifo *, struct gf100_fifo_chan *);
-void gf100_fifo_runlist_commit(struct gf100_fifo *);
-#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index e771bd519ee2..d8a4d773a58c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -21,643 +21,318 @@
  *
  * Authors: Ben Skeggs
  */
-#include "gk104.h"
+#include "priv.h"
 #include "cgrp.h"
-#include "changk104.h"
+#include "chan.h"
+#include "chid.h"
+#include "runl.h"
+#include "runq.h"
 
-#include <core/client.h>
 #include <core/gpuobj.h>
-#include <subdev/bar.h>
-#include <subdev/fault.h>
-#include <subdev/timer.h>
+#include <subdev/mc.h>
+#include <subdev/mmu.h>
 #include <subdev/top.h>
-#include <engine/sw.h>
 
 #include <nvif/class.h>
-#include <nvif/cl0080.h>
+#include <nvif/if900d.h>
 
 void
-gk104_fifo_engine_status(struct gk104_fifo *fifo, int engn,
-			 struct gk104_fifo_engine_status *status)
+gk104_chan_stop(struct nvkm_chan *chan)
 {
-	struct nvkm_engine *engine = fifo->engine[engn].engine;
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08));
-
-	status->busy     = !!(stat & 0x80000000);
-	status->faulted  = !!(stat & 0x40000000);
-	status->next.tsg = !!(stat & 0x10000000);
-	status->next.id  =   (stat & 0x0fff0000) >> 16;
-	status->chsw     = !!(stat & 0x00008000);
-	status->save     = !!(stat & 0x00004000);
-	status->load     = !!(stat & 0x00002000);
-	status->prev.tsg = !!(stat & 0x00001000);
-	status->prev.id  =   (stat & 0x00000fff);
-	status->chan     = NULL;
-
-	if (status->busy && status->chsw) {
-		if (status->load && status->save) {
-			if (engine && nvkm_engine_chsw_load(engine))
-				status->chan = &status->next;
-			else
-				status->chan = &status->prev;
-		} else
-		if (status->load) {
-			status->chan = &status->next;
-		} else {
-			status->chan = &status->prev;
-		}
-	} else
-	if (status->load) {
-		status->chan = &status->prev;
-	}
+	struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device;
 
-	nvkm_debug(subdev, "engine %02d: busy %d faulted %d chsw %d "
-			   "save %d load %d %sid %d%s-> %sid %d%s\n",
-		   engn, status->busy, status->faulted,
-		   status->chsw, status->save, status->load,
-		   status->prev.tsg ? "tsg" : "ch", status->prev.id,
-		   status->chan == &status->prev ? "*" : " ",
-		   status->next.tsg ? "tsg" : "ch", status->next.id,
-		   status->chan == &status->next ? "*" : " ");
-}
-
-int
-gk104_fifo_class_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
-		     void *argv, u32 argc, struct nvkm_object **pobject)
-{
-	struct gk104_fifo *fifo = gk104_fifo(base);
-	if (oclass->engn == &fifo->func->chan) {
-		const struct gk104_fifo_chan_user *user = oclass->engn;
-		return user->ctor(fifo, oclass, argv, argc, pobject);
-	} else
-	if (oclass->engn == &fifo->func->user) {
-		const struct gk104_fifo_user_user *user = oclass->engn;
-		return user->ctor(oclass, argv, argc, pobject);
-	}
-	WARN_ON(1);
-	return -EINVAL;
-}
-
-int
-gk104_fifo_class_get(struct nvkm_fifo *base, int index,
-		     struct nvkm_oclass *oclass)
-{
-	struct gk104_fifo *fifo = gk104_fifo(base);
-	int c = 0;
-
-	if (fifo->func->user.ctor && c++ == index) {
-		oclass->base =  fifo->func->user.user;
-		oclass->engn = &fifo->func->user;
-		return 0;
-	}
-
-	if (fifo->func->chan.ctor && c++ == index) {
-		oclass->base =  fifo->func->chan.user;
-		oclass->engn = &fifo->func->chan;
-		return 0;
-	}
-
-	return c;
+	nvkm_mask(device, 0x800004 + (chan->id * 8), 0x00000800, 0x00000800);
 }
 
 void
-gk104_fifo_uevent_fini(struct nvkm_fifo *fifo)
+gk104_chan_start(struct nvkm_chan *chan)
 {
-	struct nvkm_device *device = fifo->engine.subdev.device;
-	nvkm_mask(device, 0x002140, 0x80000000, 0x00000000);
-}
+	struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device;
 
-void
-gk104_fifo_uevent_init(struct nvkm_fifo *fifo)
-{
-	struct nvkm_device *device = fifo->engine.subdev.device;
-	nvkm_mask(device, 0x002140, 0x80000000, 0x80000000);
+	nvkm_mask(device, 0x800004 + (chan->id * 8), 0x00000400, 0x00000400);
 }
 
 void
-gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl,
-			  struct nvkm_memory *mem, int nr)
+gk104_chan_unbind(struct nvkm_chan *chan)
 {
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	int target;
+	struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device;
 
-	switch (nvkm_memory_target(mem)) {
-	case NVKM_MEM_TARGET_VRAM: target = 0; break;
-	case NVKM_MEM_TARGET_NCOH: target = 3; break;
-	default:
-		WARN_ON(1);
-		return;
-	}
-
-	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(mem) >> 12) |
-				    (target << 28));
-	nvkm_wr32(device, 0x002274, (runl << 20) | nr);
-
-	if (nvkm_msec(device, 2000,
-		if (!(nvkm_rd32(device, 0x002284 + (runl * 0x08)) & 0x00100000))
-			break;
-	) < 0)
-		nvkm_error(subdev, "runlist %d update timeout\n", runl);
+	nvkm_wr32(device, 0x800000 + (chan->id * 8), 0x00000000);
 }
 
 void
-gk104_fifo_runlist_update(struct gk104_fifo *fifo, int runl)
+gk104_chan_bind_inst(struct nvkm_chan *chan)
 {
-	const struct gk104_fifo_runlist_func *func = fifo->func->runlist;
-	struct gk104_fifo_chan *chan;
-	struct nvkm_memory *mem;
-	struct nvkm_fifo_cgrp *cgrp;
-	int nr = 0;
-
-	mutex_lock(&fifo->base.mutex);
-	mem = fifo->runlist[runl].mem[fifo->runlist[runl].next];
-	fifo->runlist[runl].next = !fifo->runlist[runl].next;
-
-	nvkm_kmap(mem);
-	list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
-		func->chan(chan, mem, nr++ * func->size);
-	}
-
-	list_for_each_entry(cgrp, &fifo->runlist[runl].cgrp, head) {
-		func->cgrp(cgrp, mem, nr++ * func->size);
-		list_for_each_entry(chan, &cgrp->chan, head) {
-			func->chan(chan, mem, nr++ * func->size);
-		}
-	}
-	nvkm_done(mem);
+	struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device;
 
-	func->commit(fifo, runl, mem, nr);
-	mutex_unlock(&fifo->base.mutex);
+	nvkm_wr32(device, 0x800000 + (chan->id * 8), 0x80000000 | chan->inst->addr >> 12);
 }
 
 void
-gk104_fifo_runlist_remove(struct gk104_fifo *fifo, struct gk104_fifo_chan *chan)
-{
-	struct nvkm_fifo_cgrp *cgrp = chan->cgrp;
-	mutex_lock(&fifo->base.mutex);
-	if (!list_empty(&chan->head)) {
-		list_del_init(&chan->head);
-		if (cgrp && !--cgrp->chan_nr)
-			list_del_init(&cgrp->head);
-	}
-	mutex_unlock(&fifo->base.mutex);
-}
-
-void
-gk104_fifo_runlist_insert(struct gk104_fifo *fifo, struct gk104_fifo_chan *chan)
-{
-	struct nvkm_fifo_cgrp *cgrp = chan->cgrp;
-	mutex_lock(&fifo->base.mutex);
-	if (cgrp) {
-		if (!cgrp->chan_nr++)
-			list_add_tail(&cgrp->head, &fifo->runlist[chan->runl].cgrp);
-		list_add_tail(&chan->head, &cgrp->chan);
-	} else {
-		list_add_tail(&chan->head, &fifo->runlist[chan->runl].chan);
-	}
-	mutex_unlock(&fifo->base.mutex);
-}
-
-void
-gk104_fifo_runlist_chan(struct gk104_fifo_chan *chan,
-			struct nvkm_memory *memory, u32 offset)
-{
-	nvkm_wo32(memory, offset + 0, chan->base.chid);
-	nvkm_wo32(memory, offset + 4, 0x00000000);
-}
-
-const struct gk104_fifo_runlist_func
-gk104_fifo_runlist = {
-	.size = 8,
-	.chan = gk104_fifo_runlist_chan,
-	.commit = gk104_fifo_runlist_commit,
+gk104_chan_bind(struct nvkm_chan *chan)
+{
+	struct nvkm_runl *runl = chan->cgrp->runl;
+	struct nvkm_device *device = runl->fifo->engine.subdev.device;
+
+	nvkm_mask(device, 0x800004 + (chan->id * 8), 0x000f0000, runl->id << 16);
+	gk104_chan_bind_inst(chan);
+}
+
+static int
+gk104_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv)
+{
+	const u64 userd = nvkm_memory_addr(chan->userd.mem) + chan->userd.base;
+	const u32 limit2 = ilog2(length / 8);
+
+	nvkm_kmap(chan->inst);
+	nvkm_wo32(chan->inst, 0x08, lower_32_bits(userd));
+	nvkm_wo32(chan->inst, 0x0c, upper_32_bits(userd));
+	nvkm_wo32(chan->inst, 0x10, 0x0000face);
+	nvkm_wo32(chan->inst, 0x30, 0xfffff902);
+	nvkm_wo32(chan->inst, 0x48, lower_32_bits(offset));
+	nvkm_wo32(chan->inst, 0x4c, upper_32_bits(offset) | (limit2 << 16));
+	nvkm_wo32(chan->inst, 0x84, 0x20400000);
+	nvkm_wo32(chan->inst, 0x94, 0x30000000 | devm);
+	nvkm_wo32(chan->inst, 0x9c, 0x00000100);
+	nvkm_wo32(chan->inst, 0xac, 0x0000001f);
+	nvkm_wo32(chan->inst, 0xe4, priv ? 0x00000020 : 0x00000000);
+	nvkm_wo32(chan->inst, 0xe8, chan->id);
+	nvkm_wo32(chan->inst, 0xb8, 0xf8000000);
+	nvkm_wo32(chan->inst, 0xf8, 0x10003080); /* 0x002310 */
+	nvkm_wo32(chan->inst, 0xfc, 0x10000010); /* 0x002350 */
+	nvkm_done(chan->inst);
+	return 0;
+}
+
+const struct nvkm_chan_func_ramfc
+gk104_chan_ramfc = {
+	.write = gk104_chan_ramfc_write,
+	.devm = 0xfff,
+	.priv = true,
 };
 
-void
-gk104_fifo_pbdma_init(struct gk104_fifo *fifo)
-{
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	nvkm_wr32(device, 0x000204, (1 << fifo->pbdma_nr) - 1);
-}
-
-int
-gk104_fifo_pbdma_nr(struct gk104_fifo *fifo)
-{
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	/* Determine number of PBDMAs by checking valid enable bits. */
-	nvkm_wr32(device, 0x000204, 0xffffffff);
-	return hweight32(nvkm_rd32(device, 0x000204));
-}
-
-const struct gk104_fifo_pbdma_func
-gk104_fifo_pbdma = {
-	.nr = gk104_fifo_pbdma_nr,
-	.init = gk104_fifo_pbdma_init,
+const struct nvkm_chan_func_userd
+gk104_chan_userd = {
+	.bar = 1,
+	.size = 0x200,
+	.clear = gf100_chan_userd_clear,
 };
 
-struct nvkm_engine *
-gk104_fifo_id_engine(struct nvkm_fifo *base, int engi)
-{
-	if (engi == GK104_FIFO_ENGN_SW)
-		return nvkm_device_engine(base->engine.subdev.device, NVKM_ENGINE_SW, 0);
-
-	return gk104_fifo(base)->engine[engi].engine;
-}
-
-int
-gk104_fifo_engine_id(struct nvkm_fifo *base, struct nvkm_engine *engine)
-{
-	struct gk104_fifo *fifo = gk104_fifo(base);
-	int engn;
+static const struct nvkm_chan_func
+gk104_chan = {
+	.inst = &gf100_chan_inst,
+	.userd = &gk104_chan_userd,
+	.ramfc = &gk104_chan_ramfc,
+	.bind = gk104_chan_bind,
+	.unbind = gk104_chan_unbind,
+	.start = gk104_chan_start,
+	.stop = gk104_chan_stop,
+	.preempt = gf100_chan_preempt,
+};
 
-	if (engine->subdev.type == NVKM_ENGINE_SW)
-		return GK104_FIFO_ENGN_SW;
+static void
+gk104_ectx_bind(struct nvkm_engn *engn, struct nvkm_cctx *cctx, struct nvkm_chan *chan)
+{
+	u32 ptr0, ptr1 = 0;
+	u64 addr = 0ULL;
+
+	switch (engn->engine->subdev.type) {
+	case NVKM_ENGINE_SW    : return;
+	case NVKM_ENGINE_GR    : ptr0 = 0x0210; break;
+	case NVKM_ENGINE_SEC   : ptr0 = 0x0220; break;
+	case NVKM_ENGINE_MSPDEC: ptr0 = 0x0250; break;
+	case NVKM_ENGINE_MSPPP : ptr0 = 0x0260; break;
+	case NVKM_ENGINE_MSVLD : ptr0 = 0x0270; break;
+	case NVKM_ENGINE_VIC   : ptr0 = 0x0280; break;
+	case NVKM_ENGINE_MSENC : ptr0 = 0x0290; break;
+	case NVKM_ENGINE_NVDEC :
+		ptr1 = 0x0270;
+		ptr0 = 0x0210;
+		break;
+	case NVKM_ENGINE_NVENC :
+		if (!engn->engine->subdev.inst)
+			ptr1 = 0x0290;
+		ptr0 = 0x0210;
+		break;
+	default:
+		WARN_ON(1);
+		return;
+	}
 
-	for (engn = 0; engn < fifo->engine_nr && engine; engn++) {
-		if (fifo->engine[engn].engine == engine)
-			return engn;
+	if (cctx) {
+		addr  = cctx->vctx->vma->addr;
+		addr |= 4ULL;
 	}
 
-	WARN_ON(1);
-	return -1;
+	nvkm_kmap(chan->inst);
+	nvkm_wo32(chan->inst, ptr0 + 0, lower_32_bits(addr));
+	nvkm_wo32(chan->inst, ptr0 + 4, upper_32_bits(addr));
+	if (ptr1) {
+		nvkm_wo32(chan->inst, ptr1 + 0, lower_32_bits(addr));
+		nvkm_wo32(chan->inst, ptr1 + 4, upper_32_bits(addr));
+	}
+	nvkm_done(chan->inst);
 }
 
-static void
-gk104_fifo_recover_work(struct work_struct *w)
+int
+gk104_ectx_ctor(struct nvkm_engn *engn, struct nvkm_vctx *vctx)
 {
-	struct gk104_fifo *fifo = container_of(w, typeof(*fifo), recover.work);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct nvkm_engine *engine;
-	unsigned long flags;
-	u32 engm, runm, todo;
-	int engn, runl;
-
-	spin_lock_irqsave(&fifo->base.lock, flags);
-	runm = fifo->recover.runm;
-	engm = fifo->recover.engm;
-	fifo->recover.engm = 0;
-	fifo->recover.runm = 0;
-	spin_unlock_irqrestore(&fifo->base.lock, flags);
-
-	nvkm_mask(device, 0x002630, runm, runm);
-
-	for (todo = engm; engn = __ffs(todo), todo; todo &= ~BIT(engn)) {
-		if ((engine = fifo->engine[engn].engine)) {
-			nvkm_subdev_fini(&engine->subdev, false);
-			WARN_ON(nvkm_subdev_init(&engine->subdev));
-		}
-	}
+	struct gf100_vmm_map_v0 args = { .priv = 1 };
+	int ret;
 
-	for (todo = runm; runl = __ffs(todo), todo; todo &= ~BIT(runl))
-		gk104_fifo_runlist_update(fifo, runl);
+	ret = nvkm_vmm_get(vctx->vmm, 12, vctx->inst->size, &vctx->vma);
+	if (ret)
+		return ret;
 
-	nvkm_wr32(device, 0x00262c, runm);
-	nvkm_mask(device, 0x002630, runm, 0x00000000);
+	return nvkm_memory_map(vctx->inst, 0, vctx->vmm, vctx->vma, &args, sizeof(args));
 }
 
-static void gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn);
+/*TODO: clean this up */
+struct gk104_engn_status {
+	bool busy;
+	bool faulted;
+	bool chsw;
+	bool save;
+	bool load;
+	struct {
+		bool tsg;
+		u32 id;
+	} prev, next, *chan;
+};
 
 static void
-gk104_fifo_recover_runl(struct gk104_fifo *fifo, int runl)
-{
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	const u32 runm = BIT(runl);
-
-	assert_spin_locked(&fifo->base.lock);
-	if (fifo->recover.runm & runm)
-		return;
-	fifo->recover.runm |= runm;
-
-	/* Block runlist to prevent channel assignment(s) from changing. */
-	nvkm_mask(device, 0x002630, runm, runm);
-
-	/* Schedule recovery. */
-	nvkm_warn(subdev, "runlist %d: scheduled for recovery\n", runl);
-	schedule_work(&fifo->recover.work);
-}
-
-static struct gk104_fifo_chan *
-gk104_fifo_recover_chid(struct gk104_fifo *fifo, int runl, int chid)
+gk104_engn_status(struct nvkm_engn *engn, struct gk104_engn_status *status)
 {
-	struct gk104_fifo_chan *chan;
-	struct nvkm_fifo_cgrp *cgrp;
+	u32 stat = nvkm_rd32(engn->runl->fifo->engine.subdev.device, 0x002640 + (engn->id * 0x08));
 
-	list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
-		if (chan->base.chid == chid) {
-			list_del_init(&chan->head);
-			return chan;
-		}
-	}
+	status->busy     = !!(stat & 0x80000000);
+	status->faulted  = !!(stat & 0x40000000);
+	status->next.tsg = !!(stat & 0x10000000);
+	status->next.id  =   (stat & 0x0fff0000) >> 16;
+	status->chsw     = !!(stat & 0x00008000);
+	status->save     = !!(stat & 0x00004000);
+	status->load     = !!(stat & 0x00002000);
+	status->prev.tsg = !!(stat & 0x00001000);
+	status->prev.id  =   (stat & 0x00000fff);
+	status->chan     = NULL;
 
-	list_for_each_entry(cgrp, &fifo->runlist[runl].cgrp, head) {
-		if (cgrp->id == chid) {
-			chan = list_first_entry(&cgrp->chan, typeof(*chan), head);
-			list_del_init(&chan->head);
-			if (!--cgrp->chan_nr)
-				list_del_init(&cgrp->head);
-			return chan;
+	if (status->busy && status->chsw) {
+		if (status->load && status->save) {
+			if (nvkm_engine_chsw_load(engn->engine))
+				status->chan = &status->next;
+			else
+				status->chan = &status->prev;
+		} else
+		if (status->load) {
+			status->chan = &status->next;
+		} else {
+			status->chan = &status->prev;
 		}
+	} else
+	if (status->load) {
+		status->chan = &status->prev;
 	}
 
-	return NULL;
-}
-
-static void
-gk104_fifo_recover_chan(struct nvkm_fifo *base, int chid)
-{
-	struct gk104_fifo *fifo = gk104_fifo(base);
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	const u32  stat = nvkm_rd32(device, 0x800004 + (chid * 0x08));
-	const u32  runl = (stat & 0x000f0000) >> 16;
-	const bool used = (stat & 0x00000001);
-	unsigned long engn, engm = fifo->runlist[runl].engm;
-	struct gk104_fifo_chan *chan;
-
-	assert_spin_locked(&fifo->base.lock);
-	if (!used)
-		return;
-
-	/* Lookup SW state for channel, and mark it as dead. */
-	chan = gk104_fifo_recover_chid(fifo, runl, chid);
-	if (chan) {
-		chan->killed = true;
-		nvkm_fifo_kevent(&fifo->base, chid);
-	}
-
-	/* Disable channel. */
-	nvkm_wr32(device, 0x800004 + (chid * 0x08), stat | 0x00000800);
-	nvkm_warn(subdev, "channel %d: killed\n", chid);
-
-	/* Block channel assignments from changing during recovery. */
-	gk104_fifo_recover_runl(fifo, runl);
-
-	/* Schedule recovery for any engines the channel is on. */
-	for_each_set_bit(engn, &engm, fifo->engine_nr) {
-		struct gk104_fifo_engine_status status;
-		gk104_fifo_engine_status(fifo, engn, &status);
-		if (!status.chan || status.chan->id != chid)
-			continue;
-		gk104_fifo_recover_engn(fifo, engn);
-	}
+	ENGN_DEBUG(engn, "%08x: busy %d faulted %d chsw %d save %d load %d %sid %d%s-> %sid %d%s",
+		   stat, status->busy, status->faulted, status->chsw, status->save, status->load,
+		   status->prev.tsg ? "tsg" : "ch", status->prev.id,
+		   status->chan == &status->prev ? "*" : " ",
+		   status->next.tsg ? "tsg" : "ch", status->next.id,
+		   status->chan == &status->next ? "*" : " ");
 }
 
-static void
-gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn)
+int
+gk104_engn_cxid(struct nvkm_engn *engn, bool *cgid)
 {
-	struct nvkm_engine *engine = fifo->engine[engn].engine;
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	const u32 runl = fifo->engine[engn].runl;
-	const u32 engm = BIT(engn);
-	struct gk104_fifo_engine_status status;
-	int mmui = -1;
-
-	assert_spin_locked(&fifo->base.lock);
-	if (fifo->recover.engm & engm)
-		return;
-	fifo->recover.engm |= engm;
-
-	/* Block channel assignments from changing during recovery. */
-	gk104_fifo_recover_runl(fifo, runl);
+	struct gk104_engn_status status;
 
-	/* Determine which channel (if any) is currently on the engine. */
-	gk104_fifo_engine_status(fifo, engn, &status);
+	gk104_engn_status(engn, &status);
 	if (status.chan) {
-		/* The channel is not longer viable, kill it. */
-		gk104_fifo_recover_chan(&fifo->base, status.chan->id);
+		*cgid = status.chan->tsg;
+		return status.chan->id;
 	}
 
-	/* Determine MMU fault ID for the engine, if we're not being
-	 * called from the fault handler already.
-	 */
-	if (!status.faulted && engine) {
-		mmui = nvkm_top_fault_id(device, engine->subdev.type, engine->subdev.inst);
-		if (mmui < 0) {
-			const struct nvkm_enum *en = fifo->func->fault.engine;
-			for (; en && en->name; en++) {
-				if (en->data2 == engine->subdev.type &&
-				    en->inst  == engine->subdev.inst) {
-					mmui = en->value;
-					break;
-				}
-			}
-		}
-		WARN_ON(mmui < 0);
-	}
-
-	/* Trigger a MMU fault for the engine.
-	 *
-	 * No good idea why this is needed, but nvgpu does something similar,
-	 * and it makes recovery from CTXSW_TIMEOUT a lot more reliable.
-	 */
-	if (mmui >= 0) {
-		nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000100 | mmui);
-
-		/* Wait for fault to trigger. */
-		nvkm_msec(device, 2000,
-			gk104_fifo_engine_status(fifo, engn, &status);
-			if (status.faulted)
-				break;
-		);
-
-		/* Release MMU fault trigger, and ACK the fault. */
-		nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000000);
-		nvkm_wr32(device, 0x00259c, BIT(mmui));
-		nvkm_wr32(device, 0x002100, 0x10000000);
-	}
-
-	/* Schedule recovery. */
-	nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn);
-	schedule_work(&fifo->recover.work);
+	return -ENODEV;
 }
 
-static void
-gk104_fifo_fault(struct nvkm_fifo *base, struct nvkm_fault_data *info)
+bool
+gk104_engn_chsw(struct nvkm_engn *engn)
 {
-	struct gk104_fifo *fifo = gk104_fifo(base);
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	const struct nvkm_enum *er, *ee, *ec, *ea;
-	struct nvkm_engine *engine = NULL;
-	struct nvkm_fifo_chan *chan;
-	unsigned long flags;
-	const char *en = "";
-	char ct[8] = "HUB/";
-
-	er = nvkm_enum_find(fifo->func->fault.reason, info->reason);
-	ee = nvkm_enum_find(fifo->func->fault.engine, info->engine);
-	if (info->hub) {
-		ec = nvkm_enum_find(fifo->func->fault.hubclient, info->client);
-	} else {
-		ec = nvkm_enum_find(fifo->func->fault.gpcclient, info->client);
-		snprintf(ct, sizeof(ct), "GPC%d/", info->gpc);
-	}
-	ea = nvkm_enum_find(fifo->func->fault.access, info->access);
+	struct gk104_engn_status status;
 
-	if (ee && ee->data2) {
-		switch (ee->data2) {
-		case NVKM_SUBDEV_BAR:
-			nvkm_bar_bar1_reset(device);
-			break;
-		case NVKM_SUBDEV_INSTMEM:
-			nvkm_bar_bar2_reset(device);
-			break;
-		case NVKM_ENGINE_IFB:
-			nvkm_mask(device, 0x001718, 0x00000000, 0x00000000);
-			break;
-		default:
-			engine = nvkm_device_engine(device, ee->data2, 0);
-			break;
-		}
-	}
-
-	if (ee == NULL) {
-		struct nvkm_subdev *subdev = nvkm_top_fault(device, info->engine);
-		if (subdev) {
-			if (subdev->func == &nvkm_engine)
-				engine = container_of(subdev, typeof(*engine), subdev);
-			en = engine->subdev.name;
-		}
-	} else {
-		en = ee->name;
-	}
-
-	spin_lock_irqsave(&fifo->base.lock, flags);
-	chan = nvkm_fifo_chan_inst_locked(&fifo->base, info->inst);
-
-	nvkm_error(subdev,
-		   "fault %02x [%s] at %016llx engine %02x [%s] client %02x "
-		   "[%s%s] reason %02x [%s] on channel %d [%010llx %s]\n",
-		   info->access, ea ? ea->name : "", info->addr,
-		   info->engine, ee ? ee->name : en,
-		   info->client, ct, ec ? ec->name : "",
-		   info->reason, er ? er->name : "", chan ? chan->chid : -1,
-		   info->inst, chan ? chan->object.client->name : "unknown");
-
-	/* Kill the channel that caused the fault. */
-	if (chan)
-		gk104_fifo_recover_chan(&fifo->base, chan->chid);
-
-	/* Channel recovery will probably have already done this for the
-	 * correct engine(s), but just in case we can't find the channel
-	 * information...
-	 */
-	if (engine) {
-		int engn = fifo->base.func->engine_id(&fifo->base, engine);
-		if (engn >= 0 && engn != GK104_FIFO_ENGN_SW)
-			gk104_fifo_recover_engn(fifo, engn);
-	}
+	gk104_engn_status(engn, &status);
+	if (status.busy && status.chsw)
+		return true;
 
-	spin_unlock_irqrestore(&fifo->base.lock, flags);
+	return false;
 }
 
-static const struct nvkm_enum
-gk104_fifo_bind_reason[] = {
-	{ 0x01, "BIND_NOT_UNBOUND" },
-	{ 0x02, "SNOOP_WITHOUT_BAR1" },
-	{ 0x03, "UNBIND_WHILE_RUNNING" },
-	{ 0x05, "INVALID_RUNLIST" },
-	{ 0x06, "INVALID_CTX_TGT" },
-	{ 0x0b, "UNBIND_WHILE_PARKED" },
-	{}
+const struct nvkm_engn_func
+gk104_engn = {
+	.chsw = gk104_engn_chsw,
+	.cxid = gk104_engn_cxid,
+	.mmu_fault_trigger = gf100_engn_mmu_fault_trigger,
+	.mmu_fault_triggered = gf100_engn_mmu_fault_triggered,
+	.ctor = gk104_ectx_ctor,
+	.bind = gk104_ectx_bind,
 };
 
-void
-gk104_fifo_intr_bind(struct gk104_fifo *fifo)
+const struct nvkm_engn_func
+gk104_engn_ce = {
+	.chsw = gk104_engn_chsw,
+	.cxid = gk104_engn_cxid,
+	.mmu_fault_trigger = gf100_engn_mmu_fault_trigger,
+	.mmu_fault_triggered = gf100_engn_mmu_fault_triggered,
+};
+
+bool
+gk104_runq_idle(struct nvkm_runq *runq)
 {
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	u32 intr = nvkm_rd32(device, 0x00252c);
-	u32 code = intr & 0x000000ff;
-	const struct nvkm_enum *en =
-		nvkm_enum_find(gk104_fifo_bind_reason, code);
+	struct nvkm_device *device = runq->fifo->engine.subdev.device;
 
-	nvkm_error(subdev, "BIND_ERROR %02x [%s]\n", code, en ? en->name : "");
+	return !(nvkm_rd32(device, 0x003080 + (runq->id * 4)) & 0x0000e000);
 }
 
-static const struct nvkm_enum
-gk104_fifo_sched_reason[] = {
-	{ 0x0a, "CTXSW_TIMEOUT" },
+static const struct nvkm_bitfield
+gk104_runq_intr_1_names[] = {
+	{ 0x00000001, "HCE_RE_ILLEGAL_OP" },
+	{ 0x00000002, "HCE_RE_ALIGNB" },
+	{ 0x00000004, "HCE_PRIV" },
+	{ 0x00000008, "HCE_ILLEGAL_MTHD" },
+	{ 0x00000010, "HCE_ILLEGAL_CLASS" },
 	{}
 };
 
-static void
-gk104_fifo_intr_sched_ctxsw(struct gk104_fifo *fifo)
+static bool
+gk104_runq_intr_1(struct nvkm_runq *runq)
 {
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	unsigned long flags, engm = 0;
-	u32 engn;
-
-	/* We need to ACK the SCHED_ERROR here, and prevent it reasserting,
-	 * as MMU_FAULT cannot be triggered while it's pending.
-	 */
-	spin_lock_irqsave(&fifo->base.lock, flags);
-	nvkm_mask(device, 0x002140, 0x00000100, 0x00000000);
-	nvkm_wr32(device, 0x002100, 0x00000100);
-
-	for (engn = 0; engn < fifo->engine_nr; engn++) {
-		struct gk104_fifo_engine_status status;
-
-		gk104_fifo_engine_status(fifo, engn, &status);
-		if (!status.busy || !status.chsw)
-			continue;
-
-		engm |= BIT(engn);
-	}
-
-	for_each_set_bit(engn, &engm, fifo->engine_nr)
-		gk104_fifo_recover_engn(fifo, engn);
-
-	nvkm_mask(device, 0x002140, 0x00000100, 0x00000100);
-	spin_unlock_irqrestore(&fifo->base.lock, flags);
-}
-
-static void
-gk104_fifo_intr_sched(struct gk104_fifo *fifo)
-{
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_subdev *subdev = &runq->fifo->engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	u32 intr = nvkm_rd32(device, 0x00254c);
-	u32 code = intr & 0x000000ff;
-	const struct nvkm_enum *en =
-		nvkm_enum_find(gk104_fifo_sched_reason, code);
-
-	nvkm_error(subdev, "SCHED_ERROR %02x [%s]\n", code, en ? en->name : "");
+	u32 mask = nvkm_rd32(device, 0x04014c + (runq->id * 0x2000));
+	u32 stat = nvkm_rd32(device, 0x040148 + (runq->id * 0x2000)) & mask;
+	u32 chid = nvkm_rd32(device, 0x040120 + (runq->id * 0x2000)) & 0xfff;
+	char msg[128];
 
-	switch (code) {
-	case 0x0a:
-		gk104_fifo_intr_sched_ctxsw(fifo);
-		break;
-	default:
-		break;
+	if (stat & 0x80000000) {
+		if (runq->func->intr_1_ctxnotvalid &&
+		    runq->func->intr_1_ctxnotvalid(runq, chid))
+			stat &= ~0x80000000;
 	}
-}
 
-void
-gk104_fifo_intr_chsw(struct gk104_fifo *fifo)
-{
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	u32 stat = nvkm_rd32(device, 0x00256c);
-	nvkm_error(subdev, "CHSW_ERROR %08x\n", stat);
-	nvkm_wr32(device, 0x00256c, stat);
-}
+	if (stat) {
+		nvkm_snprintbf(msg, sizeof(msg), gk104_runq_intr_1_names, stat);
+		nvkm_error(subdev, "PBDMA%d: %08x [%s] ch %d %08x %08x\n",
+			   runq->id, stat, msg, chid,
+			   nvkm_rd32(device, 0x040150 + (runq->id * 0x2000)),
+			   nvkm_rd32(device, 0x040154 + (runq->id * 0x2000)));
+	}
 
-void
-gk104_fifo_intr_dropped_fault(struct gk104_fifo *fifo)
-{
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	u32 stat = nvkm_rd32(device, 0x00259c);
-	nvkm_error(subdev, "DROPPED_MMU_FAULT %08x\n", stat);
+	nvkm_wr32(device, 0x040148 + (runq->id * 0x2000), stat);
+	return true;
 }
 
-static const struct nvkm_bitfield gk104_fifo_pbdma_intr_0[] = {
+const struct nvkm_bitfield
+gk104_runq_intr_0_names[] = {
 	{ 0x00000001, "MEMREQ" },
 	{ 0x00000002, "MEMACK_TIMEOUT" },
 	{ 0x00000004, "MEMACK_EXTRA" },
@@ -691,430 +366,111 @@ static const struct nvkm_bitfield gk104_fifo_pbdma_intr_0[] = {
 	{}
 };
 
-void
-gk104_fifo_intr_pbdma_0(struct gk104_fifo *fifo, int unit)
+bool
+gk104_runq_intr(struct nvkm_runq *runq, struct nvkm_runl *null)
 {
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	u32 mask = nvkm_rd32(device, 0x04010c + (unit * 0x2000));
-	u32 stat = nvkm_rd32(device, 0x040108 + (unit * 0x2000)) & mask;
-	u32 addr = nvkm_rd32(device, 0x0400c0 + (unit * 0x2000));
-	u32 data = nvkm_rd32(device, 0x0400c4 + (unit * 0x2000));
-	u32 chid = nvkm_rd32(device, 0x040120 + (unit * 0x2000)) & 0xfff;
-	u32 subc = (addr & 0x00070000) >> 16;
-	u32 mthd = (addr & 0x00003ffc);
-	u32 show = stat;
-	struct nvkm_fifo_chan *chan;
-	unsigned long flags;
-	char msg[128];
-
-	if (stat & 0x00800000) {
-		if (device->sw) {
-			if (nvkm_sw_mthd(device->sw, chid, subc, mthd, data))
-				show &= ~0x00800000;
-		}
-	}
-
-	nvkm_wr32(device, 0x0400c0 + (unit * 0x2000), 0x80600008);
-
-	if (show) {
-		nvkm_snprintbf(msg, sizeof(msg), gk104_fifo_pbdma_intr_0, show);
-		chan = nvkm_fifo_chan_chid(&fifo->base, chid, &flags);
-		nvkm_error(subdev, "PBDMA%d: %08x [%s] ch %d [%010llx %s] "
-				   "subc %d mthd %04x data %08x\n",
-			   unit, show, msg, chid, chan ? chan->inst->addr : 0,
-			   chan ? chan->object.client->name : "unknown",
-			   subc, mthd, data);
-		nvkm_fifo_chan_put(&fifo->base, flags, &chan);
-	}
+	bool intr0 = gf100_runq_intr(runq, NULL);
+	bool intr1 = gk104_runq_intr_1(runq);
 
-	nvkm_wr32(device, 0x040108 + (unit * 0x2000), stat);
+	return intr0 || intr1;
 }
 
-static const struct nvkm_bitfield gk104_fifo_pbdma_intr_1[] = {
-	{ 0x00000001, "HCE_RE_ILLEGAL_OP" },
-	{ 0x00000002, "HCE_RE_ALIGNB" },
-	{ 0x00000004, "HCE_PRIV" },
-	{ 0x00000008, "HCE_ILLEGAL_MTHD" },
-	{ 0x00000010, "HCE_ILLEGAL_CLASS" },
-	{}
-};
-
 void
-gk104_fifo_intr_pbdma_1(struct gk104_fifo *fifo, int unit)
+gk104_runq_init(struct nvkm_runq *runq)
 {
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	u32 mask = nvkm_rd32(device, 0x04014c + (unit * 0x2000));
-	u32 stat = nvkm_rd32(device, 0x040148 + (unit * 0x2000)) & mask;
-	u32 chid = nvkm_rd32(device, 0x040120 + (unit * 0x2000)) & 0xfff;
-	char msg[128];
+	struct nvkm_device *device = runq->fifo->engine.subdev.device;
 
-	if (stat) {
-		nvkm_snprintbf(msg, sizeof(msg), gk104_fifo_pbdma_intr_1, stat);
-		nvkm_error(subdev, "PBDMA%d: %08x [%s] ch %d %08x %08x\n",
-			   unit, stat, msg, chid,
-			   nvkm_rd32(device, 0x040150 + (unit * 0x2000)),
-			   nvkm_rd32(device, 0x040154 + (unit * 0x2000)));
-	}
+	gf100_runq_init(runq);
 
-	nvkm_wr32(device, 0x040148 + (unit * 0x2000), stat);
+	nvkm_wr32(device, 0x040148 + (runq->id * 0x2000), 0xffffffff); /* HCE.INTR */
+	nvkm_wr32(device, 0x04014c + (runq->id * 0x2000), 0xffffffff); /* HCE.INTREN */
 }
 
-void
-gk104_fifo_intr_runlist(struct gk104_fifo *fifo)
+static u32
+gk104_runq_runm(struct nvkm_runq *runq)
 {
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	u32 mask = nvkm_rd32(device, 0x002a00);
-	while (mask) {
-		int runl = __ffs(mask);
-		wake_up(&fifo->runlist[runl].wait);
-		nvkm_wr32(device, 0x002a00, 1 << runl);
-		mask &= ~(1 << runl);
-	}
+	return nvkm_rd32(runq->fifo->engine.subdev.device, 0x002390 + (runq->id * 0x04));
 }
 
+const struct nvkm_runq_func
+gk104_runq = {
+	.init = gk104_runq_init,
+	.intr = gk104_runq_intr,
+	.intr_0_names = gk104_runq_intr_0_names,
+	.idle = gk104_runq_idle,
+};
+
 void
-gk104_fifo_intr_engine(struct gk104_fifo *fifo)
+gk104_runl_fault_clear(struct nvkm_runl *runl)
 {
-	nvkm_fifo_uevent(&fifo->base);
-}
-
-static void
-gk104_fifo_intr(struct nvkm_fifo *base)
-{
-	struct gk104_fifo *fifo = gk104_fifo(base);
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	u32 mask = nvkm_rd32(device, 0x002140);
-	u32 stat = nvkm_rd32(device, 0x002100) & mask;
-
-	if (stat & 0x00000001) {
-		gk104_fifo_intr_bind(fifo);
-		nvkm_wr32(device, 0x002100, 0x00000001);
-		stat &= ~0x00000001;
-	}
-
-	if (stat & 0x00000010) {
-		nvkm_error(subdev, "PIO_ERROR\n");
-		nvkm_wr32(device, 0x002100, 0x00000010);
-		stat &= ~0x00000010;
-	}
-
-	if (stat & 0x00000100) {
-		gk104_fifo_intr_sched(fifo);
-		nvkm_wr32(device, 0x002100, 0x00000100);
-		stat &= ~0x00000100;
-	}
-
-	if (stat & 0x00010000) {
-		gk104_fifo_intr_chsw(fifo);
-		nvkm_wr32(device, 0x002100, 0x00010000);
-		stat &= ~0x00010000;
-	}
-
-	if (stat & 0x00800000) {
-		nvkm_error(subdev, "FB_FLUSH_TIMEOUT\n");
-		nvkm_wr32(device, 0x002100, 0x00800000);
-		stat &= ~0x00800000;
-	}
-
-	if (stat & 0x01000000) {
-		nvkm_error(subdev, "LB_ERROR\n");
-		nvkm_wr32(device, 0x002100, 0x01000000);
-		stat &= ~0x01000000;
-	}
-
-	if (stat & 0x08000000) {
-		gk104_fifo_intr_dropped_fault(fifo);
-		nvkm_wr32(device, 0x002100, 0x08000000);
-		stat &= ~0x08000000;
-	}
-
-	if (stat & 0x10000000) {
-		u32 mask = nvkm_rd32(device, 0x00259c);
-		while (mask) {
-			u32 unit = __ffs(mask);
-			fifo->func->intr.fault(&fifo->base, unit);
-			nvkm_wr32(device, 0x00259c, (1 << unit));
-			mask &= ~(1 << unit);
-		}
-		stat &= ~0x10000000;
-	}
-
-	if (stat & 0x20000000) {
-		u32 mask = nvkm_rd32(device, 0x0025a0);
-		while (mask) {
-			u32 unit = __ffs(mask);
-			gk104_fifo_intr_pbdma_0(fifo, unit);
-			gk104_fifo_intr_pbdma_1(fifo, unit);
-			nvkm_wr32(device, 0x0025a0, (1 << unit));
-			mask &= ~(1 << unit);
-		}
-		stat &= ~0x20000000;
-	}
-
-	if (stat & 0x40000000) {
-		gk104_fifo_intr_runlist(fifo);
-		stat &= ~0x40000000;
-	}
-
-	if (stat & 0x80000000) {
-		nvkm_wr32(device, 0x002100, 0x80000000);
-		gk104_fifo_intr_engine(fifo);
-		stat &= ~0x80000000;
-	}
-
-	if (stat) {
-		nvkm_error(subdev, "INTR %08x\n", stat);
-		nvkm_mask(device, 0x002140, stat, 0x00000000);
-		nvkm_wr32(device, 0x002100, stat);
-	}
+	nvkm_wr32(runl->fifo->engine.subdev.device, 0x00262c, BIT(runl->id));
 }
 
 void
-gk104_fifo_fini(struct nvkm_fifo *base)
+gk104_runl_allow(struct nvkm_runl *runl, u32 engm)
 {
-	struct gk104_fifo *fifo = gk104_fifo(base);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	flush_work(&fifo->recover.work);
-	/* allow mmu fault interrupts, even when we're not using fifo */
-	nvkm_mask(device, 0x002140, 0x10000000, 0x10000000);
+	nvkm_mask(runl->fifo->engine.subdev.device, 0x002630, BIT(runl->id), 0x00000000);
 }
 
-int
-gk104_fifo_info(struct nvkm_fifo *base, u64 mthd, u64 *data)
+void
+gk104_runl_block(struct nvkm_runl *runl, u32 engm)
 {
-	struct gk104_fifo *fifo = gk104_fifo(base);
-	switch (mthd) {
-	case NV_DEVICE_HOST_RUNLISTS:
-		*data = (1ULL << fifo->runlist_nr) - 1;
-		return 0;
-	case NV_DEVICE_HOST_RUNLIST_ENGINES: {
-		if (*data < fifo->runlist_nr) {
-			unsigned long engm = fifo->runlist[*data].engm;
-			struct nvkm_engine *engine;
-			int engn;
-			*data = 0;
-			for_each_set_bit(engn, &engm, fifo->engine_nr) {
-				if ((engine = fifo->engine[engn].engine)) {
-#define CASE(n) case NVKM_ENGINE_##n: *data |= NV_DEVICE_HOST_RUNLIST_ENGINES_##n; break
-					switch (engine->subdev.type) {
-					CASE(SW    );
-					CASE(GR    );
-					CASE(MPEG  );
-					CASE(ME    );
-					CASE(CIPHER);
-					CASE(BSP   );
-					CASE(VP    );
-					CASE(CE    );
-					CASE(SEC   );
-					CASE(MSVLD );
-					CASE(MSPDEC);
-					CASE(MSPPP );
-					CASE(MSENC );
-					CASE(VIC   );
-					CASE(SEC2  );
-					CASE(NVDEC );
-					CASE(NVENC );
-					default:
-						WARN_ON(1);
-						break;
-					}
-				}
-			}
-			return 0;
-		}
-	}
-		return -EINVAL;
-	default:
-		return -EINVAL;
-	}
+	nvkm_mask(runl->fifo->engine.subdev.device, 0x002630, BIT(runl->id), BIT(runl->id));
 }
 
-int
-gk104_fifo_oneinit(struct nvkm_fifo *base)
+bool
+gk104_runl_pending(struct nvkm_runl *runl)
 {
-	struct gk104_fifo *fifo = gk104_fifo(base);
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	struct nvkm_vmm *bar = nvkm_bar_bar1_vmm(device);
-	struct nvkm_top_device *tdev;
-	int pbid, ret, i, j;
-	u32 *map;
-
-	fifo->pbdma_nr = fifo->func->pbdma->nr(fifo);
-	nvkm_debug(subdev, "%d PBDMA(s)\n", fifo->pbdma_nr);
-
-	/* Read PBDMA->runlist(s) mapping from HW. */
-	if (!(map = kcalloc(fifo->pbdma_nr, sizeof(*map), GFP_KERNEL)))
-		return -ENOMEM;
+	struct nvkm_device *device = runl->fifo->engine.subdev.device;
 
-	for (i = 0; i < fifo->pbdma_nr; i++)
-		map[i] = nvkm_rd32(device, 0x002390 + (i * 0x04));
-
-	/* Determine runlist configuration from topology device info. */
-	list_for_each_entry(tdev, &device->top->device, head) {
-		const int engn = tdev->engine;
-		char _en[16], *en;
-
-		if (engn < 0)
-			continue;
-
-		/* Determine which PBDMA handles requests for this engine. */
-		for (j = 0, pbid = -1; j < fifo->pbdma_nr; j++) {
-			if (map[j] & BIT(tdev->runlist)) {
-				pbid = j;
-				break;
-			}
-		}
-
-		fifo->engine[engn].engine = nvkm_device_engine(device, tdev->type, tdev->inst);
-		if (!fifo->engine[engn].engine) {
-			snprintf(_en, sizeof(_en), "%s, %d",
-				 nvkm_subdev_type[tdev->type], tdev->inst);
-			en = _en;
-		} else {
-			en = fifo->engine[engn].engine->subdev.name;
-		}
-
-		nvkm_debug(subdev, "engine %2d: runlist %2d pbdma %2d (%s)\n",
-			   tdev->engine, tdev->runlist, pbid, en);
-
-		fifo->engine[engn].runl = tdev->runlist;
-		fifo->engine[engn].pbid = pbid;
-		fifo->engine_nr = max(fifo->engine_nr, engn + 1);
-		fifo->runlist[tdev->runlist].engm |= BIT(engn);
-		fifo->runlist[tdev->runlist].engm_sw |= BIT(engn);
-		if (tdev->type == NVKM_ENGINE_GR)
-			fifo->runlist[tdev->runlist].engm_sw |= BIT(GK104_FIFO_ENGN_SW);
-		fifo->runlist_nr = max(fifo->runlist_nr, tdev->runlist + 1);
-	}
-
-	kfree(map);
-
-	for (i = 0; i < fifo->runlist_nr; i++) {
-		for (j = 0; j < ARRAY_SIZE(fifo->runlist[i].mem); j++) {
-			ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
-					      fifo->base.nr * 2/* TSG+chan */ *
-					      fifo->func->runlist->size,
-					      0x1000, false,
-					      &fifo->runlist[i].mem[j]);
-			if (ret)
-				return ret;
-		}
-
-		init_waitqueue_head(&fifo->runlist[i].wait);
-		INIT_LIST_HEAD(&fifo->runlist[i].cgrp);
-		INIT_LIST_HEAD(&fifo->runlist[i].chan);
-	}
-
-	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
-			      fifo->base.nr * 0x200, 0x1000, true,
-			      &fifo->user.mem);
-	if (ret)
-		return ret;
-
-	ret = nvkm_vmm_get(bar, 12, nvkm_memory_size(fifo->user.mem),
-			   &fifo->user.bar);
-	if (ret)
-		return ret;
-
-	return nvkm_memory_map(fifo->user.mem, 0, bar, fifo->user.bar, NULL, 0);
+	return nvkm_rd32(device, 0x002284 + (runl->id * 0x08)) & 0x00100000;
 }
 
 void
-gk104_fifo_init(struct nvkm_fifo *base)
+gk104_runl_commit(struct nvkm_runl *runl, struct nvkm_memory *memory, u32 start, int count)
 {
-	struct gk104_fifo *fifo = gk104_fifo(base);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	int i;
-
-	/* Enable PBDMAs. */
-	fifo->func->pbdma->init(fifo);
-
-	/* PBDMA[n] */
-	for (i = 0; i < fifo->pbdma_nr; i++) {
-		nvkm_mask(device, 0x04013c + (i * 0x2000), 0x10000100, 0x00000000);
-		nvkm_wr32(device, 0x040108 + (i * 0x2000), 0xffffffff); /* INTR */
-		nvkm_wr32(device, 0x04010c + (i * 0x2000), 0xfffffeff); /* INTREN */
-	}
-
-	/* PBDMA[n].HCE */
-	for (i = 0; i < fifo->pbdma_nr; i++) {
-		nvkm_wr32(device, 0x040148 + (i * 0x2000), 0xffffffff); /* INTR */
-		nvkm_wr32(device, 0x04014c + (i * 0x2000), 0xffffffff); /* INTREN */
-	}
-
-	nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar->addr >> 12);
-
-	if (fifo->func->pbdma->init_timeout)
-		fifo->func->pbdma->init_timeout(fifo);
-
-	nvkm_wr32(device, 0x002100, 0xffffffff);
-	nvkm_wr32(device, 0x002140, 0x7fffffff);
-}
-
-void *
-gk104_fifo_dtor(struct nvkm_fifo *base)
-{
-	struct gk104_fifo *fifo = gk104_fifo(base);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	int i;
-
-	nvkm_vmm_put(nvkm_bar_bar1_vmm(device), &fifo->user.bar);
-	nvkm_memory_unref(&fifo->user.mem);
+	struct nvkm_fifo *fifo = runl->fifo;
+	struct nvkm_device *device = fifo->engine.subdev.device;
+	u64 addr = nvkm_memory_addr(memory) + start;
+	int target;
 
-	for (i = 0; i < fifo->runlist_nr; i++) {
-		nvkm_memory_unref(&fifo->runlist[i].mem[1]);
-		nvkm_memory_unref(&fifo->runlist[i].mem[0]);
+	switch (nvkm_memory_target(memory)) {
+	case NVKM_MEM_TARGET_VRAM: target = 0; break;
+	case NVKM_MEM_TARGET_NCOH: target = 3; break;
+	default:
+		WARN_ON(1);
+		return;
 	}
 
-	return fifo;
+	spin_lock_irq(&fifo->lock);
+	nvkm_wr32(device, 0x002270, (target << 28) | (addr >> 12));
+	nvkm_wr32(device, 0x002274, (runl->id << 20) | count);
+	spin_unlock_irq(&fifo->lock);
 }
 
-static const struct nvkm_fifo_func
-gk104_fifo_ = {
-	.dtor = gk104_fifo_dtor,
-	.oneinit = gk104_fifo_oneinit,
-	.info = gk104_fifo_info,
-	.init = gk104_fifo_init,
-	.fini = gk104_fifo_fini,
-	.intr = gk104_fifo_intr,
-	.fault = gk104_fifo_fault,
-	.engine_id = gk104_fifo_engine_id,
-	.id_engine = gk104_fifo_id_engine,
-	.uevent_init = gk104_fifo_uevent_init,
-	.uevent_fini = gk104_fifo_uevent_fini,
-	.recover_chan = gk104_fifo_recover_chan,
-	.class_get = gk104_fifo_class_get,
-	.class_new = gk104_fifo_class_new,
-};
-
-int
-gk104_fifo_new_(const struct gk104_fifo_func *func, struct nvkm_device *device,
-		enum nvkm_subdev_type type, int inst, int nr, struct nvkm_fifo **pfifo)
+void
+gk104_runl_insert_chan(struct nvkm_chan *chan, struct nvkm_memory *memory, u64 offset)
 {
-	struct gk104_fifo *fifo;
-
-	if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL)))
-		return -ENOMEM;
-	fifo->func = func;
-	INIT_WORK(&fifo->recover.work, gk104_fifo_recover_work);
-	*pfifo = &fifo->base;
-
-	return nvkm_fifo_ctor(&gk104_fifo_, device, type, inst, nr, &fifo->base);
+	nvkm_wo32(memory, offset + 0, chan->id);
+	nvkm_wo32(memory, offset + 4, 0x00000000);
 }
 
-const struct nvkm_enum
-gk104_fifo_fault_access[] = {
-	{ 0x0, "READ" },
-	{ 0x1, "WRITE" },
-	{}
+static const struct nvkm_runl_func
+gk104_runl = {
+	.size = 8,
+	.update = nv50_runl_update,
+	.insert_chan = gk104_runl_insert_chan,
+	.commit = gk104_runl_commit,
+	.wait = nv50_runl_wait,
+	.pending = gk104_runl_pending,
+	.block = gk104_runl_block,
+	.allow = gk104_runl_allow,
+	.fault_clear = gk104_runl_fault_clear,
+	.preempt_pending = gf100_runl_preempt_pending,
 };
 
-const struct nvkm_enum
-gk104_fifo_fault_engine[] = {
+static const struct nvkm_enum
+gk104_fifo_mmu_fault_engine[] = {
 	{ 0x00, "GR", NULL, NVKM_ENGINE_GR },
 	{ 0x01, "DISPLAY" },
 	{ 0x02, "CAPTURE" },
@@ -1122,14 +478,14 @@ gk104_fifo_fault_engine[] = {
 	{ 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR },
 	{ 0x05, "BAR2", NULL, NVKM_SUBDEV_INSTMEM },
 	{ 0x06, "SCHED" },
-	{ 0x07, "HOST0", NULL, NVKM_ENGINE_FIFO },
-	{ 0x08, "HOST1", NULL, NVKM_ENGINE_FIFO },
-	{ 0x09, "HOST2", NULL, NVKM_ENGINE_FIFO },
-	{ 0x0a, "HOST3", NULL, NVKM_ENGINE_FIFO },
-	{ 0x0b, "HOST4", NULL, NVKM_ENGINE_FIFO },
-	{ 0x0c, "HOST5", NULL, NVKM_ENGINE_FIFO },
-	{ 0x0d, "HOST6", NULL, NVKM_ENGINE_FIFO },
-	{ 0x0e, "HOST7", NULL, NVKM_ENGINE_FIFO },
+	{ 0x07, "HOST0" },
+	{ 0x08, "HOST1" },
+	{ 0x09, "HOST2" },
+	{ 0x0a, "HOST3" },
+	{ 0x0b, "HOST4" },
+	{ 0x0c, "HOST5" },
+	{ 0x0d, "HOST6" },
+	{ 0x0e, "HOST7" },
 	{ 0x0f, "HOSTSR" },
 	{ 0x10, "MSVLD", NULL, NVKM_ENGINE_MSVLD },
 	{ 0x11, "MSPPP", NULL, NVKM_ENGINE_MSPPP },
@@ -1145,7 +501,7 @@ gk104_fifo_fault_engine[] = {
 };
 
 const struct nvkm_enum
-gk104_fifo_fault_reason[] = {
+gk104_fifo_mmu_fault_reason[] = {
 	{ 0x00, "PDE" },
 	{ 0x01, "PDE_SIZE" },
 	{ 0x02, "PTE" },
@@ -1166,7 +522,7 @@ gk104_fifo_fault_reason[] = {
 };
 
 const struct nvkm_enum
-gk104_fifo_fault_hubclient[] = {
+gk104_fifo_mmu_fault_hubclient[] = {
 	{ 0x00, "VIP" },
 	{ 0x01, "CE0" },
 	{ 0x02, "CE1" },
@@ -1203,7 +559,7 @@ gk104_fifo_fault_hubclient[] = {
 };
 
 const struct nvkm_enum
-gk104_fifo_fault_gpcclient[] = {
+gk104_fifo_mmu_fault_gpcclient[] = {
 	{ 0x00, "L1_0" }, { 0x01, "T1_0" }, { 0x02, "PE_0" },
 	{ 0x03, "L1_1" }, { 0x04, "T1_1" }, { 0x05, "PE_1" },
 	{ 0x06, "L1_2" }, { 0x07, "T1_2" }, { 0x08, "PE_2" },
@@ -1228,22 +584,250 @@ gk104_fifo_fault_gpcclient[] = {
 	{}
 };
 
-static const struct gk104_fifo_func
+const struct nvkm_fifo_func_mmu_fault
+gk104_fifo_mmu_fault = {
+	.recover = gf100_fifo_mmu_fault_recover,
+	.access = gf100_fifo_mmu_fault_access,
+	.engine = gk104_fifo_mmu_fault_engine,
+	.reason = gk104_fifo_mmu_fault_reason,
+	.hubclient = gk104_fifo_mmu_fault_hubclient,
+	.gpcclient = gk104_fifo_mmu_fault_gpcclient,
+};
+
+static const struct nvkm_enum
+gk104_fifo_intr_bind_reason[] = {
+	{ 0x01, "BIND_NOT_UNBOUND" },
+	{ 0x02, "SNOOP_WITHOUT_BAR1" },
+	{ 0x03, "UNBIND_WHILE_RUNNING" },
+	{ 0x05, "INVALID_RUNLIST" },
+	{ 0x06, "INVALID_CTX_TGT" },
+	{ 0x0b, "UNBIND_WHILE_PARKED" },
+	{}
+};
+
+void
+gk104_fifo_intr_bind(struct nvkm_fifo *fifo)
+{
+	struct nvkm_subdev *subdev = &fifo->engine.subdev;
+	u32 intr = nvkm_rd32(subdev->device, 0x00252c);
+	u32 code = intr & 0x000000ff;
+	const struct nvkm_enum *en = nvkm_enum_find(gk104_fifo_intr_bind_reason, code);
+
+	nvkm_error(subdev, "BIND_ERROR %02x [%s]\n", code, en ? en->name : "");
+}
+
+void
+gk104_fifo_intr_chsw(struct nvkm_fifo *fifo)
+{
+	struct nvkm_subdev *subdev = &fifo->engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0x00256c);
+
+	nvkm_error(subdev, "CHSW_ERROR %08x\n", stat);
+	nvkm_wr32(device, 0x00256c, stat);
+}
+
+static void
+gk104_fifo_intr_dropped_fault(struct nvkm_fifo *fifo)
+{
+	struct nvkm_subdev *subdev = &fifo->engine.subdev;
+	u32 stat = nvkm_rd32(subdev->device, 0x00259c);
+
+	nvkm_error(subdev, "DROPPED_MMU_FAULT %08x\n", stat);
+}
+
+void
+gk104_fifo_intr_runlist(struct nvkm_fifo *fifo)
+{
+	struct nvkm_device *device = fifo->engine.subdev.device;
+	struct nvkm_runl *runl;
+	u32 mask = nvkm_rd32(device, 0x002a00);
+
+	nvkm_runl_foreach_cond(runl, fifo, mask & BIT(runl->id)) {
+		nvkm_wr32(device, 0x002a00, BIT(runl->id));
+	}
+}
+
+irqreturn_t
+gk104_fifo_intr(struct nvkm_inth *inth)
+{
+	struct nvkm_fifo *fifo = container_of(inth, typeof(*fifo), engine.subdev.inth);
+	struct nvkm_subdev *subdev = &fifo->engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 mask = nvkm_rd32(device, 0x002140);
+	u32 stat = nvkm_rd32(device, 0x002100) & mask;
+
+	if (stat & 0x00000001) {
+		gk104_fifo_intr_bind(fifo);
+		nvkm_wr32(device, 0x002100, 0x00000001);
+		stat &= ~0x00000001;
+	}
+
+	if (stat & 0x00000010) {
+		nvkm_error(subdev, "PIO_ERROR\n");
+		nvkm_wr32(device, 0x002100, 0x00000010);
+		stat &= ~0x00000010;
+	}
+
+	if (stat & 0x00000100) {
+		gf100_fifo_intr_sched(fifo);
+		nvkm_wr32(device, 0x002100, 0x00000100);
+		stat &= ~0x00000100;
+	}
+
+	if (stat & 0x00010000) {
+		gk104_fifo_intr_chsw(fifo);
+		nvkm_wr32(device, 0x002100, 0x00010000);
+		stat &= ~0x00010000;
+	}
+
+	if (stat & 0x00800000) {
+		nvkm_error(subdev, "FB_FLUSH_TIMEOUT\n");
+		nvkm_wr32(device, 0x002100, 0x00800000);
+		stat &= ~0x00800000;
+	}
+
+	if (stat & 0x01000000) {
+		nvkm_error(subdev, "LB_ERROR\n");
+		nvkm_wr32(device, 0x002100, 0x01000000);
+		stat &= ~0x01000000;
+	}
+
+	if (stat & 0x08000000) {
+		gk104_fifo_intr_dropped_fault(fifo);
+		nvkm_wr32(device, 0x002100, 0x08000000);
+		stat &= ~0x08000000;
+	}
+
+	if (stat & 0x10000000) {
+		gf100_fifo_intr_mmu_fault(fifo);
+		stat &= ~0x10000000;
+	}
+
+	if (stat & 0x20000000) {
+		if (gf100_fifo_intr_pbdma(fifo))
+			stat &= ~0x20000000;
+	}
+
+	if (stat & 0x40000000) {
+		gk104_fifo_intr_runlist(fifo);
+		stat &= ~0x40000000;
+	}
+
+	if (stat & 0x80000000) {
+		nvkm_wr32(device, 0x002100, 0x80000000);
+		nvkm_event_ntfy(&fifo->nonstall.event, 0, NVKM_FIFO_NONSTALL_EVENT);
+		stat &= ~0x80000000;
+	}
+
+	if (stat) {
+		nvkm_error(subdev, "INTR %08x\n", stat);
+		spin_lock(&fifo->lock);
+		nvkm_mask(device, 0x002140, stat, 0x00000000);
+		spin_unlock(&fifo->lock);
+		nvkm_wr32(device, 0x002100, stat);
+	}
+
+	return IRQ_HANDLED;
+}
+
+void
+gk104_fifo_init_pbdmas(struct nvkm_fifo *fifo, u32 mask)
+{
+	struct nvkm_device *device = fifo->engine.subdev.device;
+
+	nvkm_wr32(device, 0x000204, mask);
+	nvkm_mask(device, 0x002a04, 0xbfffffff, 0xbfffffff);
+}
+
+void
+gk104_fifo_init(struct nvkm_fifo *fifo)
+{
+	struct nvkm_device *device = fifo->engine.subdev.device;
+
+	if (fifo->func->chan.func->userd->bar == 1)
+		nvkm_wr32(device, 0x002254, 0x10000000 | fifo->userd.bar1->addr >> 12);
+
+	nvkm_wr32(device, 0x002100, 0xffffffff);
+	nvkm_wr32(device, 0x002140, 0x7fffffff);
+}
+
+int
+gk104_fifo_runl_ctor(struct nvkm_fifo *fifo)
+{
+	struct nvkm_device *device = fifo->engine.subdev.device;
+	struct nvkm_top_device *tdev;
+	struct nvkm_runl *runl;
+	struct nvkm_runq *runq;
+	const struct nvkm_engn_func *func;
+
+	nvkm_list_foreach(tdev, &device->top->device, head, tdev->runlist >= 0) {
+		runl = nvkm_runl_get(fifo, tdev->runlist, tdev->runlist);
+		if (!runl) {
+			runl = nvkm_runl_new(fifo, tdev->runlist, tdev->runlist, 0);
+			if (IS_ERR(runl))
+				return PTR_ERR(runl);
+
+			nvkm_runq_foreach_cond(runq, fifo, gk104_runq_runm(runq) & BIT(runl->id)) {
+				if (WARN_ON(runl->runq_nr == ARRAY_SIZE(runl->runq)))
+					return -ENOMEM;
+
+				runl->runq[runl->runq_nr++] = runq;
+			}
+
+		}
+
+		if (tdev->engine < 0)
+			continue;
+
+		switch (tdev->type) {
+		case NVKM_ENGINE_CE:
+			func = fifo->func->engn_ce;
+			break;
+		case NVKM_ENGINE_GR:
+			nvkm_runl_add(runl, 15, &gf100_engn_sw, NVKM_ENGINE_SW, 0);
+			fallthrough;
+		default:
+			func = fifo->func->engn;
+			break;
+		}
+
+		nvkm_runl_add(runl, tdev->engine, func, tdev->type, tdev->inst);
+	}
+
+	return 0;
+}
+
+int
+gk104_fifo_chid_nr(struct nvkm_fifo *fifo)
+{
+	return 4096;
+}
+
+static const struct nvkm_fifo_func
 gk104_fifo = {
-	.intr.fault = gf100_fifo_intr_fault,
-	.pbdma = &gk104_fifo_pbdma,
-	.fault.access = gk104_fifo_fault_access,
-	.fault.engine = gk104_fifo_fault_engine,
-	.fault.reason = gk104_fifo_fault_reason,
-	.fault.hubclient = gk104_fifo_fault_hubclient,
-	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.runlist = &gk104_fifo_runlist,
-	.chan = {{0,0,KEPLER_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new },
+	.chid_nr = gk104_fifo_chid_nr,
+	.chid_ctor = gf100_fifo_chid_ctor,
+	.runq_nr = gf100_fifo_runq_nr,
+	.runl_ctor = gk104_fifo_runl_ctor,
+	.init = gk104_fifo_init,
+	.init_pbdmas = gk104_fifo_init_pbdmas,
+	.intr = gk104_fifo_intr,
+	.intr_mmu_fault_unit = gf100_fifo_intr_mmu_fault_unit,
+	.intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout,
+	.mmu_fault = &gk104_fifo_mmu_fault,
+	.nonstall = &gf100_fifo_nonstall,
+	.runl = &gk104_runl,
+	.runq = &gk104_runq,
+	.engn = &gk104_engn,
+	.engn_ce = &gk104_engn_ce,
+	.cgrp = {{                               }, &nv04_cgrp },
+	.chan = {{ 0, 0, KEPLER_CHANNEL_GPFIFO_A }, &gk104_chan },
 };
 
 int
 gk104_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	       struct nvkm_fifo **pfifo)
 {
-	return gk104_fifo_new_(&gk104_fifo, device, type, inst, 4096, pfifo);
+	return nvkm_fifo_new_(&gk104_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h
deleted file mode 100644
index f2d12ae73944..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h
+++ /dev/null
@@ -1,168 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __GK104_FIFO_H__
-#define __GK104_FIFO_H__
-#define gk104_fifo(p) container_of((p), struct gk104_fifo, base)
-#include "priv.h"
-struct nvkm_fifo_cgrp;
-
-#include <core/enum.h>
-#include <subdev/mmu.h>
-
-struct gk104_fifo_chan;
-struct gk104_fifo {
-	const struct gk104_fifo_func *func;
-	struct nvkm_fifo base;
-
-	struct {
-		struct work_struct work;
-		u32 engm;
-		u32 runm;
-	} recover;
-
-	int pbdma_nr;
-
-	struct {
-		struct nvkm_engine *engine;
-		int runl;
-		int pbid;
-	} engine[16];
-	int engine_nr;
-
-	struct {
-		struct nvkm_memory *mem[2];
-		int next;
-		wait_queue_head_t wait;
-		struct list_head cgrp;
-		struct list_head chan;
-		u32 engm;
-		u32 engm_sw;
-	} runlist[16];
-	int runlist_nr;
-
-	struct {
-		struct nvkm_memory *mem;
-		struct nvkm_vma *bar;
-	} user;
-};
-
-struct gk104_fifo_func {
-	struct {
-		void (*fault)(struct nvkm_fifo *, int unit);
-	} intr;
-
-	const struct gk104_fifo_pbdma_func {
-		int (*nr)(struct gk104_fifo *);
-		void (*init)(struct gk104_fifo *);
-		void (*init_timeout)(struct gk104_fifo *);
-	} *pbdma;
-
-	struct {
-		const struct nvkm_enum *access;
-		const struct nvkm_enum *engine;
-		const struct nvkm_enum *reason;
-		const struct nvkm_enum *hubclient;
-		const struct nvkm_enum *gpcclient;
-	} fault;
-
-	const struct gk104_fifo_runlist_func {
-		u8 size;
-		void (*cgrp)(struct nvkm_fifo_cgrp *,
-			     struct nvkm_memory *, u32 offset);
-		void (*chan)(struct gk104_fifo_chan *,
-			     struct nvkm_memory *, u32 offset);
-		void (*commit)(struct gk104_fifo *, int runl,
-			       struct nvkm_memory *, int entries);
-	} *runlist;
-
-	struct gk104_fifo_user_user {
-		struct nvkm_sclass user;
-		int (*ctor)(const struct nvkm_oclass *, void *, u32,
-			    struct nvkm_object **);
-	} user;
-
-	struct gk104_fifo_chan_user {
-		struct nvkm_sclass user;
-		int (*ctor)(struct gk104_fifo *, const struct nvkm_oclass *,
-			    void *, u32, struct nvkm_object **);
-	} chan;
-	bool cgrp_force;
-};
-
-struct gk104_fifo_engine_status {
-	bool busy;
-	bool faulted;
-	bool chsw;
-	bool save;
-	bool load;
-	struct {
-		bool tsg;
-		u32 id;
-	} prev, next, *chan;
-};
-
-int gk104_fifo_new_(const struct gk104_fifo_func *, struct nvkm_device *, enum nvkm_subdev_type,
-		    int index, int nr, struct nvkm_fifo **);
-void gk104_fifo_runlist_insert(struct gk104_fifo *, struct gk104_fifo_chan *);
-void gk104_fifo_runlist_remove(struct gk104_fifo *, struct gk104_fifo_chan *);
-void gk104_fifo_runlist_update(struct gk104_fifo *, int runl);
-void gk104_fifo_engine_status(struct gk104_fifo *fifo, int engn,
-			      struct gk104_fifo_engine_status *status);
-void gk104_fifo_intr_bind(struct gk104_fifo *fifo);
-void gk104_fifo_intr_chsw(struct gk104_fifo *fifo);
-void gk104_fifo_intr_dropped_fault(struct gk104_fifo *fifo);
-void gk104_fifo_intr_pbdma_0(struct gk104_fifo *fifo, int unit);
-void gk104_fifo_intr_pbdma_1(struct gk104_fifo *fifo, int unit);
-void gk104_fifo_intr_runlist(struct gk104_fifo *fifo);
-void gk104_fifo_intr_engine(struct gk104_fifo *fifo);
-void *gk104_fifo_dtor(struct nvkm_fifo *base);
-int gk104_fifo_oneinit(struct nvkm_fifo *base);
-int gk104_fifo_info(struct nvkm_fifo *base, u64 mthd, u64 *data);
-void gk104_fifo_init(struct nvkm_fifo *base);
-void gk104_fifo_fini(struct nvkm_fifo *base);
-int gk104_fifo_class_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
-			 void *argv, u32 argc, struct nvkm_object **pobject);
-int gk104_fifo_class_get(struct nvkm_fifo *base, int index,
-			 struct nvkm_oclass *oclass);
-void gk104_fifo_uevent_fini(struct nvkm_fifo *fifo);
-void gk104_fifo_uevent_init(struct nvkm_fifo *fifo);
-
-extern const struct gk104_fifo_pbdma_func gk104_fifo_pbdma;
-int gk104_fifo_pbdma_nr(struct gk104_fifo *);
-void gk104_fifo_pbdma_init(struct gk104_fifo *);
-extern const struct nvkm_enum gk104_fifo_fault_access[];
-extern const struct nvkm_enum gk104_fifo_fault_engine[];
-extern const struct nvkm_enum gk104_fifo_fault_reason[];
-extern const struct nvkm_enum gk104_fifo_fault_hubclient[];
-extern const struct nvkm_enum gk104_fifo_fault_gpcclient[];
-extern const struct gk104_fifo_runlist_func gk104_fifo_runlist;
-void gk104_fifo_runlist_chan(struct gk104_fifo_chan *,
-			     struct nvkm_memory *, u32);
-void gk104_fifo_runlist_commit(struct gk104_fifo *, int runl,
-			       struct nvkm_memory *, int);
-
-extern const struct gk104_fifo_runlist_func gk110_fifo_runlist;
-void gk110_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *,
-			     struct nvkm_memory *, u32);
-
-extern const struct gk104_fifo_pbdma_func gk208_fifo_pbdma;
-void gk208_fifo_pbdma_init_timeout(struct gk104_fifo *);
-
-void gm107_fifo_intr_fault(struct nvkm_fifo *, int);
-extern const struct nvkm_enum gm107_fifo_fault_engine[];
-extern const struct gk104_fifo_runlist_func gm107_fifo_runlist;
-
-extern const struct gk104_fifo_pbdma_func gm200_fifo_pbdma;
-int gm200_fifo_pbdma_nr(struct gk104_fifo *);
-
-void gp100_fifo_intr_fault(struct nvkm_fifo *, int);
-extern const struct nvkm_enum gp100_fifo_fault_engine[];
-
-extern const struct nvkm_enum gv100_fifo_fault_access[];
-extern const struct nvkm_enum gv100_fifo_fault_reason[];
-extern const struct nvkm_enum gv100_fifo_fault_hubclient[];
-extern const struct nvkm_enum gv100_fifo_fault_gpcclient[];
-void gv100_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *,
-			     struct nvkm_memory *, u32);
-void gv100_fifo_runlist_chan(struct gk104_fifo_chan *,
-			     struct nvkm_memory *, u32);
-#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c
index 915278c7e012..a8ff21cf7712 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c
@@ -21,47 +21,112 @@
  *
  * Authors: Ben Skeggs
  */
-#include "gk104.h"
+#include "priv.h"
 #include "cgrp.h"
-#include "changk104.h"
+#include "chan.h"
+#include "chid.h"
+#include "runl.h"
 
 #include <core/memory.h>
+#include <subdev/timer.h>
 
 #include <nvif/class.h>
 
 void
-gk110_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *cgrp,
-			struct nvkm_memory *memory, u32 offset)
+gk110_chan_preempt(struct nvkm_chan *chan)
+{
+	struct nvkm_cgrp *cgrp = chan->cgrp;
+
+	if (cgrp->hw) {
+		cgrp->func->preempt(cgrp);
+		return;
+	}
+
+	gf100_chan_preempt(chan);
+}
+
+const struct nvkm_chan_func
+gk110_chan = {
+	.inst = &gf100_chan_inst,
+	.userd = &gk104_chan_userd,
+	.ramfc = &gk104_chan_ramfc,
+	.bind = gk104_chan_bind,
+	.unbind = gk104_chan_unbind,
+	.start = gk104_chan_start,
+	.stop = gk104_chan_stop,
+	.preempt = gk110_chan_preempt,
+};
+
+static void
+gk110_cgrp_preempt(struct nvkm_cgrp *cgrp)
+{
+	nvkm_wr32(cgrp->runl->fifo->engine.subdev.device, 0x002634, 0x01000000 | cgrp->id);
+}
+
+const struct nvkm_cgrp_func
+gk110_cgrp = {
+	.preempt = gk110_cgrp_preempt,
+};
+
+void
+gk110_runl_insert_cgrp(struct nvkm_cgrp *cgrp, struct nvkm_memory *memory, u64 offset)
 {
 	nvkm_wo32(memory, offset + 0, (cgrp->chan_nr << 26) | (128 << 18) |
 				      (3 << 14) | 0x00002000 | cgrp->id);
 	nvkm_wo32(memory, offset + 4, 0x00000000);
 }
 
-const struct gk104_fifo_runlist_func
-gk110_fifo_runlist = {
+const struct nvkm_runl_func
+gk110_runl = {
 	.size = 8,
-	.cgrp = gk110_fifo_runlist_cgrp,
-	.chan = gk104_fifo_runlist_chan,
-	.commit = gk104_fifo_runlist_commit,
+	.update = nv50_runl_update,
+	.insert_cgrp = gk110_runl_insert_cgrp,
+	.insert_chan = gk104_runl_insert_chan,
+	.commit = gk104_runl_commit,
+	.wait = nv50_runl_wait,
+	.pending = gk104_runl_pending,
+	.block = gk104_runl_block,
+	.allow = gk104_runl_allow,
+	.fault_clear = gk104_runl_fault_clear,
+	.preempt_pending = gf100_runl_preempt_pending,
 };
 
-static const struct gk104_fifo_func
+int
+gk110_fifo_chid_ctor(struct nvkm_fifo *fifo, int nr)
+{
+	int ret;
+
+	ret = nvkm_chid_new(&nvkm_chan_event, &fifo->engine.subdev, nr, 0, nr, &fifo->cgid);
+	if (ret)
+		return ret;
+
+	return gf100_fifo_chid_ctor(fifo, nr);
+}
+
+static const struct nvkm_fifo_func
 gk110_fifo = {
-	.intr.fault = gf100_fifo_intr_fault,
-	.pbdma = &gk104_fifo_pbdma,
-	.fault.access = gk104_fifo_fault_access,
-	.fault.engine = gk104_fifo_fault_engine,
-	.fault.reason = gk104_fifo_fault_reason,
-	.fault.hubclient = gk104_fifo_fault_hubclient,
-	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.runlist = &gk110_fifo_runlist,
-	.chan = {{0,0,KEPLER_CHANNEL_GPFIFO_B}, gk104_fifo_gpfifo_new },
+	.chid_nr = gk104_fifo_chid_nr,
+	.chid_ctor = gk110_fifo_chid_ctor,
+	.runq_nr = gf100_fifo_runq_nr,
+	.runl_ctor = gk104_fifo_runl_ctor,
+	.init = gk104_fifo_init,
+	.init_pbdmas = gk104_fifo_init_pbdmas,
+	.intr = gk104_fifo_intr,
+	.intr_mmu_fault_unit = gf100_fifo_intr_mmu_fault_unit,
+	.intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout,
+	.mmu_fault = &gk104_fifo_mmu_fault,
+	.nonstall = &gf100_fifo_nonstall,
+	.runl = &gk110_runl,
+	.runq = &gk104_runq,
+	.engn = &gk104_engn,
+	.engn_ce = &gk104_engn_ce,
+	.cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A  }, &gk110_cgrp },
+	.chan = {{ 0, 0, KEPLER_CHANNEL_GPFIFO_B }, &gk110_chan },
 };
 
 int
 gk110_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	       struct nvkm_fifo **pfifo)
 {
-	return gk104_fifo_new_(&gk110_fifo, device, type, inst, 4096, pfifo);
+	return nvkm_fifo_new_(&gk110_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c
index cb703693de52..8fa2b0be141a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c
@@ -21,44 +21,57 @@
  *
  * Authors: Ben Skeggs
  */
-#include "gk104.h"
-#include "changk104.h"
+#include "priv.h"
+#include "runq.h"
 
 #include <nvif/class.h>
 
 void
-gk208_fifo_pbdma_init_timeout(struct gk104_fifo *fifo)
+gk208_runq_init(struct nvkm_runq *runq)
 {
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	int i;
+	gk104_runq_init(runq);
 
-	for (i = 0; i < fifo->pbdma_nr; i++)
-		nvkm_wr32(device, 0x04012c + (i * 0x2000), 0x0000ffff);
+	nvkm_wr32(runq->fifo->engine.subdev.device, 0x04012c + (runq->id * 0x2000), 0x000f4240);
 }
 
-const struct gk104_fifo_pbdma_func
-gk208_fifo_pbdma = {
-	.nr = gk104_fifo_pbdma_nr,
-	.init = gk104_fifo_pbdma_init,
-	.init_timeout = gk208_fifo_pbdma_init_timeout,
+const struct nvkm_runq_func
+gk208_runq = {
+	.init = gk208_runq_init,
+	.intr = gk104_runq_intr,
+	.intr_0_names = gk104_runq_intr_0_names,
+	.idle = gk104_runq_idle,
 };
 
-static const struct gk104_fifo_func
+static int
+gk208_fifo_chid_nr(struct nvkm_fifo *fifo)
+{
+	return 1024;
+}
+
+static const struct nvkm_fifo_func
 gk208_fifo = {
-	.intr.fault = gf100_fifo_intr_fault,
-	.pbdma = &gk208_fifo_pbdma,
-	.fault.access = gk104_fifo_fault_access,
-	.fault.engine = gk104_fifo_fault_engine,
-	.fault.reason = gk104_fifo_fault_reason,
-	.fault.hubclient = gk104_fifo_fault_hubclient,
-	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.runlist = &gk110_fifo_runlist,
-	.chan = {{0,0,KEPLER_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new },
+	.chid_nr = gk208_fifo_chid_nr,
+	.chid_ctor = gk110_fifo_chid_ctor,
+	.runq_nr = gf100_fifo_runq_nr,
+	.runl_ctor = gk104_fifo_runl_ctor,
+	.init = gk104_fifo_init,
+	.init_pbdmas = gk104_fifo_init_pbdmas,
+	.intr = gk104_fifo_intr,
+	.intr_mmu_fault_unit = gf100_fifo_intr_mmu_fault_unit,
+	.intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout,
+	.mmu_fault = &gk104_fifo_mmu_fault,
+	.nonstall = &gf100_fifo_nonstall,
+	.runl = &gk110_runl,
+	.runq = &gk208_runq,
+	.engn = &gk104_engn,
+	.engn_ce = &gk104_engn_ce,
+	.cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A  }, &gk110_cgrp },
+	.chan = {{ 0, 0, KEPLER_CHANNEL_GPFIFO_A }, &gk110_chan },
 };
 
 int
 gk208_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	       struct nvkm_fifo **pfifo)
 {
-	return gk104_fifo_new_(&gk208_fifo, device, type, inst, 1024, pfifo);
+	return nvkm_fifo_new_(&gk208_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c
index 6e35cf44c640..b63ca836130f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c
@@ -19,27 +19,34 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
-#include "gk104.h"
-#include "changk104.h"
+#include "priv.h"
 
 #include <nvif/class.h>
 
-static const struct gk104_fifo_func
+static const struct nvkm_fifo_func
 gk20a_fifo = {
-	.intr.fault = gf100_fifo_intr_fault,
-	.pbdma = &gk208_fifo_pbdma,
-	.fault.access = gk104_fifo_fault_access,
-	.fault.engine = gk104_fifo_fault_engine,
-	.fault.reason = gk104_fifo_fault_reason,
-	.fault.hubclient = gk104_fifo_fault_hubclient,
-	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.runlist = &gk110_fifo_runlist,
-	.chan = {{0,0,KEPLER_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new },
+	.chid_nr = nv50_fifo_chid_nr,
+	.chid_ctor = gk110_fifo_chid_ctor,
+	.runq_nr = gf100_fifo_runq_nr,
+	.runl_ctor = gk104_fifo_runl_ctor,
+	.init = gk104_fifo_init,
+	.init_pbdmas = gk104_fifo_init_pbdmas,
+	.intr = gk104_fifo_intr,
+	.intr_mmu_fault_unit = gf100_fifo_intr_mmu_fault_unit,
+	.intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout,
+	.mmu_fault = &gk104_fifo_mmu_fault,
+	.nonstall = &gf100_fifo_nonstall,
+	.runl = &gk110_runl,
+	.runq = &gk208_runq,
+	.engn = &gk104_engn,
+	.engn_ce = &gk104_engn_ce,
+	.cgrp = {{                               }, &gk110_cgrp },
+	.chan = {{ 0, 0, KEPLER_CHANNEL_GPFIFO_A }, &gk110_chan },
 };
 
 int
 gk20a_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	       struct nvkm_fifo **pfifo)
 {
-	return gk104_fifo_new_(&gk20a_fifo, device, type, inst, 128, pfifo);
+	return nvkm_fifo_new_(&gk20a_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c
index 7af6e687d474..5ba60021b510 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c
@@ -21,46 +21,65 @@
  *
  * Authors: Ben Skeggs
  */
-#include "gk104.h"
-#include "changk104.h"
+#include "priv.h"
+#include "chan.h"
+#include "runl.h"
 
 #include <core/gpuobj.h>
 #include <subdev/fault.h>
 
 #include <nvif/class.h>
 
+const struct nvkm_chan_func
+gm107_chan = {
+	.inst = &gf100_chan_inst,
+	.userd = &gk104_chan_userd,
+	.ramfc = &gk104_chan_ramfc,
+	.bind = gk104_chan_bind_inst,
+	.unbind = gk104_chan_unbind,
+	.start = gk104_chan_start,
+	.stop = gk104_chan_stop,
+	.preempt = gk110_chan_preempt,
+};
+
 static void
-gm107_fifo_runlist_chan(struct gk104_fifo_chan *chan,
-			struct nvkm_memory *memory, u32 offset)
+gm107_runl_insert_chan(struct nvkm_chan *chan, struct nvkm_memory *memory, u64 offset)
 {
-	nvkm_wo32(memory, offset + 0, chan->base.chid);
-	nvkm_wo32(memory, offset + 4, chan->base.inst->addr >> 12);
+	nvkm_wo32(memory, offset + 0, chan->id);
+	nvkm_wo32(memory, offset + 4, chan->inst->addr >> 12);
 }
 
-const struct gk104_fifo_runlist_func
-gm107_fifo_runlist = {
+const struct nvkm_runl_func
+gm107_runl = {
 	.size = 8,
-	.cgrp = gk110_fifo_runlist_cgrp,
-	.chan = gm107_fifo_runlist_chan,
-	.commit = gk104_fifo_runlist_commit,
+	.update = nv50_runl_update,
+	.insert_cgrp = gk110_runl_insert_cgrp,
+	.insert_chan = gm107_runl_insert_chan,
+	.commit = gk104_runl_commit,
+	.wait = nv50_runl_wait,
+	.pending = gk104_runl_pending,
+	.block = gk104_runl_block,
+	.allow = gk104_runl_allow,
+	.fault_clear = gk104_runl_fault_clear,
+	.preempt_pending = gf100_runl_preempt_pending,
 };
 
-const struct nvkm_enum
-gm107_fifo_fault_engine[] = {
+static const struct nvkm_enum
+gm107_fifo_mmu_fault_engine[] = {
 	{ 0x01, "DISPLAY" },
 	{ 0x02, "CAPTURE" },
 	{ 0x03, "IFB", NULL, NVKM_ENGINE_IFB },
 	{ 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR },
 	{ 0x05, "BAR2", NULL, NVKM_SUBDEV_INSTMEM },
 	{ 0x06, "SCHED" },
-	{ 0x07, "HOST0", NULL, NVKM_ENGINE_FIFO },
-	{ 0x08, "HOST1", NULL, NVKM_ENGINE_FIFO },
-	{ 0x09, "HOST2", NULL, NVKM_ENGINE_FIFO },
-	{ 0x0a, "HOST3", NULL, NVKM_ENGINE_FIFO },
-	{ 0x0b, "HOST4", NULL, NVKM_ENGINE_FIFO },
-	{ 0x0c, "HOST5", NULL, NVKM_ENGINE_FIFO },
-	{ 0x0d, "HOST6", NULL, NVKM_ENGINE_FIFO },
-	{ 0x0e, "HOST7", NULL, NVKM_ENGINE_FIFO },
+	{ 0x07, "HOST0" },
+	{ 0x08, "HOST1" },
+	{ 0x09, "HOST2" },
+	{ 0x0a, "HOST3" },
+	{ 0x0b, "HOST4" },
+	{ 0x0c, "HOST5" },
+	{ 0x0d, "HOST6" },
+	{ 0x0e, "HOST7" },
 	{ 0x0f, "HOSTSR" },
 	{ 0x13, "PERF" },
 	{ 0x17, "PMU" },
@@ -68,8 +87,18 @@ gm107_fifo_fault_engine[] = {
 	{}
 };
 
+const struct nvkm_fifo_func_mmu_fault
+gm107_fifo_mmu_fault = {
+	.recover = gf100_fifo_mmu_fault_recover,
+	.access = gf100_fifo_mmu_fault_access,
+	.engine = gm107_fifo_mmu_fault_engine,
+	.reason = gk104_fifo_mmu_fault_reason,
+	.hubclient = gk104_fifo_mmu_fault_hubclient,
+	.gpcclient = gk104_fifo_mmu_fault_gpcclient,
+};
+
 void
-gm107_fifo_intr_fault(struct nvkm_fifo *fifo, int unit)
+gm107_fifo_intr_mmu_fault_unit(struct nvkm_fifo *fifo, int unit)
 {
 	struct nvkm_device *device = fifo->engine.subdev.device;
 	u32 inst = nvkm_rd32(device, 0x002800 + (unit * 0x10));
@@ -92,22 +121,36 @@ gm107_fifo_intr_fault(struct nvkm_fifo *fifo, int unit)
 	nvkm_fifo_fault(fifo, &info);
 }
 
-static const struct gk104_fifo_func
+static int
+gm107_fifo_chid_nr(struct nvkm_fifo *fifo)
+{
+	return 2048;
+}
+
+static const struct nvkm_fifo_func
 gm107_fifo = {
-	.intr.fault = gm107_fifo_intr_fault,
-	.pbdma = &gk208_fifo_pbdma,
-	.fault.access = gk104_fifo_fault_access,
-	.fault.engine = gm107_fifo_fault_engine,
-	.fault.reason = gk104_fifo_fault_reason,
-	.fault.hubclient = gk104_fifo_fault_hubclient,
-	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.runlist = &gm107_fifo_runlist,
-	.chan = {{0,0,KEPLER_CHANNEL_GPFIFO_B}, gk104_fifo_gpfifo_new },
+	.chid_nr = gm107_fifo_chid_nr,
+	.chid_ctor = gk110_fifo_chid_ctor,
+	.runq_nr = gf100_fifo_runq_nr,
+	.runl_ctor = gk104_fifo_runl_ctor,
+	.init = gk104_fifo_init,
+	.init_pbdmas = gk104_fifo_init_pbdmas,
+	.intr = gk104_fifo_intr,
+	.intr_mmu_fault_unit = gm107_fifo_intr_mmu_fault_unit,
+	.intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout,
+	.mmu_fault = &gm107_fifo_mmu_fault,
+	.nonstall = &gf100_fifo_nonstall,
+	.runl = &gm107_runl,
+	.runq = &gk208_runq,
+	.engn = &gk104_engn,
+	.engn_ce = &gk104_engn_ce,
+	.cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A  }, &gk110_cgrp },
+	.chan = {{ 0, 0, KEPLER_CHANNEL_GPFIFO_B }, &gm107_chan },
 };
 
 int
 gm107_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	       struct nvkm_fifo **pfifo)
 {
-	return gk104_fifo_new_(&gm107_fifo, device, type, inst, 2048, pfifo);
+	return nvkm_fifo_new_(&gm107_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c
index 573658cb6c73..d92d1ac39191 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c
@@ -21,41 +21,46 @@
  *
  * Authors: Ben Skeggs
  */
-#include "gk104.h"
-#include "changk104.h"
+#include "priv.h"
 
 #include <nvif/class.h>
 
 int
-gm200_fifo_pbdma_nr(struct gk104_fifo *fifo)
+gm200_fifo_runq_nr(struct nvkm_fifo *fifo)
 {
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	return nvkm_rd32(device, 0x002004) & 0x000000ff;
+	return nvkm_rd32(fifo->engine.subdev.device, 0x002004) & 0x000000ff;
 }
 
-const struct gk104_fifo_pbdma_func
-gm200_fifo_pbdma = {
-	.nr = gm200_fifo_pbdma_nr,
-	.init = gk104_fifo_pbdma_init,
-	.init_timeout = gk208_fifo_pbdma_init_timeout,
-};
+int
+gm200_fifo_chid_nr(struct nvkm_fifo *fifo)
+{
+	return nvkm_rd32(fifo->engine.subdev.device, 0x002008);
+}
 
-static const struct gk104_fifo_func
+static const struct nvkm_fifo_func
 gm200_fifo = {
-	.intr.fault = gm107_fifo_intr_fault,
-	.pbdma = &gm200_fifo_pbdma,
-	.fault.access = gk104_fifo_fault_access,
-	.fault.engine = gm107_fifo_fault_engine,
-	.fault.reason = gk104_fifo_fault_reason,
-	.fault.hubclient = gk104_fifo_fault_hubclient,
-	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.runlist = &gm107_fifo_runlist,
-	.chan = {{0,0,MAXWELL_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new },
+	.chid_nr = gm200_fifo_chid_nr,
+	.chid_ctor = gk110_fifo_chid_ctor,
+	.runq_nr = gm200_fifo_runq_nr,
+	.runl_ctor = gk104_fifo_runl_ctor,
+	.init = gk104_fifo_init,
+	.init_pbdmas = gk104_fifo_init_pbdmas,
+	.intr = gk104_fifo_intr,
+	.intr_mmu_fault_unit = gm107_fifo_intr_mmu_fault_unit,
+	.intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout,
+	.mmu_fault = &gm107_fifo_mmu_fault,
+	.nonstall = &gf100_fifo_nonstall,
+	.runl = &gm107_runl,
+	.runq = &gk208_runq,
+	.engn = &gk104_engn,
+	.engn_ce = &gk104_engn_ce,
+	.cgrp = {{ 0, 0,  KEPLER_CHANNEL_GROUP_A  }, &gk110_cgrp },
+	.chan = {{ 0, 0, MAXWELL_CHANNEL_GPFIFO_A }, &gm107_chan },
 };
 
 int
 gm200_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	       struct nvkm_fifo **pfifo)
 {
-	return gk104_fifo_new_(&gm200_fifo, device, type, inst, 4096, pfifo);
+	return nvkm_fifo_new_(&gm200_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c
deleted file mode 100644
index 556c97e54f14..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-#include "gk104.h"
-#include "changk104.h"
-
-#include <nvif/class.h>
-
-static const struct gk104_fifo_func
-gm20b_fifo = {
-	.intr.fault = gm107_fifo_intr_fault,
-	.pbdma = &gm200_fifo_pbdma,
-	.fault.access = gk104_fifo_fault_access,
-	.fault.engine = gm107_fifo_fault_engine,
-	.fault.reason = gk104_fifo_fault_reason,
-	.fault.hubclient = gk104_fifo_fault_hubclient,
-	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.runlist = &gm107_fifo_runlist,
-	.chan = {{0,0,MAXWELL_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new },
-};
-
-int
-gm20b_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
-	       struct nvkm_fifo **pfifo)
-{
-	return gk104_fifo_new_(&gm20b_fifo, device, type, inst, 512, pfifo);
-}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
index 6b46b6b65b87..65bdb6a7d517 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
@@ -21,30 +21,54 @@
  *
  * Authors: Ben Skeggs
  */
-#include "gk104.h"
-#include "changk104.h"
+#include "priv.h"
+#include "runl.h"
 
+#include <core/gpuobj.h>
 #include <subdev/fault.h>
 
 #include <nvif/class.h>
 
-const struct nvkm_enum
-gp100_fifo_fault_engine[] = {
+static void
+gp100_runl_insert_chan(struct nvkm_chan *chan, struct nvkm_memory *memory, u64 offset)
+{
+	nvkm_wo32(memory, offset + 0, chan->id | chan->runq << 14);
+	nvkm_wo32(memory, offset + 4, chan->inst->addr >> 12);
+}
+
+static const struct nvkm_runl_func
+gp100_runl = {
+	.runqs = 2,
+	.size = 8,
+	.update = nv50_runl_update,
+	.insert_cgrp = gk110_runl_insert_cgrp,
+	.insert_chan = gp100_runl_insert_chan,
+	.commit = gk104_runl_commit,
+	.wait = nv50_runl_wait,
+	.pending = gk104_runl_pending,
+	.block = gk104_runl_block,
+	.allow = gk104_runl_allow,
+	.fault_clear = gk104_runl_fault_clear,
+	.preempt_pending = gf100_runl_preempt_pending,
+};
+
+static const struct nvkm_enum
+gp100_fifo_mmu_fault_engine[] = {
 	{ 0x01, "DISPLAY" },
 	{ 0x03, "IFB", NULL, NVKM_ENGINE_IFB },
 	{ 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR },
 	{ 0x05, "BAR2", NULL, NVKM_SUBDEV_INSTMEM },
-	{ 0x06, "HOST0", NULL, NVKM_ENGINE_FIFO },
-	{ 0x07, "HOST1", NULL, NVKM_ENGINE_FIFO },
-	{ 0x08, "HOST2", NULL, NVKM_ENGINE_FIFO },
-	{ 0x09, "HOST3", NULL, NVKM_ENGINE_FIFO },
-	{ 0x0a, "HOST4", NULL, NVKM_ENGINE_FIFO },
-	{ 0x0b, "HOST5", NULL, NVKM_ENGINE_FIFO },
-	{ 0x0c, "HOST6", NULL, NVKM_ENGINE_FIFO },
-	{ 0x0d, "HOST7", NULL, NVKM_ENGINE_FIFO },
-	{ 0x0e, "HOST8", NULL, NVKM_ENGINE_FIFO },
-	{ 0x0f, "HOST9", NULL, NVKM_ENGINE_FIFO },
-	{ 0x10, "HOST10", NULL, NVKM_ENGINE_FIFO },
+	{ 0x06, "HOST0" },
+	{ 0x07, "HOST1" },
+	{ 0x08, "HOST2" },
+	{ 0x09, "HOST3" },
+	{ 0x0a, "HOST4" },
+	{ 0x0b, "HOST5" },
+	{ 0x0c, "HOST6" },
+	{ 0x0d, "HOST7" },
+	{ 0x0e, "HOST8" },
+	{ 0x0f, "HOST9" },
+	{ 0x10, "HOST10" },
 	{ 0x13, "PERF" },
 	{ 0x17, "PMU" },
 	{ 0x18, "PTP" },
@@ -52,8 +76,18 @@ gp100_fifo_fault_engine[] = {
 	{}
 };
 
-void
-gp100_fifo_intr_fault(struct nvkm_fifo *fifo, int unit)
+static const struct nvkm_fifo_func_mmu_fault
+gp100_fifo_mmu_fault = {
+	.recover = gf100_fifo_mmu_fault_recover,
+	.access = gf100_fifo_mmu_fault_access,
+	.engine = gp100_fifo_mmu_fault_engine,
+	.reason = gk104_fifo_mmu_fault_reason,
+	.hubclient = gk104_fifo_mmu_fault_hubclient,
+	.gpcclient = gk104_fifo_mmu_fault_gpcclient,
+};
+
+static void
+gp100_fifo_intr_mmu_fault_unit(struct nvkm_fifo *fifo, int unit)
 {
 	struct nvkm_device *device = fifo->engine.subdev.device;
 	u32 inst = nvkm_rd32(device, 0x002800 + (unit * 0x10));
@@ -76,23 +110,30 @@ gp100_fifo_intr_fault(struct nvkm_fifo *fifo, int unit)
 	nvkm_fifo_fault(fifo, &info);
 }
 
-static const struct gk104_fifo_func
+static const struct nvkm_fifo_func
 gp100_fifo = {
-	.intr.fault = gp100_fifo_intr_fault,
-	.pbdma = &gm200_fifo_pbdma,
-	.fault.access = gk104_fifo_fault_access,
-	.fault.engine = gp100_fifo_fault_engine,
-	.fault.reason = gk104_fifo_fault_reason,
-	.fault.hubclient = gk104_fifo_fault_hubclient,
-	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.runlist = &gm107_fifo_runlist,
-	.chan = {{0,0,PASCAL_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new },
-	.cgrp_force = true,
+	.chid_nr = gm200_fifo_chid_nr,
+	.chid_ctor = gk110_fifo_chid_ctor,
+	.runq_nr = gm200_fifo_runq_nr,
+	.runl_ctor = gk104_fifo_runl_ctor,
+	.init = gk104_fifo_init,
+	.init_pbdmas = gk104_fifo_init_pbdmas,
+	.intr = gk104_fifo_intr,
+	.intr_mmu_fault_unit = gp100_fifo_intr_mmu_fault_unit,
+	.intr_ctxsw_timeout = gf100_fifo_intr_ctxsw_timeout,
+	.mmu_fault = &gp100_fifo_mmu_fault,
+	.nonstall = &gf100_fifo_nonstall,
+	.runl = &gp100_runl,
+	.runq = &gk208_runq,
+	.engn = &gk104_engn,
+	.engn_ce = &gk104_engn_ce,
+	.cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A  }, &gk110_cgrp, .force = true },
+	.chan = {{ 0, 0, PASCAL_CHANNEL_GPFIFO_A }, &gm107_chan },
 };
 
 int
 gp100_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	       struct nvkm_fifo **pfifo)
 {
-	return gk104_fifo_new_(&gp100_fifo, device, type, inst, 4096, pfifo);
+	return nvkm_fifo_new_(&gp100_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c
deleted file mode 100644
index 7a5929cb4d29..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-#include "gk104.h"
-#include "changk104.h"
-
-#include <nvif/class.h>
-
-static const struct gk104_fifo_func
-gp10b_fifo = {
-	.intr.fault = gp100_fifo_intr_fault,
-	.pbdma = &gm200_fifo_pbdma,
-	.fault.access = gk104_fifo_fault_access,
-	.fault.engine = gp100_fifo_fault_engine,
-	.fault.reason = gk104_fifo_fault_reason,
-	.fault.hubclient = gk104_fifo_fault_hubclient,
-	.fault.gpcclient = gk104_fifo_fault_gpcclient,
-	.runlist = &gm107_fifo_runlist,
-	.chan = {{0,0,PASCAL_CHANNEL_GPFIFO_A}, gk104_fifo_gpfifo_new },
-	.cgrp_force = true,
-};
-
-int
-gp10b_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
-	       struct nvkm_fifo **pfifo)
-{
-	return gk104_fifo_new_(&gp10b_fifo, device, type, inst, 512, pfifo);
-}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifog84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifog84.c
deleted file mode 100644
index 2121f517b1dd..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifog84.c
+++ /dev/null
@@ -1,95 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "channv50.h"
-
-#include <core/client.h>
-#include <core/ramht.h>
-
-#include <nvif/class.h>
-#include <nvif/cl826f.h>
-#include <nvif/unpack.h>
-
-static int
-g84_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
-		    void *data, u32 size, struct nvkm_object **pobject)
-{
-	struct nvkm_object *parent = oclass->parent;
-	union {
-		struct g82_channel_gpfifo_v0 v0;
-	} *args = data;
-	struct nv50_fifo *fifo = nv50_fifo(base);
-	struct nv50_fifo_chan *chan;
-	u64 ioffset, ilength;
-	int ret = -ENOSYS;
-
-	nvif_ioctl(parent, "create channel gpfifo size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-		nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx "
-				   "pushbuf %llx ioffset %016llx "
-				   "ilength %08x\n",
-			   args->v0.version, args->v0.vmm, args->v0.pushbuf,
-			   args->v0.ioffset, args->v0.ilength);
-		if (!args->v0.pushbuf)
-			return -EINVAL;
-	} else
-		return ret;
-
-	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
-		return -ENOMEM;
-	*pobject = &chan->base.object;
-
-	ret = g84_fifo_chan_ctor(fifo, args->v0.vmm, args->v0.pushbuf,
-				 oclass, chan);
-	if (ret)
-		return ret;
-
-	args->v0.chid = chan->base.chid;
-	ioffset = args->v0.ioffset;
-	ilength = order_base_2(args->v0.ilength / 8);
-
-	nvkm_kmap(chan->ramfc);
-	nvkm_wo32(chan->ramfc, 0x3c, 0x403f6078);
-	nvkm_wo32(chan->ramfc, 0x44, 0x01003fff);
-	nvkm_wo32(chan->ramfc, 0x48, chan->base.push->node->offset >> 4);
-	nvkm_wo32(chan->ramfc, 0x50, lower_32_bits(ioffset));
-	nvkm_wo32(chan->ramfc, 0x54, upper_32_bits(ioffset) | (ilength << 16));
-	nvkm_wo32(chan->ramfc, 0x60, 0x7fffffff);
-	nvkm_wo32(chan->ramfc, 0x78, 0x00000000);
-	nvkm_wo32(chan->ramfc, 0x7c, 0x30000001);
-	nvkm_wo32(chan->ramfc, 0x80, ((chan->ramht->bits - 9) << 27) |
-				     (4 << 24) /* SEARCH_FULL */ |
-				     (chan->ramht->gpuobj->node->offset >> 4));
-	nvkm_wo32(chan->ramfc, 0x88, chan->cache->addr >> 10);
-	nvkm_wo32(chan->ramfc, 0x98, chan->base.inst->addr >> 12);
-	nvkm_done(chan->ramfc);
-	return 0;
-}
-
-const struct nvkm_fifo_chan_oclass
-g84_fifo_gpfifo_oclass = {
-	.base.oclass = G82_CHANNEL_GPFIFO,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = g84_fifo_gpfifo_new,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c
deleted file mode 100644
index 4e78bbe3b94b..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c
+++ /dev/null
@@ -1,308 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "changf100.h"
-
-#include <core/client.h>
-#include <core/gpuobj.h>
-#include <subdev/fb.h>
-#include <subdev/timer.h>
-
-#include <nvif/class.h>
-#include <nvif/cl906f.h>
-#include <nvif/unpack.h>
-
-int
-gf100_fifo_chan_ntfy(struct nvkm_fifo_chan *chan, u32 type,
-		     struct nvkm_event **pevent)
-{
-	switch (type) {
-	case NV906F_V0_NTFY_NON_STALL_INTERRUPT:
-		*pevent = &chan->fifo->uevent;
-		return 0;
-	case NV906F_V0_NTFY_KILLED:
-		*pevent = &chan->fifo->kevent;
-		return 0;
-	default:
-		break;
-	}
-	return -EINVAL;
-}
-
-static u32
-gf100_fifo_gpfifo_engine_addr(struct nvkm_engine *engine)
-{
-	switch (engine->subdev.type) {
-	case NVKM_ENGINE_SW    : return 0;
-	case NVKM_ENGINE_GR    : return 0x0210;
-	case NVKM_ENGINE_CE    : return 0x0230 + (engine->subdev.inst * 0x10);
-	case NVKM_ENGINE_MSPDEC: return 0x0250;
-	case NVKM_ENGINE_MSPPP : return 0x0260;
-	case NVKM_ENGINE_MSVLD : return 0x0270;
-	default:
-		WARN_ON(1);
-		return 0;
-	}
-}
-
-static struct gf100_fifo_engn *
-gf100_fifo_gpfifo_engine(struct gf100_fifo_chan *chan, struct nvkm_engine *engine)
-{
-	int engi = chan->base.fifo->func->engine_id(chan->base.fifo, engine);
-	if (engi >= 0)
-		return &chan->engn[engi];
-	return NULL;
-}
-
-static int
-gf100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base,
-			      struct nvkm_engine *engine, bool suspend)
-{
-	const u32 offset = gf100_fifo_gpfifo_engine_addr(engine);
-	struct gf100_fifo_chan *chan = gf100_fifo_chan(base);
-	struct nvkm_subdev *subdev = &chan->fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	struct nvkm_gpuobj *inst = chan->base.inst;
-	int ret = 0;
-
-	mutex_lock(&chan->fifo->base.mutex);
-	nvkm_wr32(device, 0x002634, chan->base.chid);
-	if (nvkm_msec(device, 2000,
-		if (nvkm_rd32(device, 0x002634) == chan->base.chid)
-			break;
-	) < 0) {
-		nvkm_error(subdev, "channel %d [%s] kick timeout\n",
-			   chan->base.chid, chan->base.object.client->name);
-		ret = -ETIMEDOUT;
-	}
-	mutex_unlock(&chan->fifo->base.mutex);
-
-	if (ret && suspend)
-		return ret;
-
-	if (offset) {
-		nvkm_kmap(inst);
-		nvkm_wo32(inst, offset + 0x00, 0x00000000);
-		nvkm_wo32(inst, offset + 0x04, 0x00000000);
-		nvkm_done(inst);
-	}
-
-	return ret;
-}
-
-static int
-gf100_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base,
-			      struct nvkm_engine *engine)
-{
-	const u32 offset = gf100_fifo_gpfifo_engine_addr(engine);
-	struct gf100_fifo_chan *chan = gf100_fifo_chan(base);
-	struct gf100_fifo_engn *engn = gf100_fifo_gpfifo_engine(chan, engine);
-	struct nvkm_gpuobj *inst = chan->base.inst;
-
-	if (offset) {
-		nvkm_kmap(inst);
-		nvkm_wo32(inst, offset + 0x00, lower_32_bits(engn->vma->addr) | 4);
-		nvkm_wo32(inst, offset + 0x04, upper_32_bits(engn->vma->addr));
-		nvkm_done(inst);
-	}
-
-	return 0;
-}
-
-static void
-gf100_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *base,
-			      struct nvkm_engine *engine)
-{
-	struct gf100_fifo_chan *chan = gf100_fifo_chan(base);
-	struct gf100_fifo_engn *engn = gf100_fifo_gpfifo_engine(chan, engine);
-	nvkm_vmm_put(chan->base.vmm, &engn->vma);
-	nvkm_gpuobj_del(&engn->inst);
-}
-
-static int
-gf100_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *base,
-			      struct nvkm_engine *engine,
-			      struct nvkm_object *object)
-{
-	struct gf100_fifo_chan *chan = gf100_fifo_chan(base);
-	struct gf100_fifo_engn *engn = gf100_fifo_gpfifo_engine(chan, engine);
-	int ret;
-
-	if (!gf100_fifo_gpfifo_engine_addr(engine))
-		return 0;
-
-	ret = nvkm_object_bind(object, NULL, 0, &engn->inst);
-	if (ret)
-		return ret;
-
-	ret = nvkm_vmm_get(chan->base.vmm, 12, engn->inst->size, &engn->vma);
-	if (ret)
-		return ret;
-
-	return nvkm_memory_map(engn->inst, 0, chan->base.vmm, engn->vma, NULL, 0);
-}
-
-static void
-gf100_fifo_gpfifo_fini(struct nvkm_fifo_chan *base)
-{
-	struct gf100_fifo_chan *chan = gf100_fifo_chan(base);
-	struct gf100_fifo *fifo = chan->fifo;
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	u32 coff = chan->base.chid * 8;
-
-	if (!list_empty(&chan->head) && !chan->killed) {
-		gf100_fifo_runlist_remove(fifo, chan);
-		nvkm_mask(device, 0x003004 + coff, 0x00000001, 0x00000000);
-		gf100_fifo_runlist_commit(fifo);
-	}
-
-	gf100_fifo_intr_engine(fifo);
-
-	nvkm_wr32(device, 0x003000 + coff, 0x00000000);
-}
-
-static void
-gf100_fifo_gpfifo_init(struct nvkm_fifo_chan *base)
-{
-	struct gf100_fifo_chan *chan = gf100_fifo_chan(base);
-	struct gf100_fifo *fifo = chan->fifo;
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	u32 addr = chan->base.inst->addr >> 12;
-	u32 coff = chan->base.chid * 8;
-
-	nvkm_wr32(device, 0x003000 + coff, 0xc0000000 | addr);
-
-	if (list_empty(&chan->head) && !chan->killed) {
-		gf100_fifo_runlist_insert(fifo, chan);
-		nvkm_wr32(device, 0x003004 + coff, 0x001f0001);
-		gf100_fifo_runlist_commit(fifo);
-	}
-}
-
-static void *
-gf100_fifo_gpfifo_dtor(struct nvkm_fifo_chan *base)
-{
-	return gf100_fifo_chan(base);
-}
-
-static const struct nvkm_fifo_chan_func
-gf100_fifo_gpfifo_func = {
-	.dtor = gf100_fifo_gpfifo_dtor,
-	.init = gf100_fifo_gpfifo_init,
-	.fini = gf100_fifo_gpfifo_fini,
-	.ntfy = gf100_fifo_chan_ntfy,
-	.engine_ctor = gf100_fifo_gpfifo_engine_ctor,
-	.engine_dtor = gf100_fifo_gpfifo_engine_dtor,
-	.engine_init = gf100_fifo_gpfifo_engine_init,
-	.engine_fini = gf100_fifo_gpfifo_engine_fini,
-};
-
-static int
-gf100_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
-		      void *data, u32 size, struct nvkm_object **pobject)
-{
-	union {
-		struct fermi_channel_gpfifo_v0 v0;
-	} *args = data;
-	struct gf100_fifo *fifo = gf100_fifo(base);
-	struct nvkm_object *parent = oclass->parent;
-	struct gf100_fifo_chan *chan;
-	u64 usermem, ioffset, ilength;
-	int ret = -ENOSYS, i;
-
-	nvif_ioctl(parent, "create channel gpfifo size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-		nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx "
-				   "ioffset %016llx ilength %08x\n",
-			   args->v0.version, args->v0.vmm, args->v0.ioffset,
-			   args->v0.ilength);
-		if (!args->v0.vmm)
-			return -EINVAL;
-	} else
-		return ret;
-
-	/* allocate channel */
-	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
-		return -ENOMEM;
-	*pobject = &chan->base.object;
-	chan->fifo = fifo;
-	INIT_LIST_HEAD(&chan->head);
-
-	ret = nvkm_fifo_chan_ctor(&gf100_fifo_gpfifo_func, &fifo->base,
-				  0x1000, 0x1000, true, args->v0.vmm, 0,
-				  BIT(GF100_FIFO_ENGN_GR) |
-				  BIT(GF100_FIFO_ENGN_MSPDEC) |
-				  BIT(GF100_FIFO_ENGN_MSPPP) |
-				  BIT(GF100_FIFO_ENGN_MSVLD) |
-				  BIT(GF100_FIFO_ENGN_CE0) |
-				  BIT(GF100_FIFO_ENGN_CE1) |
-				  BIT(GF100_FIFO_ENGN_SW),
-				  1, fifo->user.bar->addr, 0x1000,
-				  oclass, &chan->base);
-	if (ret)
-		return ret;
-
-	args->v0.chid = chan->base.chid;
-
-	/* clear channel control registers */
-
-	usermem = chan->base.chid * 0x1000;
-	ioffset = args->v0.ioffset;
-	ilength = order_base_2(args->v0.ilength / 8);
-
-	nvkm_kmap(fifo->user.mem);
-	for (i = 0; i < 0x1000; i += 4)
-		nvkm_wo32(fifo->user.mem, usermem + i, 0x00000000);
-	nvkm_done(fifo->user.mem);
-	usermem = nvkm_memory_addr(fifo->user.mem) + usermem;
-
-	/* RAMFC */
-	nvkm_kmap(chan->base.inst);
-	nvkm_wo32(chan->base.inst, 0x08, lower_32_bits(usermem));
-	nvkm_wo32(chan->base.inst, 0x0c, upper_32_bits(usermem));
-	nvkm_wo32(chan->base.inst, 0x10, 0x0000face);
-	nvkm_wo32(chan->base.inst, 0x30, 0xfffff902);
-	nvkm_wo32(chan->base.inst, 0x48, lower_32_bits(ioffset));
-	nvkm_wo32(chan->base.inst, 0x4c, upper_32_bits(ioffset) |
-					 (ilength << 16));
-	nvkm_wo32(chan->base.inst, 0x54, 0x00000002);
-	nvkm_wo32(chan->base.inst, 0x84, 0x20400000);
-	nvkm_wo32(chan->base.inst, 0x94, 0x30000001);
-	nvkm_wo32(chan->base.inst, 0x9c, 0x00000100);
-	nvkm_wo32(chan->base.inst, 0xa4, 0x1f1f1f1f);
-	nvkm_wo32(chan->base.inst, 0xa8, 0x1f1f1f1f);
-	nvkm_wo32(chan->base.inst, 0xac, 0x0000001f);
-	nvkm_wo32(chan->base.inst, 0xb8, 0xf8000000);
-	nvkm_wo32(chan->base.inst, 0xf8, 0x10003080); /* 0x002310 */
-	nvkm_wo32(chan->base.inst, 0xfc, 0x10000010); /* 0x002350 */
-	nvkm_done(chan->base.inst);
-	return 0;
-}
-
-const struct nvkm_fifo_chan_oclass
-gf100_fifo_gpfifo_oclass = {
-	.base.oclass = FERMI_CHANNEL_GPFIFO,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = gf100_fifo_gpfifo_new,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
deleted file mode 100644
index 80456ec70e8a..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
+++ /dev/null
@@ -1,361 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "changk104.h"
-#include "cgrp.h"
-
-#include <core/client.h>
-#include <core/gpuobj.h>
-#include <subdev/fb.h>
-#include <subdev/mmu.h>
-#include <subdev/timer.h>
-
-#include <nvif/class.h>
-#include <nvif/cla06f.h>
-#include <nvif/unpack.h>
-
-int
-gk104_fifo_gpfifo_kick_locked(struct gk104_fifo_chan *chan)
-{
-	struct gk104_fifo *fifo = chan->fifo;
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	struct nvkm_client *client = chan->base.object.client;
-	struct nvkm_fifo_cgrp *cgrp = chan->cgrp;
-	int ret = 0;
-
-	if (cgrp)
-		nvkm_wr32(device, 0x002634, cgrp->id | 0x01000000);
-	else
-		nvkm_wr32(device, 0x002634, chan->base.chid);
-	if (nvkm_msec(device, 2000,
-		if (!(nvkm_rd32(device, 0x002634) & 0x00100000))
-			break;
-	) < 0) {
-		nvkm_error(subdev, "%s %d [%s] kick timeout\n",
-			   cgrp ? "tsg" : "channel",
-			   cgrp ? cgrp->id : chan->base.chid, client->name);
-		nvkm_fifo_recover_chan(&fifo->base, chan->base.chid);
-		ret = -ETIMEDOUT;
-	}
-	return ret;
-}
-
-int
-gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan)
-{
-	int ret;
-	mutex_lock(&chan->base.fifo->mutex);
-	ret = gk104_fifo_gpfifo_kick_locked(chan);
-	mutex_unlock(&chan->base.fifo->mutex);
-	return ret;
-}
-
-static u32
-gk104_fifo_gpfifo_engine_addr(struct nvkm_engine *engine)
-{
-	switch (engine->subdev.type) {
-	case NVKM_ENGINE_SW    :
-	case NVKM_ENGINE_CE    : return 0;
-	case NVKM_ENGINE_GR    : return 0x0210;
-	case NVKM_ENGINE_SEC   : return 0x0220;
-	case NVKM_ENGINE_MSPDEC: return 0x0250;
-	case NVKM_ENGINE_MSPPP : return 0x0260;
-	case NVKM_ENGINE_MSVLD : return 0x0270;
-	case NVKM_ENGINE_VIC   : return 0x0280;
-	case NVKM_ENGINE_MSENC : return 0x0290;
-	case NVKM_ENGINE_NVDEC : return 0x02100270;
-	case NVKM_ENGINE_NVENC :
-		if (engine->subdev.inst)
-			return 0x0210;
-		return 0x02100290;
-	default:
-		WARN_ON(1);
-		return 0;
-	}
-}
-
-struct gk104_fifo_engn *
-gk104_fifo_gpfifo_engine(struct gk104_fifo_chan *chan, struct nvkm_engine *engine)
-{
-	int engi = chan->base.fifo->func->engine_id(chan->base.fifo, engine);
-	if (engi >= 0)
-		return &chan->engn[engi];
-	return NULL;
-}
-
-static int
-gk104_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base,
-			      struct nvkm_engine *engine, bool suspend)
-{
-	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
-	struct nvkm_gpuobj *inst = chan->base.inst;
-	u32 offset = gk104_fifo_gpfifo_engine_addr(engine);
-	int ret;
-
-	ret = gk104_fifo_gpfifo_kick(chan);
-	if (ret && suspend)
-		return ret;
-
-	if (offset) {
-		nvkm_kmap(inst);
-		nvkm_wo32(inst, (offset & 0xffff) + 0x00, 0x00000000);
-		nvkm_wo32(inst, (offset & 0xffff) + 0x04, 0x00000000);
-		if ((offset >>= 16)) {
-			nvkm_wo32(inst, offset + 0x00, 0x00000000);
-			nvkm_wo32(inst, offset + 0x04, 0x00000000);
-		}
-		nvkm_done(inst);
-	}
-
-	return ret;
-}
-
-static int
-gk104_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base,
-			      struct nvkm_engine *engine)
-{
-	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
-	struct gk104_fifo_engn *engn = gk104_fifo_gpfifo_engine(chan, engine);
-	struct nvkm_gpuobj *inst = chan->base.inst;
-	u32 offset = gk104_fifo_gpfifo_engine_addr(engine);
-
-	if (offset) {
-		u32 datalo = lower_32_bits(engn->vma->addr) | 0x00000004;
-		u32 datahi = upper_32_bits(engn->vma->addr);
-		nvkm_kmap(inst);
-		nvkm_wo32(inst, (offset & 0xffff) + 0x00, datalo);
-		nvkm_wo32(inst, (offset & 0xffff) + 0x04, datahi);
-		if ((offset >>= 16)) {
-			nvkm_wo32(inst, offset + 0x00, datalo);
-			nvkm_wo32(inst, offset + 0x04, datahi);
-		}
-		nvkm_done(inst);
-	}
-
-	return 0;
-}
-
-void
-gk104_fifo_gpfifo_engine_dtor(struct nvkm_fifo_chan *base,
-			      struct nvkm_engine *engine)
-{
-	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
-	struct gk104_fifo_engn *engn = gk104_fifo_gpfifo_engine(chan, engine);
-	nvkm_vmm_put(chan->base.vmm, &engn->vma);
-	nvkm_gpuobj_del(&engn->inst);
-}
-
-int
-gk104_fifo_gpfifo_engine_ctor(struct nvkm_fifo_chan *base,
-			      struct nvkm_engine *engine,
-			      struct nvkm_object *object)
-{
-	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
-	struct gk104_fifo_engn *engn = gk104_fifo_gpfifo_engine(chan, engine);
-	int ret;
-
-	if (!gk104_fifo_gpfifo_engine_addr(engine)) {
-		if (engine->subdev.type != NVKM_ENGINE_CE ||
-		    engine->subdev.device->card_type < GV100)
-			return 0;
-	}
-
-	ret = nvkm_object_bind(object, NULL, 0, &engn->inst);
-	if (ret)
-		return ret;
-
-	if (!gk104_fifo_gpfifo_engine_addr(engine))
-		return 0;
-
-	ret = nvkm_vmm_get(chan->base.vmm, 12, engn->inst->size, &engn->vma);
-	if (ret)
-		return ret;
-
-	return nvkm_memory_map(engn->inst, 0, chan->base.vmm, engn->vma, NULL, 0);
-}
-
-void
-gk104_fifo_gpfifo_fini(struct nvkm_fifo_chan *base)
-{
-	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
-	struct gk104_fifo *fifo = chan->fifo;
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	u32 coff = chan->base.chid * 8;
-
-	if (!list_empty(&chan->head)) {
-		gk104_fifo_runlist_remove(fifo, chan);
-		nvkm_mask(device, 0x800004 + coff, 0x00000800, 0x00000800);
-		gk104_fifo_gpfifo_kick(chan);
-		gk104_fifo_runlist_update(fifo, chan->runl);
-	}
-
-	nvkm_wr32(device, 0x800000 + coff, 0x00000000);
-}
-
-void
-gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *base)
-{
-	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
-	struct gk104_fifo *fifo = chan->fifo;
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	u32 addr = chan->base.inst->addr >> 12;
-	u32 coff = chan->base.chid * 8;
-
-	nvkm_mask(device, 0x800004 + coff, 0x000f0000, chan->runl << 16);
-	nvkm_wr32(device, 0x800000 + coff, 0x80000000 | addr);
-
-	if (list_empty(&chan->head) && !chan->killed) {
-		gk104_fifo_runlist_insert(fifo, chan);
-		nvkm_mask(device, 0x800004 + coff, 0x00000400, 0x00000400);
-		gk104_fifo_runlist_update(fifo, chan->runl);
-		nvkm_mask(device, 0x800004 + coff, 0x00000400, 0x00000400);
-	}
-}
-
-void *
-gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *base)
-{
-	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
-	kfree(chan->cgrp);
-	return chan;
-}
-
-const struct nvkm_fifo_chan_func
-gk104_fifo_gpfifo_func = {
-	.dtor = gk104_fifo_gpfifo_dtor,
-	.init = gk104_fifo_gpfifo_init,
-	.fini = gk104_fifo_gpfifo_fini,
-	.ntfy = gf100_fifo_chan_ntfy,
-	.engine_ctor = gk104_fifo_gpfifo_engine_ctor,
-	.engine_dtor = gk104_fifo_gpfifo_engine_dtor,
-	.engine_init = gk104_fifo_gpfifo_engine_init,
-	.engine_fini = gk104_fifo_gpfifo_engine_fini,
-};
-
-static int
-gk104_fifo_gpfifo_new_(struct gk104_fifo *fifo, u64 *runlists, u16 *chid,
-		       u64 vmm, u64 ioffset, u64 ilength, u64 *inst, bool priv,
-		       const struct nvkm_oclass *oclass,
-		       struct nvkm_object **pobject)
-{
-	struct gk104_fifo_chan *chan;
-	int runlist = ffs(*runlists) -1, ret, i;
-	u64 usermem;
-
-	if (!vmm || runlist < 0 || runlist >= fifo->runlist_nr)
-		return -EINVAL;
-	*runlists = BIT_ULL(runlist);
-
-	/* Allocate the channel. */
-	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
-		return -ENOMEM;
-	*pobject = &chan->base.object;
-	chan->fifo = fifo;
-	chan->runl = runlist;
-	INIT_LIST_HEAD(&chan->head);
-
-	ret = nvkm_fifo_chan_ctor(&gk104_fifo_gpfifo_func, &fifo->base,
-				  0x1000, 0x1000, true, vmm, 0, fifo->runlist[runlist].engm_sw,
-				  1, fifo->user.bar->addr, 0x200,
-				  oclass, &chan->base);
-	if (ret)
-		return ret;
-
-	*chid = chan->base.chid;
-	*inst = chan->base.inst->addr;
-
-	/* Hack to support GPUs where even individual channels should be
-	 * part of a channel group.
-	 */
-	if (fifo->func->cgrp_force) {
-		if (!(chan->cgrp = kmalloc(sizeof(*chan->cgrp), GFP_KERNEL)))
-			return -ENOMEM;
-		chan->cgrp->id = chan->base.chid;
-		INIT_LIST_HEAD(&chan->cgrp->head);
-		INIT_LIST_HEAD(&chan->cgrp->chan);
-		chan->cgrp->chan_nr = 0;
-	}
-
-	/* Clear channel control registers. */
-	usermem = chan->base.chid * 0x200;
-	ilength = order_base_2(ilength / 8);
-
-	nvkm_kmap(fifo->user.mem);
-	for (i = 0; i < 0x200; i += 4)
-		nvkm_wo32(fifo->user.mem, usermem + i, 0x00000000);
-	nvkm_done(fifo->user.mem);
-	usermem = nvkm_memory_addr(fifo->user.mem) + usermem;
-
-	/* RAMFC */
-	nvkm_kmap(chan->base.inst);
-	nvkm_wo32(chan->base.inst, 0x08, lower_32_bits(usermem));
-	nvkm_wo32(chan->base.inst, 0x0c, upper_32_bits(usermem));
-	nvkm_wo32(chan->base.inst, 0x10, 0x0000face);
-	nvkm_wo32(chan->base.inst, 0x30, 0xfffff902);
-	nvkm_wo32(chan->base.inst, 0x48, lower_32_bits(ioffset));
-	nvkm_wo32(chan->base.inst, 0x4c, upper_32_bits(ioffset) |
-					 (ilength << 16));
-	nvkm_wo32(chan->base.inst, 0x84, 0x20400000);
-	nvkm_wo32(chan->base.inst, 0x94, 0x30000001);
-	nvkm_wo32(chan->base.inst, 0x9c, 0x00000100);
-	nvkm_wo32(chan->base.inst, 0xac, 0x0000001f);
-	nvkm_wo32(chan->base.inst, 0xe4, priv ? 0x00000020 : 0x00000000);
-	nvkm_wo32(chan->base.inst, 0xe8, chan->base.chid);
-	nvkm_wo32(chan->base.inst, 0xb8, 0xf8000000);
-	nvkm_wo32(chan->base.inst, 0xf8, 0x10003080); /* 0x002310 */
-	nvkm_wo32(chan->base.inst, 0xfc, 0x10000010); /* 0x002350 */
-	nvkm_done(chan->base.inst);
-	return 0;
-}
-
-int
-gk104_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass,
-		      void *data, u32 size, struct nvkm_object **pobject)
-{
-	struct nvkm_object *parent = oclass->parent;
-	union {
-		struct kepler_channel_gpfifo_a_v0 v0;
-	} *args = data;
-	int ret = -ENOSYS;
-
-	nvif_ioctl(parent, "create channel gpfifo size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-		nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx "
-				   "ioffset %016llx ilength %08x "
-				   "runlist %016llx priv %d\n",
-			   args->v0.version, args->v0.vmm, args->v0.ioffset,
-			   args->v0.ilength, args->v0.runlist, args->v0.priv);
-		return gk104_fifo_gpfifo_new_(fifo,
-					      &args->v0.runlist,
-					      &args->v0.chid,
-					       args->v0.vmm,
-					       args->v0.ioffset,
-					       args->v0.ilength,
-					      &args->v0.inst,
-					       args->v0.priv,
-					      oclass, pobject);
-	}
-
-	return ret;
-}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c
deleted file mode 100644
index 428f9b41165c..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright 2018 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-#include "changk104.h"
-#include "cgrp.h"
-
-#include <core/client.h>
-#include <core/gpuobj.h>
-
-#include <nvif/clc36f.h>
-#include <nvif/unpack.h>
-
-static u32
-gv100_fifo_gpfifo_submit_token(struct nvkm_fifo_chan *chan)
-{
-	return chan->chid;
-}
-
-static int
-gv100_fifo_gpfifo_engine_valid(struct gk104_fifo_chan *chan, bool ce, bool valid)
-{
-	struct nvkm_subdev *subdev = &chan->base.fifo->engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	const u32 mask = ce ? 0x00020000 : 0x00010000;
-	const u32 data = valid ? mask : 0x00000000;
-	int ret;
-
-	/* Block runlist to prevent the channel from being rescheduled. */
-	mutex_lock(&chan->fifo->base.mutex);
-	nvkm_mask(device, 0x002630, BIT(chan->runl), BIT(chan->runl));
-
-	/* Preempt the channel. */
-	ret = gk104_fifo_gpfifo_kick_locked(chan);
-	if (ret == 0) {
-		/* Update engine context validity. */
-		nvkm_kmap(chan->base.inst);
-		nvkm_mo32(chan->base.inst, 0x0ac, mask, data);
-		nvkm_done(chan->base.inst);
-	}
-
-	/* Resume runlist. */
-	nvkm_mask(device, 0x002630, BIT(chan->runl), 0);
-	mutex_unlock(&chan->fifo->base.mutex);
-	return ret;
-}
-
-int
-gv100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base,
-			      struct nvkm_engine *engine, bool suspend)
-{
-	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
-	struct nvkm_gpuobj *inst = chan->base.inst;
-	int ret;
-
-	if (engine->subdev.type == NVKM_ENGINE_CE) {
-		ret = gv100_fifo_gpfifo_engine_valid(chan, true, false);
-		if (ret && suspend)
-			return ret;
-
-		nvkm_kmap(inst);
-		nvkm_wo32(chan->base.inst, 0x220, 0x00000000);
-		nvkm_wo32(chan->base.inst, 0x224, 0x00000000);
-		nvkm_done(inst);
-		return ret;
-	}
-
-	ret = gv100_fifo_gpfifo_engine_valid(chan, false, false);
-	if (ret && suspend)
-		return ret;
-
-	nvkm_kmap(inst);
-	nvkm_wo32(inst, 0x0210, 0x00000000);
-	nvkm_wo32(inst, 0x0214, 0x00000000);
-	nvkm_done(inst);
-	return ret;
-}
-
-int
-gv100_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base,
-			      struct nvkm_engine *engine)
-{
-	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
-	struct gk104_fifo_engn *engn = gk104_fifo_gpfifo_engine(chan, engine);
-	struct nvkm_gpuobj *inst = chan->base.inst;
-
-	if (engine->subdev.type == NVKM_ENGINE_CE) {
-		const u64 bar2 = nvkm_memory_bar2(engn->inst->memory);
-
-		nvkm_kmap(inst);
-		nvkm_wo32(chan->base.inst, 0x220, lower_32_bits(bar2));
-		nvkm_wo32(chan->base.inst, 0x224, upper_32_bits(bar2));
-		nvkm_done(inst);
-
-		return gv100_fifo_gpfifo_engine_valid(chan, true, true);
-	}
-
-	nvkm_kmap(inst);
-	nvkm_wo32(inst, 0x210, lower_32_bits(engn->vma->addr) | 0x00000004);
-	nvkm_wo32(inst, 0x214, upper_32_bits(engn->vma->addr));
-	nvkm_done(inst);
-
-	return gv100_fifo_gpfifo_engine_valid(chan, false, true);
-}
-
-static const struct nvkm_fifo_chan_func
-gv100_fifo_gpfifo = {
-	.dtor = gk104_fifo_gpfifo_dtor,
-	.init = gk104_fifo_gpfifo_init,
-	.fini = gk104_fifo_gpfifo_fini,
-	.ntfy = gf100_fifo_chan_ntfy,
-	.engine_ctor = gk104_fifo_gpfifo_engine_ctor,
-	.engine_dtor = gk104_fifo_gpfifo_engine_dtor,
-	.engine_init = gv100_fifo_gpfifo_engine_init,
-	.engine_fini = gv100_fifo_gpfifo_engine_fini,
-	.submit_token = gv100_fifo_gpfifo_submit_token,
-};
-
-int
-gv100_fifo_gpfifo_new_(const struct nvkm_fifo_chan_func *func,
-		       struct gk104_fifo *fifo, u64 *runlists, u16 *chid,
-		       u64 vmm, u64 ioffset, u64 ilength, u64 *inst, bool priv,
-		       u32 *token, const struct nvkm_oclass *oclass,
-		       struct nvkm_object **pobject)
-{
-	struct gk104_fifo_chan *chan;
-	int runlist = ffs(*runlists) -1, ret, i;
-	u64 usermem;
-
-	if (!vmm || runlist < 0 || runlist >= fifo->runlist_nr)
-		return -EINVAL;
-	*runlists = BIT_ULL(runlist);
-
-	/* Allocate the channel. */
-	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
-		return -ENOMEM;
-	*pobject = &chan->base.object;
-	chan->fifo = fifo;
-	chan->runl = runlist;
-	INIT_LIST_HEAD(&chan->head);
-
-	ret = nvkm_fifo_chan_ctor(func, &fifo->base, 0x1000, 0x1000, true, vmm,
-				  0, fifo->runlist[runlist].engm, 1, fifo->user.bar->addr, 0x200,
-				  oclass, &chan->base);
-	if (ret)
-		return ret;
-
-	*chid = chan->base.chid;
-	*inst = chan->base.inst->addr;
-	*token = chan->base.func->submit_token(&chan->base);
-
-	/* Hack to support GPUs where even individual channels should be
-	 * part of a channel group.
-	 */
-	if (fifo->func->cgrp_force) {
-		if (!(chan->cgrp = kmalloc(sizeof(*chan->cgrp), GFP_KERNEL)))
-			return -ENOMEM;
-		chan->cgrp->id = chan->base.chid;
-		INIT_LIST_HEAD(&chan->cgrp->head);
-		INIT_LIST_HEAD(&chan->cgrp->chan);
-		chan->cgrp->chan_nr = 0;
-	}
-
-	/* Clear channel control registers. */
-	usermem = chan->base.chid * 0x200;
-	ilength = order_base_2(ilength / 8);
-
-	nvkm_kmap(fifo->user.mem);
-	for (i = 0; i < 0x200; i += 4)
-		nvkm_wo32(fifo->user.mem, usermem + i, 0x00000000);
-	nvkm_done(fifo->user.mem);
-	usermem = nvkm_memory_addr(fifo->user.mem) + usermem;
-
-	/* RAMFC */
-	nvkm_kmap(chan->base.inst);
-	nvkm_wo32(chan->base.inst, 0x008, lower_32_bits(usermem));
-	nvkm_wo32(chan->base.inst, 0x00c, upper_32_bits(usermem));
-	nvkm_wo32(chan->base.inst, 0x010, 0x0000face);
-	nvkm_wo32(chan->base.inst, 0x030, 0x7ffff902);
-	nvkm_wo32(chan->base.inst, 0x048, lower_32_bits(ioffset));
-	nvkm_wo32(chan->base.inst, 0x04c, upper_32_bits(ioffset) |
-					  (ilength << 16));
-	nvkm_wo32(chan->base.inst, 0x084, 0x20400000);
-	nvkm_wo32(chan->base.inst, 0x094, 0x30000001);
-	nvkm_wo32(chan->base.inst, 0x0e4, priv ? 0x00000020 : 0x00000000);
-	nvkm_wo32(chan->base.inst, 0x0e8, chan->base.chid);
-	nvkm_wo32(chan->base.inst, 0x0f4, 0x00001000);
-	nvkm_wo32(chan->base.inst, 0x0f8, 0x10003080);
-	nvkm_mo32(chan->base.inst, 0x218, 0x00000000, 0x00000000);
-	nvkm_done(chan->base.inst);
-	return 0;
-}
-
-int
-gv100_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass,
-		      void *data, u32 size, struct nvkm_object **pobject)
-{
-	struct nvkm_object *parent = oclass->parent;
-	union {
-		struct volta_channel_gpfifo_a_v0 v0;
-	} *args = data;
-	int ret = -ENOSYS;
-
-	nvif_ioctl(parent, "create channel gpfifo size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-		nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx "
-				   "ioffset %016llx ilength %08x "
-				   "runlist %016llx priv %d\n",
-			   args->v0.version, args->v0.vmm, args->v0.ioffset,
-			   args->v0.ilength, args->v0.runlist, args->v0.priv);
-		return gv100_fifo_gpfifo_new_(&gv100_fifo_gpfifo, fifo,
-					      &args->v0.runlist,
-					      &args->v0.chid,
-					       args->v0.vmm,
-					       args->v0.ioffset,
-					       args->v0.ilength,
-					      &args->v0.inst,
-					       args->v0.priv,
-					      &args->v0.token,
-					      oclass, pobject);
-	}
-
-	return ret;
-}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifonv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifonv50.c
deleted file mode 100644
index d8f28ec1e4a8..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifonv50.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright 2012 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
- */
-#include "channv50.h"
-
-#include <core/client.h>
-#include <core/ramht.h>
-
-#include <nvif/class.h>
-#include <nvif/cl506f.h>
-#include <nvif/unpack.h>
-
-static int
-nv50_fifo_gpfifo_new(struct nvkm_fifo *base, const struct nvkm_oclass *oclass,
-		     void *data, u32 size, struct nvkm_object **pobject)
-{
-	struct nvkm_object *parent = oclass->parent;
-	union {
-		struct nv50_channel_gpfifo_v0 v0;
-	} *args = data;
-	struct nv50_fifo *fifo = nv50_fifo(base);
-	struct nv50_fifo_chan *chan;
-	u64 ioffset, ilength;
-	int ret = -ENOSYS;
-
-	nvif_ioctl(parent, "create channel gpfifo size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-		nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx "
-				   "pushbuf %llx ioffset %016llx "
-				   "ilength %08x\n",
-			   args->v0.version, args->v0.vmm, args->v0.pushbuf,
-			   args->v0.ioffset, args->v0.ilength);
-		if (!args->v0.pushbuf)
-			return -EINVAL;
-	} else
-		return ret;
-
-	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
-		return -ENOMEM;
-	*pobject = &chan->base.object;
-
-	ret = nv50_fifo_chan_ctor(fifo, args->v0.vmm, args->v0.pushbuf,
-				  oclass, chan);
-	if (ret)
-		return ret;
-
-	args->v0.chid = chan->base.chid;
-	ioffset = args->v0.ioffset;
-	ilength = order_base_2(args->v0.ilength / 8);
-
-	nvkm_kmap(chan->ramfc);
-	nvkm_wo32(chan->ramfc, 0x3c, 0x403f6078);
-	nvkm_wo32(chan->ramfc, 0x44, 0x01003fff);
-	nvkm_wo32(chan->ramfc, 0x48, chan->base.push->node->offset >> 4);
-	nvkm_wo32(chan->ramfc, 0x50, lower_32_bits(ioffset));
-	nvkm_wo32(chan->ramfc, 0x54, upper_32_bits(ioffset) | (ilength << 16));
-	nvkm_wo32(chan->ramfc, 0x60, 0x7fffffff);
-	nvkm_wo32(chan->ramfc, 0x78, 0x00000000);
-	nvkm_wo32(chan->ramfc, 0x7c, 0x30000001);
-	nvkm_wo32(chan->ramfc, 0x80, ((chan->ramht->bits - 9) << 27) |
-				     (4 << 24) /* SEARCH_FULL */ |
-				     (chan->ramht->gpuobj->node->offset >> 4));
-	nvkm_done(chan->ramfc);
-	return 0;
-}
-
-const struct nvkm_fifo_chan_oclass
-nv50_fifo_gpfifo_oclass = {
-	.base.oclass = NV50_CHANNEL_GPFIFO,
-	.base.minver = 0,
-	.base.maxver = 0,
-	.ctor = nv50_fifo_gpfifo_new,
-};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c
deleted file mode 100644
index 99aafa103a31..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu102.c
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright 2018 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-#include "changk104.h"
-#include "cgrp.h"
-
-#include <core/client.h>
-#include <core/gpuobj.h>
-
-#include <nvif/clc36f.h>
-#include <nvif/unpack.h>
-
-static u32
-tu102_fifo_gpfifo_submit_token(struct nvkm_fifo_chan *base)
-{
-	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
-	return (chan->runl << 16) | chan->base.chid;
-}
-
-static const struct nvkm_fifo_chan_func
-tu102_fifo_gpfifo = {
-	.dtor = gk104_fifo_gpfifo_dtor,
-	.init = gk104_fifo_gpfifo_init,
-	.fini = gk104_fifo_gpfifo_fini,
-	.ntfy = gf100_fifo_chan_ntfy,
-	.engine_ctor = gk104_fifo_gpfifo_engine_ctor,
-	.engine_dtor = gk104_fifo_gpfifo_engine_dtor,
-	.engine_init = gv100_fifo_gpfifo_engine_init,
-	.engine_fini = gv100_fifo_gpfifo_engine_fini,
-	.submit_token = tu102_fifo_gpfifo_submit_token,
-};
-
-int
-tu102_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass,
-		      void *data, u32 size, struct nvkm_object **pobject)
-{
-	struct nvkm_object *parent = oclass->parent;
-	union {
-		struct volta_channel_gpfifo_a_v0 v0;
-	} *args = data;
-	int ret = -ENOSYS;
-
-	nvif_ioctl(parent, "create channel gpfifo size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
-		nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx "
-				   "ioffset %016llx ilength %08x "
-				   "runlist %016llx priv %d\n",
-			   args->v0.version, args->v0.vmm, args->v0.ioffset,
-			   args->v0.ilength, args->v0.runlist, args->v0.priv);
-		return gv100_fifo_gpfifo_new_(&tu102_fifo_gpfifo, fifo,
-					      &args->v0.runlist,
-					      &args->v0.chid,
-					       args->v0.vmm,
-					       args->v0.ioffset,
-					       args->v0.ilength,
-					      &args->v0.inst,
-					       args->v0.priv,
-					      &args->v0.token,
-					      oclass, pobject);
-	}
-
-	return ret;
-}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c
index faf0fe9f704c..33066c8cdc64 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c
@@ -19,32 +19,180 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-#include "gk104.h"
+#include "priv.h"
+#include "chan.h"
+#include "chid.h"
 #include "cgrp.h"
-#include "changk104.h"
-#include "user.h"
+#include "runl.h"
+#include "runq.h"
 
 #include <core/gpuobj.h>
+#include <subdev/mmu.h>
 
 #include <nvif/class.h>
 
+static u32
+gv100_chan_doorbell_handle(struct nvkm_chan *chan)
+{
+	return chan->id;
+}
+
+static int
+gv100_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv)
+{
+	const u64 userd = nvkm_memory_addr(chan->userd.mem) + chan->userd.base;
+	const u32 limit2 = ilog2(length / 8);
+
+	nvkm_kmap(chan->inst);
+	nvkm_wo32(chan->inst, 0x008, lower_32_bits(userd));
+	nvkm_wo32(chan->inst, 0x00c, upper_32_bits(userd));
+	nvkm_wo32(chan->inst, 0x010, 0x0000face);
+	nvkm_wo32(chan->inst, 0x030, 0x7ffff902);
+	nvkm_wo32(chan->inst, 0x048, lower_32_bits(offset));
+	nvkm_wo32(chan->inst, 0x04c, upper_32_bits(offset) | (limit2 << 16));
+	nvkm_wo32(chan->inst, 0x084, 0x20400000);
+	nvkm_wo32(chan->inst, 0x094, 0x30000000 | devm);
+	nvkm_wo32(chan->inst, 0x0e4, priv ? 0x00000020 : 0x00000000);
+	nvkm_wo32(chan->inst, 0x0e8, chan->id);
+	nvkm_wo32(chan->inst, 0x0f4, 0x00001000 | (priv ? 0x00000100 : 0x00000000));
+	nvkm_wo32(chan->inst, 0x0f8, 0x10003080);
+	nvkm_mo32(chan->inst, 0x218, 0x00000000, 0x00000000);
+	nvkm_done(chan->inst);
+	return 0;
+}
+
+const struct nvkm_chan_func_ramfc
+gv100_chan_ramfc = {
+	.write = gv100_chan_ramfc_write,
+	.devm = 0xfff,
+	.priv = true,
+};
+
+const struct nvkm_chan_func_userd
+gv100_chan_userd = {
+	.bar = -1,
+	.size = 0x200,
+	.clear = gf100_chan_userd_clear,
+};
+
+static const struct nvkm_chan_func
+gv100_chan = {
+	.inst = &gf100_chan_inst,
+	.userd = &gv100_chan_userd,
+	.ramfc = &gv100_chan_ramfc,
+	.bind = gk104_chan_bind_inst,
+	.unbind = gk104_chan_unbind,
+	.start = gk104_chan_start,
+	.stop = gk104_chan_stop,
+	.preempt = gk110_chan_preempt,
+	.doorbell_handle = gv100_chan_doorbell_handle,
+};
+
+void
+gv100_ectx_bind(struct nvkm_engn *engn, struct nvkm_cctx *cctx, struct nvkm_chan *chan)
+{
+	u64 addr = 0ULL;
+
+	if (cctx) {
+		addr  = cctx->vctx->vma->addr;
+		addr |= 4ULL;
+	}
+
+	nvkm_kmap(chan->inst);
+	nvkm_wo32(chan->inst, 0x210, lower_32_bits(addr));
+	nvkm_wo32(chan->inst, 0x214, upper_32_bits(addr));
+	nvkm_mo32(chan->inst, 0x0ac, 0x00010000, cctx ? 0x00010000 : 0x00000000);
+	nvkm_done(chan->inst);
+}
+
+const struct nvkm_engn_func
+gv100_engn = {
+	.chsw = gk104_engn_chsw,
+	.cxid = gk104_engn_cxid,
+	.ctor = gk104_ectx_ctor,
+	.bind = gv100_ectx_bind,
+};
+
+void
+gv100_ectx_ce_bind(struct nvkm_engn *engn, struct nvkm_cctx *cctx, struct nvkm_chan *chan)
+{
+	const u64 bar2 = cctx ? nvkm_memory_bar2(cctx->vctx->inst->memory) : 0ULL;
+
+	nvkm_kmap(chan->inst);
+	nvkm_wo32(chan->inst, 0x220, lower_32_bits(bar2));
+	nvkm_wo32(chan->inst, 0x224, upper_32_bits(bar2));
+	nvkm_mo32(chan->inst, 0x0ac, 0x00020000, cctx ? 0x00020000 : 0x00000000);
+	nvkm_done(chan->inst);
+}
+
+int
+gv100_ectx_ce_ctor(struct nvkm_engn *engn, struct nvkm_vctx *vctx)
+{
+	if (nvkm_memory_bar2(vctx->inst->memory) == ~0ULL)
+		return -EFAULT;
+
+	return 0;
+}
+
+const struct nvkm_engn_func
+gv100_engn_ce = {
+	.chsw = gk104_engn_chsw,
+	.cxid = gk104_engn_cxid,
+	.ctor = gv100_ectx_ce_ctor,
+	.bind = gv100_ectx_ce_bind,
+};
+
+static bool
+gv100_runq_intr_1_ctxnotvalid(struct nvkm_runq *runq, int chid)
+{
+	struct nvkm_fifo *fifo = runq->fifo;
+	struct nvkm_device *device = fifo->engine.subdev.device;
+	struct nvkm_chan *chan;
+	unsigned long flags;
+
+	RUNQ_ERROR(runq, "CTXNOTVALID chid:%d", chid);
+
+	chan = nvkm_chan_get_chid(&fifo->engine, chid, &flags);
+	if (WARN_ON_ONCE(!chan))
+		return false;
+
+	nvkm_chan_error(chan, true);
+	nvkm_chan_put(&chan, flags);
+
+	nvkm_mask(device, 0x0400ac + (runq->id * 0x2000), 0x00030000, 0x00030000);
+	nvkm_wr32(device, 0x040148 + (runq->id * 0x2000), 0x80000000);
+	return true;
+}
+
+const struct nvkm_runq_func
+gv100_runq = {
+	.init = gk208_runq_init,
+	.intr = gk104_runq_intr,
+	.intr_0_names = gk104_runq_intr_0_names,
+	.intr_1_ctxnotvalid = gv100_runq_intr_1_ctxnotvalid,
+	.idle = gk104_runq_idle,
+};
+
+void
+gv100_runl_preempt(struct nvkm_runl *runl)
+{
+	nvkm_wr32(runl->fifo->engine.subdev.device, 0x002638, BIT(runl->id));
+}
+
 void
-gv100_fifo_runlist_chan(struct gk104_fifo_chan *chan,
-			struct nvkm_memory *memory, u32 offset)
+gv100_runl_insert_chan(struct nvkm_chan *chan, struct nvkm_memory *memory, u64 offset)
 {
-	struct nvkm_memory *usermem = chan->fifo->user.mem;
-	const u64 user = nvkm_memory_addr(usermem) + (chan->base.chid * 0x200);
-	const u64 inst = chan->base.inst->addr;
+	const u64 user = nvkm_memory_addr(chan->userd.mem) + chan->userd.base;
+	const u64 inst = chan->inst->addr;
 
-	nvkm_wo32(memory, offset + 0x0, lower_32_bits(user));
+	nvkm_wo32(memory, offset + 0x0, lower_32_bits(user) | chan->runq << 1);
 	nvkm_wo32(memory, offset + 0x4, upper_32_bits(user));
-	nvkm_wo32(memory, offset + 0x8, lower_32_bits(inst) | chan->base.chid);
+	nvkm_wo32(memory, offset + 0x8, lower_32_bits(inst) | chan->id);
 	nvkm_wo32(memory, offset + 0xc, upper_32_bits(inst));
 }
 
 void
-gv100_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *cgrp,
-			struct nvkm_memory *memory, u32 offset)
+gv100_runl_insert_cgrp(struct nvkm_cgrp *cgrp, struct nvkm_memory *memory, u64 offset)
 {
 	nvkm_wo32(memory, offset + 0x0, (128 << 24) | (3 << 16) | 0x00000001);
 	nvkm_wo32(memory, offset + 0x4, cgrp->chan_nr);
@@ -52,16 +200,24 @@ gv100_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *cgrp,
 	nvkm_wo32(memory, offset + 0xc, 0x00000000);
 }
 
-static const struct gk104_fifo_runlist_func
-gv100_fifo_runlist = {
+static const struct nvkm_runl_func
+gv100_runl = {
+	.runqs = 2,
 	.size = 16,
-	.cgrp = gv100_fifo_runlist_cgrp,
-	.chan = gv100_fifo_runlist_chan,
-	.commit = gk104_fifo_runlist_commit,
+	.update = nv50_runl_update,
+	.insert_cgrp = gv100_runl_insert_cgrp,
+	.insert_chan = gv100_runl_insert_chan,
+	.commit = gk104_runl_commit,
+	.wait = nv50_runl_wait,
+	.pending = gk104_runl_pending,
+	.block = gk104_runl_block,
+	.allow = gk104_runl_allow,
+	.preempt = gv100_runl_preempt,
+	.preempt_pending = gf100_runl_preempt_pending,
 };
 
 const struct nvkm_enum
-gv100_fifo_fault_gpcclient[] = {
+gv100_fifo_mmu_fault_gpcclient[] = {
 	{ 0x00, "T1_0" },
 	{ 0x01, "T1_1" },
 	{ 0x02, "T1_2" },
@@ -163,7 +319,7 @@ gv100_fifo_fault_gpcclient[] = {
 };
 
 const struct nvkm_enum
-gv100_fifo_fault_hubclient[] = {
+gv100_fifo_mmu_fault_hubclient[] = {
 	{ 0x00, "VIP" },
 	{ 0x01, "CE0" },
 	{ 0x02, "CE1" },
@@ -225,7 +381,7 @@ gv100_fifo_fault_hubclient[] = {
 };
 
 const struct nvkm_enum
-gv100_fifo_fault_reason[] = {
+gv100_fifo_mmu_fault_reason[] = {
 	{ 0x00, "PDE" },
 	{ 0x01, "PDE_SIZE" },
 	{ 0x02, "PTE" },
@@ -246,7 +402,7 @@ gv100_fifo_fault_reason[] = {
 };
 
 static const struct nvkm_enum
-gv100_fifo_fault_engine[] = {
+gv100_fifo_mmu_fault_engine[] = {
 	{ 0x01, "DISPLAY" },
 	{ 0x03, "PTP" },
 	{ 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR },
@@ -273,7 +429,7 @@ gv100_fifo_fault_engine[] = {
 };
 
 const struct nvkm_enum
-gv100_fifo_fault_access[] = {
+gv100_fifo_mmu_fault_access[] = {
 	{ 0x0, "VIRT_READ" },
 	{ 0x1, "VIRT_WRITE" },
 	{ 0x2, "VIRT_ATOMIC" },
@@ -286,23 +442,51 @@ gv100_fifo_fault_access[] = {
 	{}
 };
 
-static const struct gk104_fifo_func
+static const struct nvkm_fifo_func_mmu_fault
+gv100_fifo_mmu_fault = {
+	.recover = gf100_fifo_mmu_fault_recover,
+	.access = gv100_fifo_mmu_fault_access,
+	.engine = gv100_fifo_mmu_fault_engine,
+	.reason = gv100_fifo_mmu_fault_reason,
+	.hubclient = gv100_fifo_mmu_fault_hubclient,
+	.gpcclient = gv100_fifo_mmu_fault_gpcclient,
+};
+
+static void
+gv100_fifo_intr_ctxsw_timeout(struct nvkm_fifo *fifo, u32 engm)
+{
+	struct nvkm_runl *runl;
+	struct nvkm_engn *engn;
+
+	nvkm_runl_foreach(runl, fifo) {
+		nvkm_runl_foreach_engn_cond(engn, runl, engm & BIT(engn->id))
+			nvkm_runl_rc_engn(runl, engn);
+	}
+}
+
+static const struct nvkm_fifo_func
 gv100_fifo = {
-	.pbdma = &gm200_fifo_pbdma,
-	.fault.access = gv100_fifo_fault_access,
-	.fault.engine = gv100_fifo_fault_engine,
-	.fault.reason = gv100_fifo_fault_reason,
-	.fault.hubclient = gv100_fifo_fault_hubclient,
-	.fault.gpcclient = gv100_fifo_fault_gpcclient,
-	.runlist = &gv100_fifo_runlist,
-	.user = {{-1,-1,VOLTA_USERMODE_A      }, gv100_fifo_user_new   },
-	.chan = {{ 0, 0,VOLTA_CHANNEL_GPFIFO_A}, gv100_fifo_gpfifo_new },
-	.cgrp_force = true,
+	.chid_nr = gm200_fifo_chid_nr,
+	.chid_ctor = gk110_fifo_chid_ctor,
+	.runq_nr = gm200_fifo_runq_nr,
+	.runl_ctor = gk104_fifo_runl_ctor,
+	.init = gk104_fifo_init,
+	.init_pbdmas = gk104_fifo_init_pbdmas,
+	.intr = gk104_fifo_intr,
+	.intr_ctxsw_timeout = gv100_fifo_intr_ctxsw_timeout,
+	.mmu_fault = &gv100_fifo_mmu_fault,
+	.nonstall = &gf100_fifo_nonstall,
+	.runl = &gv100_runl,
+	.runq = &gv100_runq,
+	.engn = &gv100_engn,
+	.engn_ce = &gv100_engn_ce,
+	.cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A  }, &gk110_cgrp, .force = true },
+	.chan = {{ 0, 0,  VOLTA_CHANNEL_GPFIFO_A }, &gv100_chan },
 };
 
 int
 gv100_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	       struct nvkm_fifo **pfifo)
 {
-	return gk104_fifo_new_(&gv100_fifo, device, type, inst, 4096, pfifo);
+	return nvkm_fifo_new_(&gv100_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.c
index c6730c124769..674faf002b20 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.c
@@ -21,38 +21,201 @@
  *
  * Authors: Ben Skeggs
  */
-#include "nv04.h"
-#include "channv04.h"
+#include "priv.h"
+#include "cgrp.h"
+#include "chan.h"
+#include "chid.h"
+#include "runl.h"
+
 #include "regsnv04.h"
 
-#include <core/client.h>
 #include <core/ramht.h>
 #include <subdev/instmem.h>
+#include <subdev/mc.h>
 #include <subdev/timer.h>
 #include <engine/sw.h>
 
-static const struct nv04_fifo_ramfc
-nv04_fifo_ramfc[] = {
-	{ 32,  0, 0x00,  0, NV04_PFIFO_CACHE1_DMA_PUT },
-	{ 32,  0, 0x04,  0, NV04_PFIFO_CACHE1_DMA_GET },
-	{ 16,  0, 0x08,  0, NV04_PFIFO_CACHE1_DMA_INSTANCE },
-	{ 16, 16, 0x08,  0, NV04_PFIFO_CACHE1_DMA_DCOUNT },
-	{ 32,  0, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_STATE },
-	{ 32,  0, 0x10,  0, NV04_PFIFO_CACHE1_DMA_FETCH },
-	{ 32,  0, 0x14,  0, NV04_PFIFO_CACHE1_ENGINE },
-	{ 32,  0, 0x18,  0, NV04_PFIFO_CACHE1_PULL1 },
-	{}
+#include <nvif/class.h>
+
+void
+nv04_chan_stop(struct nvkm_chan *chan)
+{
+	struct nvkm_fifo *fifo = chan->cgrp->runl->fifo;
+	struct nvkm_device *device = fifo->engine.subdev.device;
+	struct nvkm_memory *fctx = device->imem->ramfc;
+	const struct nvkm_ramfc_layout *c;
+	unsigned long flags;
+	u32 data = chan->ramfc_offset;
+	u32 chid;
+
+	/* prevent fifo context switches */
+	spin_lock_irqsave(&fifo->lock, flags);
+	nvkm_wr32(device, NV03_PFIFO_CACHES, 0);
+
+	/* if this channel is active, replace it with a null context */
+	chid = nvkm_rd32(device, NV03_PFIFO_CACHE1_PUSH1) & fifo->chid->mask;
+	if (chid == chan->id) {
+		nvkm_mask(device, NV04_PFIFO_CACHE1_DMA_PUSH, 0x00000001, 0);
+		nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH0, 0);
+		nvkm_mask(device, NV04_PFIFO_CACHE1_PULL0, 0x00000001, 0);
+
+		c = chan->func->ramfc->layout;
+		nvkm_kmap(fctx);
+		do {
+			u32 rm = ((1ULL << c->bits) - 1) << c->regs;
+			u32 cm = ((1ULL << c->bits) - 1) << c->ctxs;
+			u32 rv = (nvkm_rd32(device, c->regp) &  rm) >> c->regs;
+			u32 cv = (nvkm_ro32(fctx, c->ctxp + data) & ~cm);
+			nvkm_wo32(fctx, c->ctxp + data, cv | (rv << c->ctxs));
+		} while ((++c)->bits);
+		nvkm_done(fctx);
+
+		c = chan->func->ramfc->layout;
+		do {
+			nvkm_wr32(device, c->regp, 0x00000000);
+		} while ((++c)->bits);
+
+		nvkm_wr32(device, NV03_PFIFO_CACHE1_GET, 0);
+		nvkm_wr32(device, NV03_PFIFO_CACHE1_PUT, 0);
+		nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH1, fifo->chid->mask);
+		nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH0, 1);
+		nvkm_wr32(device, NV04_PFIFO_CACHE1_PULL0, 1);
+	}
+
+	/* restore normal operation, after disabling dma mode */
+	nvkm_mask(device, NV04_PFIFO_MODE, BIT(chan->id), 0);
+	nvkm_wr32(device, NV03_PFIFO_CACHES, 1);
+	spin_unlock_irqrestore(&fifo->lock, flags);
+}
+
+void
+nv04_chan_start(struct nvkm_chan *chan)
+{
+	struct nvkm_fifo *fifo = chan->cgrp->runl->fifo;
+	unsigned long flags;
+
+	spin_lock_irqsave(&fifo->lock, flags);
+	nvkm_mask(fifo->engine.subdev.device, NV04_PFIFO_MODE, BIT(chan->id), BIT(chan->id));
+	spin_unlock_irqrestore(&fifo->lock, flags);
+}
+
+void
+nv04_chan_ramfc_clear(struct nvkm_chan *chan)
+{
+	struct nvkm_memory *ramfc = chan->cgrp->runl->fifo->engine.subdev.device->imem->ramfc;
+	const struct nvkm_ramfc_layout *c = chan->func->ramfc->layout;
+
+	nvkm_kmap(ramfc);
+	do {
+		nvkm_wo32(ramfc, chan->ramfc_offset + c->ctxp, 0x00000000);
+	} while ((++c)->bits);
+	nvkm_done(ramfc);
+}
+
+static int
+nv04_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv)
+{
+	struct nvkm_memory *ramfc = chan->cgrp->runl->fifo->engine.subdev.device->imem->ramfc;
+	const u32 base = chan->id * 32;
+
+	chan->ramfc_offset = base;
+
+	nvkm_kmap(ramfc);
+	nvkm_wo32(ramfc, base + 0x00, offset);
+	nvkm_wo32(ramfc, base + 0x04, offset);
+	nvkm_wo32(ramfc, base + 0x08, chan->push->addr >> 4);
+	nvkm_wo32(ramfc, base + 0x10, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
+				      NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
+#ifdef __BIG_ENDIAN
+				      NV_PFIFO_CACHE1_BIG_ENDIAN |
+#endif
+				      NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8);
+	nvkm_done(ramfc);
+	return 0;
+}
+
+static const struct nvkm_chan_func_ramfc
+nv04_chan_ramfc = {
+	.layout = (const struct nvkm_ramfc_layout[]) {
+		{ 32,  0, 0x00,  0, NV04_PFIFO_CACHE1_DMA_PUT },
+		{ 32,  0, 0x04,  0, NV04_PFIFO_CACHE1_DMA_GET },
+		{ 16,  0, 0x08,  0, NV04_PFIFO_CACHE1_DMA_INSTANCE },
+		{ 16, 16, 0x08,  0, NV04_PFIFO_CACHE1_DMA_DCOUNT },
+		{ 32,  0, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_STATE },
+		{ 32,  0, 0x10,  0, NV04_PFIFO_CACHE1_DMA_FETCH },
+		{ 32,  0, 0x14,  0, NV04_PFIFO_CACHE1_ENGINE },
+		{ 32,  0, 0x18,  0, NV04_PFIFO_CACHE1_PULL1 },
+		{}
+	},
+	.write = nv04_chan_ramfc_write,
+	.clear = nv04_chan_ramfc_clear,
+	.ctxdma = true,
+};
+
+const struct nvkm_chan_func_userd
+nv04_chan_userd = {
+	.bar = 0,
+	.base = 0x800000,
+	.size = 0x010000,
+};
+
+const struct nvkm_chan_func_inst
+nv04_chan_inst = {
+	.size = 0x1000,
+};
+
+static const struct nvkm_chan_func
+nv04_chan = {
+	.inst = &nv04_chan_inst,
+	.userd = &nv04_chan_userd,
+	.ramfc = &nv04_chan_ramfc,
+	.start = nv04_chan_start,
+	.stop = nv04_chan_stop,
+};
+
+const struct nvkm_cgrp_func
+nv04_cgrp = {
+};
+
+void
+nv04_eobj_ramht_del(struct nvkm_chan *chan, int hash)
+{
+	struct nvkm_fifo *fifo = chan->cgrp->runl->fifo;
+	struct nvkm_instmem *imem = fifo->engine.subdev.device->imem;
+
+	mutex_lock(&fifo->mutex);
+	nvkm_ramht_remove(imem->ramht, hash);
+	mutex_unlock(&fifo->mutex);
+}
+
+static int
+nv04_eobj_ramht_add(struct nvkm_engn *engn, struct nvkm_object *eobj, struct nvkm_chan *chan)
+{
+	struct nvkm_fifo *fifo = chan->cgrp->runl->fifo;
+	struct nvkm_instmem *imem = fifo->engine.subdev.device->imem;
+	u32 context = 0x80000000 | chan->id << 24 | engn->id << 16;
+	int hash;
+
+	mutex_lock(&fifo->mutex);
+	hash = nvkm_ramht_insert(imem->ramht, eobj, chan->id, 4, eobj->handle, context);
+	mutex_unlock(&fifo->mutex);
+	return hash;
+}
+
+const struct nvkm_engn_func
+nv04_engn = {
+	.ramht_add = nv04_eobj_ramht_add,
+	.ramht_del = nv04_eobj_ramht_del,
 };
 
 void
-nv04_fifo_pause(struct nvkm_fifo *base, unsigned long *pflags)
-__acquires(fifo->base.lock)
+nv04_fifo_pause(struct nvkm_fifo *fifo, unsigned long *pflags)
+__acquires(fifo->lock)
 {
-	struct nv04_fifo *fifo = nv04_fifo(base);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	struct nvkm_device *device = fifo->engine.subdev.device;
 	unsigned long flags;
 
-	spin_lock_irqsave(&fifo->base.lock, flags);
+	spin_lock_irqsave(&fifo->lock, flags);
 	*pflags = flags;
 
 	nvkm_wr32(device, NV03_PFIFO_CACHES, 0x00000000);
@@ -81,50 +244,21 @@ __acquires(fifo->base.lock)
 }
 
 void
-nv04_fifo_start(struct nvkm_fifo *base, unsigned long *pflags)
-__releases(fifo->base.lock)
+nv04_fifo_start(struct nvkm_fifo *fifo, unsigned long *pflags)
+__releases(fifo->lock)
 {
-	struct nv04_fifo *fifo = nv04_fifo(base);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	struct nvkm_device *device = fifo->engine.subdev.device;
 	unsigned long flags = *pflags;
 
 	nvkm_mask(device, NV04_PFIFO_CACHE1_PULL0, 0x00000001, 0x00000001);
 	nvkm_wr32(device, NV03_PFIFO_CACHES, 0x00000001);
 
-	spin_unlock_irqrestore(&fifo->base.lock, flags);
+	spin_unlock_irqrestore(&fifo->lock, flags);
 }
 
-struct nvkm_engine *
-nv04_fifo_id_engine(struct nvkm_fifo *fifo, int engi)
-{
-	enum nvkm_subdev_type type;
-
-	switch (engi) {
-	case NV04_FIFO_ENGN_SW  : type = NVKM_ENGINE_SW; break;
-	case NV04_FIFO_ENGN_GR  : type = NVKM_ENGINE_GR; break;
-	case NV04_FIFO_ENGN_MPEG: type = NVKM_ENGINE_MPEG; break;
-	case NV04_FIFO_ENGN_DMA : type = NVKM_ENGINE_DMAOBJ; break;
-	default:
-		WARN_ON(1);
-		return NULL;
-	}
-
-	return nvkm_device_engine(fifo->engine.subdev.device, type, 0);
-}
-
-int
-nv04_fifo_engine_id(struct nvkm_fifo *base, struct nvkm_engine *engine)
-{
-	switch (engine->subdev.type) {
-	case NVKM_ENGINE_SW    : return NV04_FIFO_ENGN_SW;
-	case NVKM_ENGINE_GR    : return NV04_FIFO_ENGN_GR;
-	case NVKM_ENGINE_MPEG  : return NV04_FIFO_ENGN_MPEG;
-	case NVKM_ENGINE_DMAOBJ: return NV04_FIFO_ENGN_DMA;
-	default:
-		WARN_ON(1);
-		return 0;
-	}
-}
+const struct nvkm_runl_func
+nv04_runl = {
+};
 
 static const char *
 nv_dma_state_err(u32 state)
@@ -166,11 +300,11 @@ nv04_fifo_swmthd(struct nvkm_device *device, u32 chid, u32 addr, u32 data)
 }
 
 static void
-nv04_fifo_cache_error(struct nv04_fifo *fifo, u32 chid, u32 get)
+nv04_fifo_intr_cache_error(struct nvkm_fifo *fifo, u32 chid, u32 get)
 {
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_subdev *subdev = &fifo->engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	struct nvkm_fifo_chan *chan;
+	struct nvkm_chan *chan;
 	unsigned long flags;
 	u32 pull0 = nvkm_rd32(device, 0x003250);
 	u32 mthd, data;
@@ -193,12 +327,12 @@ nv04_fifo_cache_error(struct nv04_fifo *fifo, u32 chid, u32 get)
 
 	if (!(pull0 & 0x00000100) ||
 	    !nv04_fifo_swmthd(device, chid, mthd, data)) {
-		chan = nvkm_fifo_chan_chid(&fifo->base, chid, &flags);
+		chan = nvkm_chan_get_chid(&fifo->engine, chid, &flags);
 		nvkm_error(subdev, "CACHE_ERROR - "
 			   "ch %d [%s] subc %d mthd %04x data %08x\n",
-			   chid, chan ? chan->object.client->name : "unknown",
+			   chid, chan ? chan->name : "unknown",
 			   (mthd >> 13) & 7, mthd & 0x1ffc, data);
-		nvkm_fifo_chan_put(&fifo->base, flags, &chan);
+		nvkm_chan_put(&chan, flags);
 	}
 
 	nvkm_wr32(device, NV04_PFIFO_CACHE1_DMA_PUSH, 0);
@@ -217,20 +351,20 @@ nv04_fifo_cache_error(struct nv04_fifo *fifo, u32 chid, u32 get)
 }
 
 static void
-nv04_fifo_dma_pusher(struct nv04_fifo *fifo, u32 chid)
+nv04_fifo_intr_dma_pusher(struct nvkm_fifo *fifo, u32 chid)
 {
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_subdev *subdev = &fifo->engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	u32 dma_get = nvkm_rd32(device, 0x003244);
 	u32 dma_put = nvkm_rd32(device, 0x003240);
 	u32 push = nvkm_rd32(device, 0x003220);
 	u32 state = nvkm_rd32(device, 0x003228);
-	struct nvkm_fifo_chan *chan;
+	struct nvkm_chan *chan;
 	unsigned long flags;
 	const char *name;
 
-	chan = nvkm_fifo_chan_chid(&fifo->base, chid, &flags);
-	name = chan ? chan->object.client->name : "unknown";
+	chan = nvkm_chan_get_chid(&fifo->engine, chid, &flags);
+	name = chan ? chan->name : "unknown";
 	if (device->card_type == NV_50) {
 		u32 ho_get = nvkm_rd32(device, 0x003328);
 		u32 ho_put = nvkm_rd32(device, 0x003320);
@@ -261,18 +395,18 @@ nv04_fifo_dma_pusher(struct nv04_fifo *fifo, u32 chid)
 		if (dma_get != dma_put)
 			nvkm_wr32(device, 0x003244, dma_put);
 	}
-	nvkm_fifo_chan_put(&fifo->base, flags, &chan);
+	nvkm_chan_put(&chan, flags);
 
 	nvkm_wr32(device, 0x003228, 0x00000000);
 	nvkm_wr32(device, 0x003220, 0x00000001);
 	nvkm_wr32(device, 0x002100, NV_PFIFO_INTR_DMA_PUSHER);
 }
 
-void
-nv04_fifo_intr(struct nvkm_fifo *base)
+irqreturn_t
+nv04_fifo_intr(struct nvkm_inth *inth)
 {
-	struct nv04_fifo *fifo = nv04_fifo(base);
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_fifo *fifo = container_of(inth, typeof(*fifo), engine.subdev.inth);
+	struct nvkm_subdev *subdev = &fifo->engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	u32 mask = nvkm_rd32(device, NV03_PFIFO_INTR_EN_0);
 	u32 stat = nvkm_rd32(device, NV03_PFIFO_INTR_0) & mask;
@@ -281,16 +415,16 @@ nv04_fifo_intr(struct nvkm_fifo *base)
 	reassign = nvkm_rd32(device, NV03_PFIFO_CACHES) & 1;
 	nvkm_wr32(device, NV03_PFIFO_CACHES, 0);
 
-	chid = nvkm_rd32(device, NV03_PFIFO_CACHE1_PUSH1) & (fifo->base.nr - 1);
+	chid = nvkm_rd32(device, NV03_PFIFO_CACHE1_PUSH1) & fifo->chid->mask;
 	get  = nvkm_rd32(device, NV03_PFIFO_CACHE1_GET);
 
 	if (stat & NV_PFIFO_INTR_CACHE_ERROR) {
-		nv04_fifo_cache_error(fifo, chid, get);
+		nv04_fifo_intr_cache_error(fifo, chid, get);
 		stat &= ~NV_PFIFO_INTR_CACHE_ERROR;
 	}
 
 	if (stat & NV_PFIFO_INTR_DMA_PUSHER) {
-		nv04_fifo_dma_pusher(fifo, chid);
+		nv04_fifo_intr_dma_pusher(fifo, chid);
 		stat &= ~NV_PFIFO_INTR_DMA_PUSHER;
 	}
 
@@ -313,7 +447,7 @@ nv04_fifo_intr(struct nvkm_fifo *base)
 
 		if (stat & 0x40000000) {
 			nvkm_wr32(device, 0x002100, 0x40000000);
-			nvkm_fifo_uevent(&fifo->base);
+			nvkm_event_ntfy(&fifo->nonstall.event, 0, NVKM_FIFO_NONSTALL_EVENT);
 			stat &= ~0x40000000;
 		}
 	}
@@ -325,13 +459,13 @@ nv04_fifo_intr(struct nvkm_fifo *base)
 	}
 
 	nvkm_wr32(device, NV03_PFIFO_CACHES, reassign);
+	return IRQ_HANDLED;
 }
 
 void
-nv04_fifo_init(struct nvkm_fifo *base)
+nv04_fifo_init(struct nvkm_fifo *fifo)
 {
-	struct nv04_fifo *fifo = nv04_fifo(base);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	struct nvkm_device *device = fifo->engine.subdev.device;
 	struct nvkm_instmem *imem = device->imem;
 	struct nvkm_ramht *ramht = imem->ramht;
 	struct nvkm_memory *ramro = imem->ramro;
@@ -346,7 +480,7 @@ nv04_fifo_init(struct nvkm_fifo *base)
 	nvkm_wr32(device, NV03_PFIFO_RAMRO, nvkm_memory_addr(ramro) >> 8);
 	nvkm_wr32(device, NV03_PFIFO_RAMFC, nvkm_memory_addr(ramfc) >> 8);
 
-	nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH1, fifo->base.nr - 1);
+	nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH1, fifo->chid->mask);
 
 	nvkm_wr32(device, NV03_PFIFO_INTR_0, 0xffffffff);
 	nvkm_wr32(device, NV03_PFIFO_INTR_EN_0, 0xffffffff);
@@ -357,43 +491,53 @@ nv04_fifo_init(struct nvkm_fifo *base)
 }
 
 int
-nv04_fifo_new_(const struct nvkm_fifo_func *func, struct nvkm_device *device,
-	       enum nvkm_subdev_type type, int inst, int nr, const struct nv04_fifo_ramfc *ramfc,
-	       struct nvkm_fifo **pfifo)
+nv04_fifo_runl_ctor(struct nvkm_fifo *fifo)
 {
-	struct nv04_fifo *fifo;
-	int ret;
-
-	if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL)))
-		return -ENOMEM;
-	fifo->ramfc = ramfc;
-	*pfifo = &fifo->base;
+	struct nvkm_runl *runl;
 
-	ret = nvkm_fifo_ctor(func, device, type, inst, nr, &fifo->base);
-	if (ret)
-		return ret;
+	runl = nvkm_runl_new(fifo, 0, 0, 0);
+	if (IS_ERR(runl))
+		return PTR_ERR(runl);
 
-	set_bit(nr - 1, fifo->base.mask); /* inactive channel */
+	nvkm_runl_add(runl, 0, fifo->func->engn_sw, NVKM_ENGINE_SW, 0);
+	nvkm_runl_add(runl, 0, fifo->func->engn_sw, NVKM_ENGINE_DMAOBJ, 0);
+	nvkm_runl_add(runl, 1, fifo->func->engn   , NVKM_ENGINE_GR, 0);
+	nvkm_runl_add(runl, 2, fifo->func->engn   , NVKM_ENGINE_MPEG, 0); /* NV31- */
 	return 0;
 }
 
+int
+nv04_fifo_chid_ctor(struct nvkm_fifo *fifo, int nr)
+{
+	/* The last CHID is reserved by HW as a "channel invalid" marker. */
+	return nvkm_chid_new(&nvkm_chan_event, &fifo->engine.subdev, nr, 0, nr - 1, &fifo->chid);
+}
+
+static int
+nv04_fifo_chid_nr(struct nvkm_fifo *fifo)
+{
+	return 16;
+}
+
 static const struct nvkm_fifo_func
 nv04_fifo = {
+	.chid_nr = nv04_fifo_chid_nr,
+	.chid_ctor = nv04_fifo_chid_ctor,
+	.runl_ctor = nv04_fifo_runl_ctor,
 	.init = nv04_fifo_init,
 	.intr = nv04_fifo_intr,
-	.engine_id = nv04_fifo_engine_id,
-	.id_engine = nv04_fifo_id_engine,
 	.pause = nv04_fifo_pause,
 	.start = nv04_fifo_start,
-	.chan = {
-		&nv04_fifo_dma_oclass,
-		NULL
-	},
+	.runl = &nv04_runl,
+	.engn = &nv04_engn,
+	.engn_sw = &nv04_engn,
+	.cgrp = {{                        }, &nv04_cgrp },
+	.chan = {{ 0, 0, NV03_CHANNEL_DMA }, &nv04_chan },
 };
 
 int
 nv04_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	      struct nvkm_fifo **pfifo)
 {
-	return nv04_fifo_new_(&nv04_fifo, device, type, inst, 16, nv04_fifo_ramfc, pfifo);
+	return nvkm_fifo_new_(&nv04_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.h
deleted file mode 100644
index 3f23bcde4a54..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv04.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __NV04_FIFO_H__
-#define __NV04_FIFO_H__
-#define nv04_fifo(p) container_of((p), struct nv04_fifo, base)
-#include "priv.h"
-
-struct nv04_fifo_ramfc {
-	unsigned bits:6;
-	unsigned ctxs:5;
-	unsigned ctxp:8;
-	unsigned regs:5;
-	unsigned regp;
-};
-
-struct nv04_fifo {
-	struct nvkm_fifo base;
-	const struct nv04_fifo_ramfc *ramfc;
-};
-
-int nv04_fifo_new_(const struct nvkm_fifo_func *, struct nvkm_device *, enum nvkm_subdev_type, int,
-		   int nr, const struct nv04_fifo_ramfc *, struct nvkm_fifo **);
-void nv04_fifo_init(struct nvkm_fifo *);
-#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv10.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv10.c
index f8887f0f2f82..a4bcf6b0a7e2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv10.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv10.c
@@ -21,41 +21,93 @@
  *
  * Authors: Ben Skeggs
  */
-#include "nv04.h"
-#include "channv04.h"
+#include "priv.h"
+#include "cgrp.h"
+#include "chan.h"
+#include "runl.h"
+
+#include <core/gpuobj.h>
+#include <subdev/instmem.h>
+
 #include "regsnv04.h"
 
-static const struct nv04_fifo_ramfc
-nv10_fifo_ramfc[] = {
-	{ 32,  0, 0x00,  0, NV04_PFIFO_CACHE1_DMA_PUT },
-	{ 32,  0, 0x04,  0, NV04_PFIFO_CACHE1_DMA_GET },
-	{ 32,  0, 0x08,  0, NV10_PFIFO_CACHE1_REF_CNT },
-	{ 16,  0, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_INSTANCE },
-	{ 16, 16, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_DCOUNT },
-	{ 32,  0, 0x10,  0, NV04_PFIFO_CACHE1_DMA_STATE },
-	{ 32,  0, 0x14,  0, NV04_PFIFO_CACHE1_DMA_FETCH },
-	{ 32,  0, 0x18,  0, NV04_PFIFO_CACHE1_ENGINE },
-	{ 32,  0, 0x1c,  0, NV04_PFIFO_CACHE1_PULL1 },
-	{}
+#include <nvif/class.h>
+
+static int
+nv10_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv)
+{
+	struct nvkm_memory *ramfc = chan->cgrp->runl->fifo->engine.subdev.device->imem->ramfc;
+	const u32 base = chan->id * 32;
+
+	chan->ramfc_offset = base;
+
+	nvkm_kmap(ramfc);
+	nvkm_wo32(ramfc, base + 0x00, offset);
+	nvkm_wo32(ramfc, base + 0x04, offset);
+	nvkm_wo32(ramfc, base + 0x0c, chan->push->addr >> 4);
+	nvkm_wo32(ramfc, base + 0x14, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
+				      NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
+#ifdef __BIG_ENDIAN
+				      NV_PFIFO_CACHE1_BIG_ENDIAN |
+#endif
+				      NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8);
+	nvkm_done(ramfc);
+	return 0;
+}
+
+static const struct nvkm_chan_func_ramfc
+nv10_chan_ramfc = {
+	.layout = (const struct nvkm_ramfc_layout[]) {
+		{ 32,  0, 0x00,  0, NV04_PFIFO_CACHE1_DMA_PUT },
+		{ 32,  0, 0x04,  0, NV04_PFIFO_CACHE1_DMA_GET },
+		{ 32,  0, 0x08,  0, NV10_PFIFO_CACHE1_REF_CNT },
+		{ 16,  0, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_INSTANCE },
+		{ 16, 16, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_DCOUNT },
+		{ 32,  0, 0x10,  0, NV04_PFIFO_CACHE1_DMA_STATE },
+		{ 32,  0, 0x14,  0, NV04_PFIFO_CACHE1_DMA_FETCH },
+		{ 32,  0, 0x18,  0, NV04_PFIFO_CACHE1_ENGINE },
+		{ 32,  0, 0x1c,  0, NV04_PFIFO_CACHE1_PULL1 },
+		{}
+	},
+	.write = nv10_chan_ramfc_write,
+	.clear = nv04_chan_ramfc_clear,
+	.ctxdma = true,
+};
+
+static const struct nvkm_chan_func
+nv10_chan = {
+	.inst = &nv04_chan_inst,
+	.userd = &nv04_chan_userd,
+	.ramfc = &nv10_chan_ramfc,
+	.start = nv04_chan_start,
+	.stop = nv04_chan_stop,
 };
 
+int
+nv10_fifo_chid_nr(struct nvkm_fifo *fifo)
+{
+	return 32;
+}
+
 static const struct nvkm_fifo_func
 nv10_fifo = {
+	.chid_nr = nv10_fifo_chid_nr,
+	.chid_ctor = nv04_fifo_chid_ctor,
+	.runl_ctor = nv04_fifo_runl_ctor,
 	.init = nv04_fifo_init,
 	.intr = nv04_fifo_intr,
-	.engine_id = nv04_fifo_engine_id,
-	.id_engine = nv04_fifo_id_engine,
 	.pause = nv04_fifo_pause,
 	.start = nv04_fifo_start,
-	.chan = {
-		&nv10_fifo_dma_oclass,
-		NULL
-	},
+	.runl = &nv04_runl,
+	.engn = &nv04_engn,
+	.engn_sw = &nv04_engn,
+	.cgrp = {{                        }, &nv04_cgrp },
+	.chan = {{ 0, 0, NV10_CHANNEL_DMA }, &nv10_chan },
 };
 
 int
 nv10_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	      struct nvkm_fifo **pfifo)
 {
-	return nv04_fifo_new_(&nv10_fifo, device, type, inst, 32, nv10_fifo_ramfc, pfifo);
+	return nvkm_fifo_new_(&nv10_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv17.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv17.c
index 3f94c7b5b054..c70f44fd4f3b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv17.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv17.c
@@ -21,37 +21,78 @@
  *
  * Authors: Ben Skeggs
  */
-#include "nv04.h"
-#include "channv04.h"
+#include "priv.h"
+#include "cgrp.h"
+#include "chan.h"
+#include "chid.h"
+#include "runl.h"
+
 #include "regsnv04.h"
 
 #include <core/ramht.h>
 #include <subdev/instmem.h>
 
-static const struct nv04_fifo_ramfc
-nv17_fifo_ramfc[] = {
-	{ 32,  0, 0x00,  0, NV04_PFIFO_CACHE1_DMA_PUT },
-	{ 32,  0, 0x04,  0, NV04_PFIFO_CACHE1_DMA_GET },
-	{ 32,  0, 0x08,  0, NV10_PFIFO_CACHE1_REF_CNT },
-	{ 16,  0, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_INSTANCE },
-	{ 16, 16, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_DCOUNT },
-	{ 32,  0, 0x10,  0, NV04_PFIFO_CACHE1_DMA_STATE },
-	{ 32,  0, 0x14,  0, NV04_PFIFO_CACHE1_DMA_FETCH },
-	{ 32,  0, 0x18,  0, NV04_PFIFO_CACHE1_ENGINE },
-	{ 32,  0, 0x1c,  0, NV04_PFIFO_CACHE1_PULL1 },
-	{ 32,  0, 0x20,  0, NV10_PFIFO_CACHE1_ACQUIRE_VALUE },
-	{ 32,  0, 0x24,  0, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP },
-	{ 32,  0, 0x28,  0, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT },
-	{ 32,  0, 0x2c,  0, NV10_PFIFO_CACHE1_SEMAPHORE },
-	{ 32,  0, 0x30,  0, NV10_PFIFO_CACHE1_DMA_SUBROUTINE },
-	{}
+#include <nvif/class.h>
+
+static int
+nv17_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv)
+{
+	struct nvkm_memory *ramfc = chan->cgrp->runl->fifo->engine.subdev.device->imem->ramfc;
+	const u32 base = chan->id * 64;
+
+	chan->ramfc_offset = base;
+
+	nvkm_kmap(ramfc);
+	nvkm_wo32(ramfc, base + 0x00, offset);
+	nvkm_wo32(ramfc, base + 0x04, offset);
+	nvkm_wo32(ramfc, base + 0x0c, chan->push->addr >> 4);
+	nvkm_wo32(ramfc, base + 0x14, NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
+				      NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
+#ifdef __BIG_ENDIAN
+				      NV_PFIFO_CACHE1_BIG_ENDIAN |
+#endif
+				      NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8);
+	nvkm_done(ramfc);
+	return 0;
+}
+
+static const struct nvkm_chan_func_ramfc
+nv17_chan_ramfc = {
+	.layout = (const struct nvkm_ramfc_layout[]) {
+		{ 32,  0, 0x00,  0, NV04_PFIFO_CACHE1_DMA_PUT },
+		{ 32,  0, 0x04,  0, NV04_PFIFO_CACHE1_DMA_GET },
+		{ 32,  0, 0x08,  0, NV10_PFIFO_CACHE1_REF_CNT },
+		{ 16,  0, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_INSTANCE },
+		{ 16, 16, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_DCOUNT },
+		{ 32,  0, 0x10,  0, NV04_PFIFO_CACHE1_DMA_STATE },
+		{ 32,  0, 0x14,  0, NV04_PFIFO_CACHE1_DMA_FETCH },
+		{ 32,  0, 0x18,  0, NV04_PFIFO_CACHE1_ENGINE },
+		{ 32,  0, 0x1c,  0, NV04_PFIFO_CACHE1_PULL1 },
+		{ 32,  0, 0x20,  0, NV10_PFIFO_CACHE1_ACQUIRE_VALUE },
+		{ 32,  0, 0x24,  0, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP },
+		{ 32,  0, 0x28,  0, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT },
+		{ 32,  0, 0x2c,  0, NV10_PFIFO_CACHE1_SEMAPHORE },
+		{ 32,  0, 0x30,  0, NV10_PFIFO_CACHE1_DMA_SUBROUTINE },
+		{}
+	},
+	.write = nv17_chan_ramfc_write,
+	.clear = nv04_chan_ramfc_clear,
+	.ctxdma = true,
+};
+
+static const struct nvkm_chan_func
+nv17_chan = {
+	.inst = &nv04_chan_inst,
+	.userd = &nv04_chan_userd,
+	.ramfc = &nv17_chan_ramfc,
+	.start = nv04_chan_start,
+	.stop = nv04_chan_stop,
 };
 
 static void
-nv17_fifo_init(struct nvkm_fifo *base)
+nv17_fifo_init(struct nvkm_fifo *fifo)
 {
-	struct nv04_fifo *fifo = nv04_fifo(base);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	struct nvkm_device *device = fifo->engine.subdev.device;
 	struct nvkm_instmem *imem = device->imem;
 	struct nvkm_ramht *ramht = imem->ramht;
 	struct nvkm_memory *ramro = imem->ramro;
@@ -67,7 +108,7 @@ nv17_fifo_init(struct nvkm_fifo *base)
 	nvkm_wr32(device, NV03_PFIFO_RAMFC, nvkm_memory_addr(ramfc) >> 8 |
 					    0x00010000);
 
-	nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH1, fifo->base.nr - 1);
+	nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH1, fifo->chid->mask);
 
 	nvkm_wr32(device, NV03_PFIFO_INTR_0, 0xffffffff);
 	nvkm_wr32(device, NV03_PFIFO_INTR_EN_0, 0xffffffff);
@@ -79,21 +120,23 @@ nv17_fifo_init(struct nvkm_fifo *base)
 
 static const struct nvkm_fifo_func
 nv17_fifo = {
+	.chid_nr = nv10_fifo_chid_nr,
+	.chid_ctor = nv04_fifo_chid_ctor,
+	.runl_ctor = nv04_fifo_runl_ctor,
 	.init = nv17_fifo_init,
 	.intr = nv04_fifo_intr,
-	.engine_id = nv04_fifo_engine_id,
-	.id_engine = nv04_fifo_id_engine,
 	.pause = nv04_fifo_pause,
 	.start = nv04_fifo_start,
-	.chan = {
-		&nv17_fifo_dma_oclass,
-		NULL
-	},
+	.runl = &nv04_runl,
+	.engn = &nv04_engn,
+	.engn_sw = &nv04_engn,
+	.cgrp = {{                        }, &nv04_cgrp },
+	.chan = {{ 0, 0, NV17_CHANNEL_DMA }, &nv17_chan },
 };
 
 int
 nv17_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	      struct nvkm_fifo **pfifo)
 {
-	return nv04_fifo_new_(&nv17_fifo, device, type, inst, 32, nv17_fifo_ramfc, pfifo);
+	return nvkm_fifo_new_(&nv17_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv40.c
index f9ea46809bc0..e50a94b6d7f8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv40.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv40.c
@@ -21,46 +21,166 @@
  *
  * Authors: Ben Skeggs
  */
-#include "nv04.h"
-#include "channv04.h"
+#include "priv.h"
+#include "cgrp.h"
+#include "chan.h"
+#include "chid.h"
+#include "runl.h"
+
 #include "regsnv04.h"
 
 #include <core/ramht.h>
 #include <subdev/fb.h>
 #include <subdev/instmem.h>
 
-static const struct nv04_fifo_ramfc
-nv40_fifo_ramfc[] = {
-	{ 32,  0, 0x00,  0, NV04_PFIFO_CACHE1_DMA_PUT },
-	{ 32,  0, 0x04,  0, NV04_PFIFO_CACHE1_DMA_GET },
-	{ 32,  0, 0x08,  0, NV10_PFIFO_CACHE1_REF_CNT },
-	{ 32,  0, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_INSTANCE },
-	{ 32,  0, 0x10,  0, NV04_PFIFO_CACHE1_DMA_DCOUNT },
-	{ 32,  0, 0x14,  0, NV04_PFIFO_CACHE1_DMA_STATE },
-	{ 28,  0, 0x18,  0, NV04_PFIFO_CACHE1_DMA_FETCH },
-	{  2, 28, 0x18, 28, 0x002058 },
-	{ 32,  0, 0x1c,  0, NV04_PFIFO_CACHE1_ENGINE },
-	{ 32,  0, 0x20,  0, NV04_PFIFO_CACHE1_PULL1 },
-	{ 32,  0, 0x24,  0, NV10_PFIFO_CACHE1_ACQUIRE_VALUE },
-	{ 32,  0, 0x28,  0, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP },
-	{ 32,  0, 0x2c,  0, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT },
-	{ 32,  0, 0x30,  0, NV10_PFIFO_CACHE1_SEMAPHORE },
-	{ 32,  0, 0x34,  0, NV10_PFIFO_CACHE1_DMA_SUBROUTINE },
-	{ 32,  0, 0x38,  0, NV40_PFIFO_GRCTX_INSTANCE },
-	{ 17,  0, 0x3c,  0, NV04_PFIFO_DMA_TIMESLICE },
-	{ 32,  0, 0x40,  0, 0x0032e4 },
-	{ 32,  0, 0x44,  0, 0x0032e8 },
-	{ 32,  0, 0x4c,  0, 0x002088 },
-	{ 32,  0, 0x50,  0, 0x003300 },
-	{ 32,  0, 0x54,  0, 0x00330c },
-	{}
+#include <nvif/class.h>
+
+static int
+nv40_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv)
+{
+	struct nvkm_memory *ramfc = chan->cgrp->runl->fifo->engine.subdev.device->imem->ramfc;
+	const u32 base = chan->id * 128;
+
+	chan->ramfc_offset = base;
+
+	nvkm_kmap(ramfc);
+	nvkm_wo32(ramfc, base + 0x00, offset);
+	nvkm_wo32(ramfc, base + 0x04, offset);
+	nvkm_wo32(ramfc, base + 0x0c, chan->push->addr >> 4);
+	nvkm_wo32(ramfc, base + 0x18, 0x30000000 |
+				      NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
+				      NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
+#ifdef __BIG_ENDIAN
+				      NV_PFIFO_CACHE1_BIG_ENDIAN |
+#endif
+				      NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8);
+	nvkm_wo32(ramfc, base + 0x3c, 0x0001ffff);
+	nvkm_done(ramfc);
+	return 0;
+}
+
+static const struct nvkm_chan_func_ramfc
+nv40_chan_ramfc = {
+	.layout = (const struct nvkm_ramfc_layout[]) {
+		{ 32,  0, 0x00,  0, NV04_PFIFO_CACHE1_DMA_PUT },
+		{ 32,  0, 0x04,  0, NV04_PFIFO_CACHE1_DMA_GET },
+		{ 32,  0, 0x08,  0, NV10_PFIFO_CACHE1_REF_CNT },
+		{ 32,  0, 0x0c,  0, NV04_PFIFO_CACHE1_DMA_INSTANCE },
+		{ 32,  0, 0x10,  0, NV04_PFIFO_CACHE1_DMA_DCOUNT },
+		{ 32,  0, 0x14,  0, NV04_PFIFO_CACHE1_DMA_STATE },
+		{ 28,  0, 0x18,  0, NV04_PFIFO_CACHE1_DMA_FETCH },
+		{  2, 28, 0x18, 28, 0x002058 },
+		{ 32,  0, 0x1c,  0, NV04_PFIFO_CACHE1_ENGINE },
+		{ 32,  0, 0x20,  0, NV04_PFIFO_CACHE1_PULL1 },
+		{ 32,  0, 0x24,  0, NV10_PFIFO_CACHE1_ACQUIRE_VALUE },
+		{ 32,  0, 0x28,  0, NV10_PFIFO_CACHE1_ACQUIRE_TIMESTAMP },
+		{ 32,  0, 0x2c,  0, NV10_PFIFO_CACHE1_ACQUIRE_TIMEOUT },
+		{ 32,  0, 0x30,  0, NV10_PFIFO_CACHE1_SEMAPHORE },
+		{ 32,  0, 0x34,  0, NV10_PFIFO_CACHE1_DMA_SUBROUTINE },
+		{ 32,  0, 0x38,  0, NV40_PFIFO_GRCTX_INSTANCE },
+		{ 17,  0, 0x3c,  0, NV04_PFIFO_DMA_TIMESLICE },
+		{ 32,  0, 0x40,  0, 0x0032e4 },
+		{ 32,  0, 0x44,  0, 0x0032e8 },
+		{ 32,  0, 0x4c,  0, 0x002088 },
+		{ 32,  0, 0x50,  0, 0x003300 },
+		{ 32,  0, 0x54,  0, 0x00330c },
+		{}
+	},
+	.write = nv40_chan_ramfc_write,
+	.clear = nv04_chan_ramfc_clear,
+	.ctxdma = true,
+};
+
+static const struct nvkm_chan_func_userd
+nv40_chan_userd = {
+	.bar = 0,
+	.base = 0xc00000,
+	.size = 0x001000,
+};
+
+static const struct nvkm_chan_func
+nv40_chan = {
+	.inst = &nv04_chan_inst,
+	.userd = &nv40_chan_userd,
+	.ramfc = &nv40_chan_ramfc,
+	.start = nv04_chan_start,
+	.stop = nv04_chan_stop,
 };
 
+static int
+nv40_eobj_ramht_add(struct nvkm_engn *engn, struct nvkm_object *eobj, struct nvkm_chan *chan)
+{
+	struct nvkm_fifo *fifo = chan->cgrp->runl->fifo;
+	struct nvkm_instmem *imem = fifo->engine.subdev.device->imem;
+	u32 context = chan->id << 23 | engn->id << 20;
+	int hash;
+
+	mutex_lock(&fifo->mutex);
+	hash = nvkm_ramht_insert(imem->ramht, eobj, chan->id, 4, eobj->handle, context);
+	mutex_unlock(&fifo->mutex);
+	return hash;
+}
+
 static void
-nv40_fifo_init(struct nvkm_fifo *base)
+nv40_ectx_bind(struct nvkm_engn *engn, struct nvkm_cctx *cctx, struct nvkm_chan *chan)
 {
-	struct nv04_fifo *fifo = nv04_fifo(base);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	struct nvkm_fifo *fifo = chan->cgrp->runl->fifo;
+	struct nvkm_device *device = fifo->engine.subdev.device;
+	struct nvkm_memory *ramfc = device->imem->ramfc;
+	u32 inst = 0x00000000, reg, ctx;
+	int chid;
+
+	switch (engn->engine->subdev.type) {
+	case NVKM_ENGINE_GR:
+		reg = 0x0032e0;
+		ctx = 0x38;
+		break;
+	case NVKM_ENGINE_MPEG:
+		if (WARN_ON(device->chipset < 0x44))
+			return;
+		reg = 0x00330c;
+		ctx = 0x54;
+		break;
+	default:
+		WARN_ON(1);
+		return;
+	}
+
+	if (cctx)
+		inst = cctx->vctx->inst->addr >> 4;
+
+	spin_lock_irq(&fifo->lock);
+	nvkm_mask(device, 0x002500, 0x00000001, 0x00000000);
+
+	chid = nvkm_rd32(device, 0x003204) & (fifo->chid->nr - 1);
+	if (chid == chan->id)
+		nvkm_wr32(device, reg, inst);
+
+	nvkm_kmap(ramfc);
+	nvkm_wo32(ramfc, chan->ramfc_offset + ctx, inst);
+	nvkm_done(ramfc);
+
+	nvkm_mask(device, 0x002500, 0x00000001, 0x00000001);
+	spin_unlock_irq(&fifo->lock);
+}
+
+static const struct nvkm_engn_func
+nv40_engn = {
+	.bind = nv40_ectx_bind,
+	.ramht_add = nv40_eobj_ramht_add,
+	.ramht_del = nv04_eobj_ramht_del,
+};
+
+static const struct nvkm_engn_func
+nv40_engn_sw = {
+	.ramht_add = nv40_eobj_ramht_add,
+	.ramht_del = nv04_eobj_ramht_del,
+};
+
+static void
+nv40_fifo_init(struct nvkm_fifo *fifo)
+{
+	struct nvkm_device *device = fifo->engine.subdev.device;
 	struct nvkm_fb *fb = device->fb;
 	struct nvkm_instmem *imem = device->imem;
 	struct nvkm_ramht *ramht = imem->ramht;
@@ -98,7 +218,7 @@ nv40_fifo_init(struct nvkm_fifo *base)
 		break;
 	}
 
-	nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH1, fifo->base.nr - 1);
+	nvkm_wr32(device, NV03_PFIFO_CACHE1_PUSH1, fifo->chid->mask);
 
 	nvkm_wr32(device, NV03_PFIFO_INTR_0, 0xffffffff);
 	nvkm_wr32(device, NV03_PFIFO_INTR_EN_0, 0xffffffff);
@@ -110,21 +230,23 @@ nv40_fifo_init(struct nvkm_fifo *base)
 
 static const struct nvkm_fifo_func
 nv40_fifo = {
+	.chid_nr = nv10_fifo_chid_nr,
+	.chid_ctor = nv04_fifo_chid_ctor,
+	.runl_ctor = nv04_fifo_runl_ctor,
 	.init = nv40_fifo_init,
 	.intr = nv04_fifo_intr,
-	.engine_id = nv04_fifo_engine_id,
-	.id_engine = nv04_fifo_id_engine,
 	.pause = nv04_fifo_pause,
 	.start = nv04_fifo_start,
-	.chan = {
-		&nv40_fifo_dma_oclass,
-		NULL
-	},
+	.runl = &nv04_runl,
+	.engn = &nv40_engn,
+	.engn_sw = &nv40_engn_sw,
+	.cgrp = {{                        }, &nv04_cgrp },
+	.chan = {{ 0, 0, NV40_CHANNEL_DMA }, &nv40_chan },
 };
 
 int
 nv40_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	      struct nvkm_fifo **pfifo)
 {
-	return nv04_fifo_new_(&nv40_fifo, device, type, inst, 32, nv40_fifo_ramfc, pfifo);
+	return nvkm_fifo_new_(&nv40_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c
index a08742cf425a..954b5f3a7d57 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.c
@@ -21,62 +21,325 @@
  *
  * Authors: Ben Skeggs
  */
-#include "nv50.h"
-#include "channv50.h"
+#include "priv.h"
+#include "cgrp.h"
+#include "chan.h"
+#include "chid.h"
+#include "runl.h"
 
-#include <core/gpuobj.h>
+#include <core/ramht.h>
+#include <subdev/timer.h>
 
-static void
-nv50_fifo_runlist_update_locked(struct nv50_fifo *fifo)
+#include <nvif/class.h>
+
+void
+nv50_eobj_ramht_del(struct nvkm_chan *chan, int hash)
 {
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct nvkm_memory *cur;
-	int i, p;
+	nvkm_ramht_remove(chan->ramht, hash);
+}
 
-	cur = fifo->runlist[fifo->cur_runlist];
-	fifo->cur_runlist = !fifo->cur_runlist;
+int
+nv50_eobj_ramht_add(struct nvkm_engn *engn, struct nvkm_object *eobj, struct nvkm_chan *chan)
+{
+	return nvkm_ramht_insert(chan->ramht, eobj, 0, 4, eobj->handle, engn->id << 20);
+}
 
-	nvkm_kmap(cur);
-	for (i = 0, p = 0; i < fifo->base.nr; i++) {
-		if (nvkm_rd32(device, 0x002600 + (i * 4)) & 0x80000000)
-			nvkm_wo32(cur, p++ * 4, i);
-	}
-	nvkm_done(cur);
+void
+nv50_chan_stop(struct nvkm_chan *chan)
+{
+	struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device;
 
-	nvkm_wr32(device, 0x0032f4, nvkm_memory_addr(cur) >> 12);
-	nvkm_wr32(device, 0x0032ec, p);
-	nvkm_wr32(device, 0x002500, 0x00000101);
+	nvkm_mask(device, 0x002600 + (chan->id * 4), 0x80000000, 0x00000000);
 }
 
 void
-nv50_fifo_runlist_update(struct nv50_fifo *fifo)
+nv50_chan_start(struct nvkm_chan *chan)
 {
-	mutex_lock(&fifo->base.mutex);
-	nv50_fifo_runlist_update_locked(fifo);
-	mutex_unlock(&fifo->base.mutex);
+	struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device;
+
+	nvkm_mask(device, 0x002600 + (chan->id * 4), 0x80000000, 0x80000000);
 }
 
-int
-nv50_fifo_oneinit(struct nvkm_fifo *base)
+void
+nv50_chan_unbind(struct nvkm_chan *chan)
 {
-	struct nv50_fifo *fifo = nv50_fifo(base);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device;
+
+	nvkm_wr32(device, 0x002600 + (chan->id * 4), 0x00000000);
+}
+
+static void
+nv50_chan_bind(struct nvkm_chan *chan)
+{
+	struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device;
+
+	nvkm_wr32(device, 0x002600 + (chan->id * 4), chan->ramfc->addr >> 12);
+}
+
+static int
+nv50_chan_ramfc_write(struct nvkm_chan *chan, u64 offset, u64 length, u32 devm, bool priv)
+{
+	struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device;
+	const u32 limit2 = ilog2(length / 8);
 	int ret;
 
-	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 128 * 4, 0x1000,
-			      false, &fifo->runlist[0]);
+	ret = nvkm_gpuobj_new(device, 0x0200, 0x1000, true, chan->inst, &chan->ramfc);
+	if (ret)
+		return ret;
+
+	ret = nvkm_gpuobj_new(device, 0x1200, 0, true, chan->inst, &chan->eng);
 	if (ret)
 		return ret;
 
-	return nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 128 * 4, 0x1000,
-			       false, &fifo->runlist[1]);
+	ret = nvkm_gpuobj_new(device, 0x4000, 0, false, chan->inst, &chan->pgd);
+	if (ret)
+		return ret;
+
+	ret = nvkm_ramht_new(device, 0x8000, 16, chan->inst, &chan->ramht);
+	if (ret)
+		return ret;
+
+	nvkm_kmap(chan->ramfc);
+	nvkm_wo32(chan->ramfc, 0x3c, 0x403f6078);
+	nvkm_wo32(chan->ramfc, 0x44, 0x01003fff);
+	nvkm_wo32(chan->ramfc, 0x48, chan->push->node->offset >> 4);
+	nvkm_wo32(chan->ramfc, 0x50, lower_32_bits(offset));
+	nvkm_wo32(chan->ramfc, 0x54, upper_32_bits(offset) | (limit2 << 16));
+	nvkm_wo32(chan->ramfc, 0x60, 0x7fffffff);
+	nvkm_wo32(chan->ramfc, 0x78, 0x00000000);
+	nvkm_wo32(chan->ramfc, 0x7c, 0x30000000 | devm);
+	nvkm_wo32(chan->ramfc, 0x80, ((chan->ramht->bits - 9) << 27) |
+				     (4 << 24) /* SEARCH_FULL */ |
+				     (chan->ramht->gpuobj->node->offset >> 4));
+	nvkm_done(chan->ramfc);
+	return 0;
+}
+
+static const struct nvkm_chan_func_ramfc
+nv50_chan_ramfc = {
+	.write = nv50_chan_ramfc_write,
+	.ctxdma = true,
+	.devm = 0xfff,
+};
+
+const struct nvkm_chan_func_userd
+nv50_chan_userd = {
+	.bar = 0,
+	.base = 0xc00000,
+	.size = 0x002000,
+};
+
+const struct nvkm_chan_func_inst
+nv50_chan_inst = {
+	.size = 0x10000,
+	.vmm = true,
+};
+
+static const struct nvkm_chan_func
+nv50_chan = {
+	.inst = &nv50_chan_inst,
+	.userd = &nv50_chan_userd,
+	.ramfc = &nv50_chan_ramfc,
+	.bind = nv50_chan_bind,
+	.unbind = nv50_chan_unbind,
+	.start = nv50_chan_start,
+	.stop = nv50_chan_stop,
+};
+
+static void
+nv50_ectx_bind(struct nvkm_engn *engn, struct nvkm_cctx *cctx, struct nvkm_chan *chan)
+{
+	struct nvkm_subdev *subdev = &chan->cgrp->runl->fifo->engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u64 start = 0, limit = 0;
+	u32 flags = 0, ptr0, save;
+
+	switch (engn->engine->subdev.type) {
+	case NVKM_ENGINE_GR    : ptr0 = 0x0000; break;
+	case NVKM_ENGINE_MPEG  : ptr0 = 0x0060; break;
+	default:
+		WARN_ON(1);
+		return;
+	}
+
+	if (!cctx) {
+		/* HW bug workaround:
+		 *
+		 * PFIFO will hang forever if the connected engines don't report
+		 * that they've processed the context switch request.
+		 *
+		 * In order for the kickoff to work, we need to ensure all the
+		 * connected engines are in a state where they can answer.
+		 *
+		 * Newer chipsets don't seem to suffer from this issue, and well,
+		 * there's also a "ignore these engines" bitmask reg we can use
+		 * if we hit the issue there..
+		 */
+		save = nvkm_mask(device, 0x00b860, 0x00000001, 0x00000001);
+
+		/* Tell engines to save out contexts. */
+		nvkm_wr32(device, 0x0032fc, chan->inst->addr >> 12);
+		nvkm_msec(device, 2000,
+			if (nvkm_rd32(device, 0x0032fc) != 0xffffffff)
+				break;
+		);
+		nvkm_wr32(device, 0x00b860, save);
+	} else {
+		flags = 0x00190000;
+		start = cctx->vctx->inst->addr;
+		limit = start + cctx->vctx->inst->size - 1;
+	}
+
+	nvkm_kmap(chan->eng);
+	nvkm_wo32(chan->eng, ptr0 + 0x00, flags);
+	nvkm_wo32(chan->eng, ptr0 + 0x04, lower_32_bits(limit));
+	nvkm_wo32(chan->eng, ptr0 + 0x08, lower_32_bits(start));
+	nvkm_wo32(chan->eng, ptr0 + 0x0c, upper_32_bits(limit) << 24 |
+					  lower_32_bits(start));
+	nvkm_wo32(chan->eng, ptr0 + 0x10, 0x00000000);
+	nvkm_wo32(chan->eng, ptr0 + 0x14, 0x00000000);
+	nvkm_done(chan->eng);
 }
 
+static const struct nvkm_engn_func
+nv50_engn = {
+	.bind = nv50_ectx_bind,
+	.ramht_add = nv50_eobj_ramht_add,
+	.ramht_del = nv50_eobj_ramht_del,
+};
+
+const struct nvkm_engn_func
+nv50_engn_sw = {
+	.ramht_add = nv50_eobj_ramht_add,
+	.ramht_del = nv50_eobj_ramht_del,
+};
+
+static bool
+nv50_runl_pending(struct nvkm_runl *runl)
+{
+	return nvkm_rd32(runl->fifo->engine.subdev.device, 0x0032ec) & 0x00000100;
+}
+
+int
+nv50_runl_wait(struct nvkm_runl *runl)
+{
+	struct nvkm_fifo *fifo = runl->fifo;
+
+	nvkm_msec(fifo->engine.subdev.device, fifo->timeout.chan_msec,
+		if (!nvkm_runl_update_pending(runl))
+			return 0;
+		usleep_range(1, 2);
+	);
+
+	return -ETIMEDOUT;
+}
+
+static void
+nv50_runl_commit(struct nvkm_runl *runl, struct nvkm_memory *memory, u32 start, int count)
+{
+	struct nvkm_device *device = runl->fifo->engine.subdev.device;
+	u64 addr = nvkm_memory_addr(memory) + start;
+
+	nvkm_wr32(device, 0x0032f4, addr >> 12);
+	nvkm_wr32(device, 0x0032ec, count);
+}
+
+static void
+nv50_runl_insert_chan(struct nvkm_chan *chan, struct nvkm_memory *memory, u64 offset)
+{
+	nvkm_wo32(memory, offset, chan->id);
+}
+
+static struct nvkm_memory *
+nv50_runl_alloc(struct nvkm_runl *runl, u32 *offset)
+{
+	const u32 segment = ALIGN((runl->cgrp_nr + runl->chan_nr) * runl->func->size, 0x1000);
+	const u32 maxsize = (runl->cgid ? runl->cgid->nr : 0) + runl->chid->nr;
+	int ret;
+
+	if (unlikely(!runl->mem)) {
+		ret = nvkm_memory_new(runl->fifo->engine.subdev.device, NVKM_MEM_TARGET_INST,
+				      maxsize * 2 * runl->func->size, 0, false, &runl->mem);
+		if (ret) {
+			RUNL_ERROR(runl, "alloc %d\n", ret);
+			return ERR_PTR(ret);
+		}
+	} else {
+		if (runl->offset + segment >= nvkm_memory_size(runl->mem)) {
+			ret = runl->func->wait(runl);
+			if (ret) {
+				RUNL_DEBUG(runl, "rewind timeout");
+				return ERR_PTR(ret);
+			}
+
+			runl->offset = 0;
+		}
+	}
+
+	*offset = runl->offset;
+	runl->offset += segment;
+	return runl->mem;
+}
+
+int
+nv50_runl_update(struct nvkm_runl *runl)
+{
+	struct nvkm_memory *memory;
+	struct nvkm_cgrp *cgrp;
+	struct nvkm_chan *chan;
+	u32 start, offset, count;
+
+	/*TODO: prio, interleaving. */
+
+	RUNL_TRACE(runl, "RAMRL: update cgrps:%d chans:%d", runl->cgrp_nr, runl->chan_nr);
+	memory = nv50_runl_alloc(runl, &start);
+	if (IS_ERR(memory))
+		return PTR_ERR(memory);
+
+	RUNL_TRACE(runl, "RAMRL: update start:%08x", start);
+	offset = start;
+
+	nvkm_kmap(memory);
+	nvkm_runl_foreach_cgrp(cgrp, runl) {
+		if (cgrp->hw) {
+			CGRP_TRACE(cgrp, "     RAMRL+%08x: chans:%d", offset, cgrp->chan_nr);
+			runl->func->insert_cgrp(cgrp, memory, offset);
+			offset += runl->func->size;
+		}
+
+		nvkm_cgrp_foreach_chan(chan, cgrp) {
+			CHAN_TRACE(chan, "RAMRL+%08x: [%s]", offset, chan->name);
+			runl->func->insert_chan(chan, memory, offset);
+			offset += runl->func->size;
+		}
+	}
+	nvkm_done(memory);
+
+	/*TODO: look into using features on newer HW to guarantee forward progress. */
+	list_rotate_left(&runl->cgrps);
+
+	count = (offset - start) / runl->func->size;
+	RUNL_TRACE(runl, "RAMRL: commit start:%08x count:%d", start, count);
+
+	runl->func->commit(runl, memory, start, count);
+	return 0;
+}
+
+const struct nvkm_runl_func
+nv50_runl = {
+	.size = 4,
+	.update = nv50_runl_update,
+	.insert_chan = nv50_runl_insert_chan,
+	.commit = nv50_runl_commit,
+	.wait = nv50_runl_wait,
+	.pending = nv50_runl_pending,
+};
+
 void
-nv50_fifo_init(struct nvkm_fifo *base)
+nv50_fifo_init(struct nvkm_fifo *fifo)
 {
-	struct nv50_fifo *fifo = nv50_fifo(base);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	struct nvkm_runl *runl = nvkm_runl_first(fifo);
+	struct nvkm_device *device = fifo->engine.subdev.device;
 	int i;
 
 	nvkm_mask(device, 0x000200, 0x00000100, 0x00000000);
@@ -89,61 +352,47 @@ nv50_fifo_init(struct nvkm_fifo *base)
 
 	for (i = 0; i < 128; i++)
 		nvkm_wr32(device, 0x002600 + (i * 4), 0x00000000);
-	nv50_fifo_runlist_update_locked(fifo);
+
+	atomic_set(&runl->changed, 1);
+	runl->func->update(runl);
 
 	nvkm_wr32(device, 0x003200, 0x00000001);
 	nvkm_wr32(device, 0x003250, 0x00000001);
 	nvkm_wr32(device, 0x002500, 0x00000001);
 }
 
-void *
-nv50_fifo_dtor(struct nvkm_fifo *base)
+int
+nv50_fifo_chid_ctor(struct nvkm_fifo *fifo, int nr)
 {
-	struct nv50_fifo *fifo = nv50_fifo(base);
-	nvkm_memory_unref(&fifo->runlist[1]);
-	nvkm_memory_unref(&fifo->runlist[0]);
-	return fifo;
+	/* CHID 0 is unusable (some kind of PIO channel?), 127 is "channel invalid". */
+	return nvkm_chid_new(&nvkm_chan_event, &fifo->engine.subdev, nr, 1, nr - 2, &fifo->chid);
 }
 
 int
-nv50_fifo_new_(const struct nvkm_fifo_func *func, struct nvkm_device *device,
-	       enum nvkm_subdev_type type, int inst, struct nvkm_fifo **pfifo)
+nv50_fifo_chid_nr(struct nvkm_fifo *fifo)
 {
-	struct nv50_fifo *fifo;
-	int ret;
-
-	if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL)))
-		return -ENOMEM;
-	*pfifo = &fifo->base;
-
-	ret = nvkm_fifo_ctor(func, device, type, inst, 128, &fifo->base);
-	if (ret)
-		return ret;
-
-	set_bit(0, fifo->base.mask); /* PIO channel */
-	set_bit(127, fifo->base.mask); /* inactive channel */
-	return 0;
+	return 128;
 }
 
 static const struct nvkm_fifo_func
 nv50_fifo = {
-	.dtor = nv50_fifo_dtor,
-	.oneinit = nv50_fifo_oneinit,
+	.chid_nr = nv50_fifo_chid_nr,
+	.chid_ctor = nv50_fifo_chid_ctor,
+	.runl_ctor = nv04_fifo_runl_ctor,
 	.init = nv50_fifo_init,
 	.intr = nv04_fifo_intr,
-	.engine_id = nv04_fifo_engine_id,
-	.id_engine = nv04_fifo_id_engine,
 	.pause = nv04_fifo_pause,
 	.start = nv04_fifo_start,
-	.chan = {
-		&nv50_fifo_gpfifo_oclass,
-		NULL
-	},
+	.runl = &nv50_runl,
+	.engn = &nv50_engn,
+	.engn_sw = &nv50_engn_sw,
+	.cgrp = {{                           }, &nv04_cgrp },
+	.chan = {{ 0, 0, NV50_CHANNEL_GPFIFO }, &nv50_chan },
 };
 
 int
 nv50_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	      struct nvkm_fifo **pfifo)
 {
-	return nv50_fifo_new_(&nv50_fifo, device, type, inst, pfifo);
+	return nvkm_fifo_new_(&nv50_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.h
deleted file mode 100644
index 0111e7e5a4e3..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/nv50.h
+++ /dev/null
@@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-#ifndef __NV50_FIFO_H__
-#define __NV50_FIFO_H__
-#define nv50_fifo(p) container_of((p), struct nv50_fifo, base)
-#include "priv.h"
-
-struct nv50_fifo {
-	struct nvkm_fifo base;
-	struct nvkm_memory *runlist[2];
-	int cur_runlist;
-};
-
-int nv50_fifo_new_(const struct nvkm_fifo_func *, struct nvkm_device *, enum nvkm_subdev_type, int,
-		   struct nvkm_fifo **);
-
-void *nv50_fifo_dtor(struct nvkm_fifo *);
-int nv50_fifo_oneinit(struct nvkm_fifo *);
-void nv50_fifo_init(struct nvkm_fifo *);
-void nv50_fifo_runlist_update(struct nv50_fifo *);
-#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
index 79cec57647f0..4d448be19224 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
@@ -3,46 +3,207 @@
 #define __NVKM_FIFO_PRIV_H__
 #define nvkm_fifo(p) container_of((p), struct nvkm_fifo, engine)
 #include <engine/fifo.h>
+#include <core/enum.h>
+struct nvkm_cctx;
+struct nvkm_cgrp;
+struct nvkm_engn;
+struct nvkm_memory;
+struct nvkm_runl;
+struct nvkm_runq;
+struct nvkm_vctx;
 
-int nvkm_fifo_ctor(const struct nvkm_fifo_func *, struct nvkm_device *, enum nvkm_subdev_type, int,
-		   int nr, struct nvkm_fifo *);
-void nvkm_fifo_uevent(struct nvkm_fifo *);
-void nvkm_fifo_kevent(struct nvkm_fifo *, int chid);
-void nvkm_fifo_recover_chan(struct nvkm_fifo *, int chid);
-
-struct nvkm_fifo_chan *
-nvkm_fifo_chan_inst_locked(struct nvkm_fifo *, u64 inst);
-
-struct nvkm_fifo_chan_oclass;
 struct nvkm_fifo_func {
-	void *(*dtor)(struct nvkm_fifo *);
-	int (*oneinit)(struct nvkm_fifo *);
-	int (*info)(struct nvkm_fifo *, u64 mthd, u64 *data);
+	int (*chid_nr)(struct nvkm_fifo *);
+	int (*chid_ctor)(struct nvkm_fifo *, int nr);
+	int (*runq_nr)(struct nvkm_fifo *);
+	int (*runl_ctor)(struct nvkm_fifo *);
+
 	void (*init)(struct nvkm_fifo *);
-	void (*fini)(struct nvkm_fifo *);
-	void (*intr)(struct nvkm_fifo *);
-	void (*fault)(struct nvkm_fifo *, struct nvkm_fault_data *);
-	int (*engine_id)(struct nvkm_fifo *, struct nvkm_engine *);
-	struct nvkm_engine *(*id_engine)(struct nvkm_fifo *, int engi);
+	void (*init_pbdmas)(struct nvkm_fifo *, u32 mask);
+
+	irqreturn_t (*intr)(struct nvkm_inth *);
+	void (*intr_mmu_fault_unit)(struct nvkm_fifo *, int unit);
+	void (*intr_ctxsw_timeout)(struct nvkm_fifo *, u32 engm);
+
+	const struct nvkm_fifo_func_mmu_fault {
+		void (*recover)(struct nvkm_fifo *, struct nvkm_fault_data *);
+		const struct nvkm_enum *access;
+		const struct nvkm_enum *engine;
+		const struct nvkm_enum *reason;
+		const struct nvkm_enum *hubclient;
+		const struct nvkm_enum *gpcclient;
+	} *mmu_fault;
+
 	void (*pause)(struct nvkm_fifo *, unsigned long *);
 	void (*start)(struct nvkm_fifo *, unsigned long *);
-	void (*uevent_init)(struct nvkm_fifo *);
-	void (*uevent_fini)(struct nvkm_fifo *);
-	void (*recover_chan)(struct nvkm_fifo *, int chid);
-	int (*class_get)(struct nvkm_fifo *, int index, struct nvkm_oclass *);
-	int (*class_new)(struct nvkm_fifo *, const struct nvkm_oclass *,
-			 void *, u32, struct nvkm_object **);
-	const struct nvkm_fifo_chan_oclass *chan[];
+
+	int (*nonstall_ctor)(struct nvkm_fifo *);
+	const struct nvkm_event_func *nonstall;
+
+	const struct nvkm_runl_func *runl;
+	const struct nvkm_runq_func *runq;
+	const struct nvkm_engn_func *engn;
+	const struct nvkm_engn_func *engn_sw;
+	const struct nvkm_engn_func *engn_ce;
+
+	struct nvkm_fifo_func_cgrp {
+		struct nvkm_sclass user;
+		const struct nvkm_cgrp_func *func;
+		bool force;
+	} cgrp;
+
+	struct nvkm_fifo_func_chan {
+		struct nvkm_sclass user;
+		const struct nvkm_chan_func *func;
+	} chan;
 };
 
-void nv04_fifo_intr(struct nvkm_fifo *);
-int nv04_fifo_engine_id(struct nvkm_fifo *, struct nvkm_engine *);
-struct nvkm_engine *nv04_fifo_id_engine(struct nvkm_fifo *, int);
+int nvkm_fifo_new_(const struct nvkm_fifo_func *, struct nvkm_device *, enum nvkm_subdev_type, int,
+		   struct nvkm_fifo **);
+
+int nv04_fifo_chid_ctor(struct nvkm_fifo *, int);
+int nv04_fifo_runl_ctor(struct nvkm_fifo *);
+void nv04_fifo_init(struct nvkm_fifo *);
+irqreturn_t nv04_fifo_intr(struct nvkm_inth *);
 void nv04_fifo_pause(struct nvkm_fifo *, unsigned long *);
 void nv04_fifo_start(struct nvkm_fifo *, unsigned long *);
+extern const struct nvkm_runl_func nv04_runl;
+extern const struct nvkm_engn_func nv04_engn;
+extern const struct nvkm_cgrp_func nv04_cgrp;
+extern const struct nvkm_chan_func_inst nv04_chan_inst;
+extern const struct nvkm_chan_func_userd nv04_chan_userd;
+void nv04_chan_ramfc_clear(struct nvkm_chan *);
+void nv04_chan_start(struct nvkm_chan *);
+void nv04_chan_stop(struct nvkm_chan *);
+void nv04_eobj_ramht_del(struct nvkm_chan *, int);
+
+int nv10_fifo_chid_nr(struct nvkm_fifo *);
+
+int nv50_fifo_chid_nr(struct nvkm_fifo *);
+int nv50_fifo_chid_ctor(struct nvkm_fifo *, int);
+void nv50_fifo_init(struct nvkm_fifo *);
+extern const struct nvkm_runl_func nv50_runl;
+int nv50_runl_update(struct nvkm_runl *);
+int nv50_runl_wait(struct nvkm_runl *);
+extern const struct nvkm_engn_func nv50_engn_sw;
+extern const struct nvkm_chan_func_inst nv50_chan_inst;
+extern const struct nvkm_chan_func_userd nv50_chan_userd;
+void nv50_chan_unbind(struct nvkm_chan *);
+void nv50_chan_start(struct nvkm_chan *);
+void nv50_chan_stop(struct nvkm_chan *);
+void nv50_chan_preempt(struct nvkm_chan *);
+int nv50_eobj_ramht_add(struct nvkm_engn *, struct nvkm_object *, struct nvkm_chan *);
+void nv50_eobj_ramht_del(struct nvkm_chan *, int);
+
+extern const struct nvkm_event_func g84_fifo_nonstall;
+extern const struct nvkm_engn_func g84_engn;
+extern const struct nvkm_chan_func g84_chan;
+
+int gf100_fifo_chid_ctor(struct nvkm_fifo *, int);
+int gf100_fifo_runq_nr(struct nvkm_fifo *);
+bool gf100_fifo_intr_pbdma(struct nvkm_fifo *);
+void gf100_fifo_intr_mmu_fault(struct nvkm_fifo *);
+void gf100_fifo_intr_mmu_fault_unit(struct nvkm_fifo *, int);
+void gf100_fifo_intr_sched(struct nvkm_fifo *);
+void gf100_fifo_intr_ctxsw_timeout(struct nvkm_fifo *, u32);
+void gf100_fifo_mmu_fault_recover(struct nvkm_fifo *, struct nvkm_fault_data *);
+extern const struct nvkm_enum gf100_fifo_mmu_fault_access[];
+extern const struct nvkm_event_func gf100_fifo_nonstall;
+bool gf100_runl_preempt_pending(struct nvkm_runl *);
+void gf100_runq_init(struct nvkm_runq *);
+bool gf100_runq_intr(struct nvkm_runq *, struct nvkm_runl *);
+void gf100_engn_mmu_fault_trigger(struct nvkm_engn *);
+bool gf100_engn_mmu_fault_triggered(struct nvkm_engn *);
+extern const struct nvkm_engn_func gf100_engn_sw;
+extern const struct nvkm_chan_func_inst gf100_chan_inst;
+void gf100_chan_userd_clear(struct nvkm_chan *);
+void gf100_chan_preempt(struct nvkm_chan *);
+
+int gk104_fifo_chid_nr(struct nvkm_fifo *);
+int gk104_fifo_runl_ctor(struct nvkm_fifo *);
+void gk104_fifo_init(struct nvkm_fifo *);
+void gk104_fifo_init_pbdmas(struct nvkm_fifo *, u32);
+irqreturn_t gk104_fifo_intr(struct nvkm_inth *);
+void gk104_fifo_intr_runlist(struct nvkm_fifo *);
+void gk104_fifo_intr_chsw(struct nvkm_fifo *);
+void gk104_fifo_intr_bind(struct nvkm_fifo *);
+extern const struct nvkm_fifo_func_mmu_fault gk104_fifo_mmu_fault;
+extern const struct nvkm_enum gk104_fifo_mmu_fault_reason[];
+extern const struct nvkm_enum gk104_fifo_mmu_fault_hubclient[];
+extern const struct nvkm_enum gk104_fifo_mmu_fault_gpcclient[];
+void gk104_runl_insert_chan(struct nvkm_chan *, struct nvkm_memory *, u64);
+void gk104_runl_commit(struct nvkm_runl *, struct nvkm_memory *, u32, int);
+bool gk104_runl_pending(struct nvkm_runl *);
+void gk104_runl_block(struct nvkm_runl *, u32);
+void gk104_runl_allow(struct nvkm_runl *, u32);
+void gk104_runl_fault_clear(struct nvkm_runl *);
+extern const struct nvkm_runq_func gk104_runq;
+void gk104_runq_init(struct nvkm_runq *);
+bool gk104_runq_intr(struct nvkm_runq *, struct nvkm_runl *);
+extern const struct nvkm_bitfield gk104_runq_intr_0_names[];
+bool gk104_runq_idle(struct nvkm_runq *);
+extern const struct nvkm_engn_func gk104_engn;
+bool gk104_engn_chsw(struct nvkm_engn *);
+int gk104_engn_cxid(struct nvkm_engn *, bool *cgid);
+int gk104_ectx_ctor(struct nvkm_engn *, struct nvkm_vctx *);
+extern const struct nvkm_engn_func gk104_engn_ce;
+extern const struct nvkm_chan_func_userd gk104_chan_userd;
+extern const struct nvkm_chan_func_ramfc gk104_chan_ramfc;
+void gk104_chan_bind(struct nvkm_chan *);
+void gk104_chan_bind_inst(struct nvkm_chan *);
+void gk104_chan_unbind(struct nvkm_chan *);
+void gk104_chan_start(struct nvkm_chan *);
+void gk104_chan_stop(struct nvkm_chan *);
+
+int gk110_fifo_chid_ctor(struct nvkm_fifo *, int);
+extern const struct nvkm_runl_func gk110_runl;
+extern const struct nvkm_cgrp_func gk110_cgrp;
+void gk110_runl_insert_cgrp(struct nvkm_cgrp *, struct nvkm_memory *, u64);
+extern const struct nvkm_chan_func gk110_chan;
+void gk110_chan_preempt(struct nvkm_chan *);
+
+extern const struct nvkm_runq_func gk208_runq;
+void gk208_runq_init(struct nvkm_runq *);
+
+void gm107_fifo_intr_mmu_fault_unit(struct nvkm_fifo *, int);
+extern const struct nvkm_fifo_func_mmu_fault gm107_fifo_mmu_fault;
+extern const struct nvkm_runl_func gm107_runl;
+extern const struct nvkm_chan_func gm107_chan;
+
+int gm200_fifo_chid_nr(struct nvkm_fifo *);
+int gm200_fifo_runq_nr(struct nvkm_fifo *);
+
+extern const struct nvkm_enum gv100_fifo_mmu_fault_access[];
+extern const struct nvkm_enum gv100_fifo_mmu_fault_reason[];
+extern const struct nvkm_enum gv100_fifo_mmu_fault_hubclient[];
+extern const struct nvkm_enum gv100_fifo_mmu_fault_gpcclient[];
+void gv100_runl_insert_cgrp(struct nvkm_cgrp *, struct nvkm_memory *, u64);
+void gv100_runl_insert_chan(struct nvkm_chan *, struct nvkm_memory *, u64);
+void gv100_runl_preempt(struct nvkm_runl *);
+extern const struct nvkm_runq_func gv100_runq;
+extern const struct nvkm_engn_func gv100_engn;
+void gv100_ectx_bind(struct nvkm_engn *, struct nvkm_cctx *, struct nvkm_chan *);
+extern const struct nvkm_engn_func gv100_engn_ce;
+int gv100_ectx_ce_ctor(struct nvkm_engn *, struct nvkm_vctx *);
+void gv100_ectx_ce_bind(struct nvkm_engn *, struct nvkm_cctx *, struct nvkm_chan *);
+extern const struct nvkm_chan_func_userd gv100_chan_userd;
+extern const struct nvkm_chan_func_ramfc gv100_chan_ramfc;
+
+void tu102_fifo_intr_ctxsw_timeout_info(struct nvkm_engn *, u32 info);
+extern const struct nvkm_fifo_func_mmu_fault tu102_fifo_mmu_fault;
 
-void gf100_fifo_intr_fault(struct nvkm_fifo *, int);
+int ga100_fifo_runl_ctor(struct nvkm_fifo *);
+int ga100_fifo_nonstall_ctor(struct nvkm_fifo *);
+extern const struct nvkm_event_func ga100_fifo_nonstall;
+extern const struct nvkm_runl_func ga100_runl;
+extern const struct nvkm_runq_func ga100_runq;
+extern const struct nvkm_engn_func ga100_engn;
+extern const struct nvkm_engn_func ga100_engn_ce;
+extern const struct nvkm_cgrp_func ga100_cgrp;
+extern const struct nvkm_chan_func ga100_chan;
 
-int gk104_fifo_engine_id(struct nvkm_fifo *, struct nvkm_engine *);
-struct nvkm_engine *gk104_fifo_id_engine(struct nvkm_fifo *, int);
+int nvkm_uchan_new(struct nvkm_fifo *, struct nvkm_cgrp *, const struct nvkm_oclass *,
+		   void *argv, u32 argc, struct nvkm_object **);
+int nvkm_ucgrp_new(struct nvkm_fifo *, const struct nvkm_oclass *, void *argv, u32 argc,
+		   struct nvkm_object **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.c
new file mode 100644
index 000000000000..b5836cbc29aa
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.c
@@ -0,0 +1,430 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "runl.h"
+#include "cgrp.h"
+#include "chan.h"
+#include "chid.h"
+#include "priv.h"
+#include "runq.h"
+
+#include <core/gpuobj.h>
+#include <subdev/timer.h>
+#include <subdev/top.h>
+
+struct nvkm_cgrp *
+nvkm_engn_cgrp_get(struct nvkm_engn *engn, unsigned long *pirqflags)
+{
+	struct nvkm_cgrp *cgrp = NULL;
+	struct nvkm_chan *chan;
+	bool cgid;
+	int id;
+
+	id = engn->func->cxid(engn, &cgid);
+	if (id < 0)
+		return NULL;
+
+	if (!cgid) {
+		chan = nvkm_runl_chan_get_chid(engn->runl, id, pirqflags);
+		if (chan)
+			cgrp = chan->cgrp;
+	} else {
+		cgrp = nvkm_runl_cgrp_get_cgid(engn->runl, id, pirqflags);
+	}
+
+	WARN_ON(!cgrp);
+	return cgrp;
+}
+
+static void
+nvkm_runl_rc(struct nvkm_runl *runl)
+{
+	struct nvkm_fifo *fifo = runl->fifo;
+	struct nvkm_cgrp *cgrp, *gtmp;
+	struct nvkm_chan *chan, *ctmp;
+	struct nvkm_engn *engn;
+	unsigned long flags;
+	int rc, state, i;
+	bool reset;
+
+	/* Runlist is blocked before scheduling recovery - fetch count. */
+	BUG_ON(!mutex_is_locked(&runl->mutex));
+	rc = atomic_xchg(&runl->rc_pending, 0);
+	if (!rc)
+		return;
+
+	/* Look for channel groups flagged for RC. */
+	nvkm_runl_foreach_cgrp_safe(cgrp, gtmp, runl) {
+		state = atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_PENDING, NVKM_CGRP_RC_RUNNING);
+		if (state == NVKM_CGRP_RC_PENDING) {
+			/* Disable all channels in them, and remove from runlist. */
+			nvkm_cgrp_foreach_chan_safe(chan, ctmp, cgrp) {
+				nvkm_chan_error(chan, false);
+				nvkm_chan_remove_locked(chan);
+			}
+		}
+	}
+
+	/* On GPUs with runlist preempt, wait for PBDMA(s) servicing runlist to go idle. */
+	if (runl->func->preempt) {
+		for (i = 0; i < runl->runq_nr; i++) {
+			struct nvkm_runq *runq = runl->runq[i];
+
+			if (runq) {
+				nvkm_msec(fifo->engine.subdev.device, 2000,
+					if (runq->func->idle(runq))
+						break;
+				);
+			}
+		}
+	}
+
+	/* Look for engines that are still on flagged channel groups - reset them. */
+	nvkm_runl_foreach_engn_cond(engn, runl, engn->func->cxid) {
+		cgrp = nvkm_engn_cgrp_get(engn, &flags);
+		if (!cgrp) {
+			ENGN_DEBUG(engn, "cxid not valid");
+			continue;
+		}
+
+		reset = atomic_read(&cgrp->rc) == NVKM_CGRP_RC_RUNNING;
+		nvkm_cgrp_put(&cgrp, flags);
+		if (!reset) {
+			ENGN_DEBUG(engn, "cxid not in recovery");
+			continue;
+		}
+
+		ENGN_DEBUG(engn, "resetting...");
+		/*TODO: can we do something less of a potential catastrophe on failure? */
+		WARN_ON(nvkm_engine_reset(engn->engine));
+	}
+
+	/* Submit runlist update, and clear any remaining exception state. */
+	runl->func->update(runl);
+	if (runl->func->fault_clear)
+		runl->func->fault_clear(runl);
+
+	/* Unblock runlist processing. */
+	while (rc--)
+		nvkm_runl_allow(runl);
+	runl->func->wait(runl);
+}
+
+static void
+nvkm_runl_rc_runl(struct nvkm_runl *runl)
+{
+	RUNL_ERROR(runl, "rc scheduled");
+
+	nvkm_runl_block(runl);
+	if (runl->func->preempt)
+		runl->func->preempt(runl);
+
+	atomic_inc(&runl->rc_pending);
+	schedule_work(&runl->work);
+}
+
+void
+nvkm_runl_rc_cgrp(struct nvkm_cgrp *cgrp)
+{
+	if (atomic_cmpxchg(&cgrp->rc, NVKM_CGRP_RC_NONE, NVKM_CGRP_RC_PENDING) != NVKM_CGRP_RC_NONE)
+		return;
+
+	CGRP_ERROR(cgrp, "rc scheduled");
+	nvkm_runl_rc_runl(cgrp->runl);
+}
+
+void
+nvkm_runl_rc_engn(struct nvkm_runl *runl, struct nvkm_engn *engn)
+{
+	struct nvkm_cgrp *cgrp;
+	unsigned long flags;
+
+	/* Lookup channel group currently on engine. */
+	cgrp = nvkm_engn_cgrp_get(engn, &flags);
+	if (!cgrp) {
+		ENGN_DEBUG(engn, "rc skipped, not on channel");
+		return;
+	}
+
+	nvkm_runl_rc_cgrp(cgrp);
+	nvkm_cgrp_put(&cgrp, flags);
+}
+
+static void
+nvkm_runl_work(struct work_struct *work)
+{
+	struct nvkm_runl *runl = container_of(work, typeof(*runl), work);
+
+	mutex_lock(&runl->mutex);
+	nvkm_runl_rc(runl);
+	mutex_unlock(&runl->mutex);
+
+}
+
+struct nvkm_chan *
+nvkm_runl_chan_get_inst(struct nvkm_runl *runl, u64 inst, unsigned long *pirqflags)
+{
+	struct nvkm_chid *chid = runl->chid;
+	struct nvkm_chan *chan;
+	unsigned long flags;
+	int id;
+
+	spin_lock_irqsave(&chid->lock, flags);
+	for_each_set_bit(id, chid->used, chid->nr) {
+		chan = chid->data[id];
+		if (likely(chan)) {
+			if (chan->inst->addr == inst) {
+				spin_lock(&chan->cgrp->lock);
+				*pirqflags = flags;
+				spin_unlock(&chid->lock);
+				return chan;
+			}
+		}
+	}
+	spin_unlock_irqrestore(&chid->lock, flags);
+	return NULL;
+}
+
+struct nvkm_chan *
+nvkm_runl_chan_get_chid(struct nvkm_runl *runl, int id, unsigned long *pirqflags)
+{
+	struct nvkm_chid *chid = runl->chid;
+	struct nvkm_chan *chan;
+	unsigned long flags;
+
+	spin_lock_irqsave(&chid->lock, flags);
+	if (!WARN_ON(id >= chid->nr)) {
+		chan = chid->data[id];
+		if (likely(chan)) {
+			spin_lock(&chan->cgrp->lock);
+			*pirqflags = flags;
+			spin_unlock(&chid->lock);
+			return chan;
+		}
+	}
+	spin_unlock_irqrestore(&chid->lock, flags);
+	return NULL;
+}
+
+struct nvkm_cgrp *
+nvkm_runl_cgrp_get_cgid(struct nvkm_runl *runl, int id, unsigned long *pirqflags)
+{
+	struct nvkm_chid *cgid = runl->cgid;
+	struct nvkm_cgrp *cgrp;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cgid->lock, flags);
+	if (!WARN_ON(id >= cgid->nr)) {
+		cgrp = cgid->data[id];
+		if (likely(cgrp)) {
+			spin_lock(&cgrp->lock);
+			*pirqflags = flags;
+			spin_unlock(&cgid->lock);
+			return cgrp;
+		}
+	}
+	spin_unlock_irqrestore(&cgid->lock, flags);
+	return NULL;
+}
+
+int
+nvkm_runl_preempt_wait(struct nvkm_runl *runl)
+{
+	return nvkm_msec(runl->fifo->engine.subdev.device, runl->fifo->timeout.chan_msec,
+		if (!runl->func->preempt_pending(runl))
+			break;
+
+		nvkm_runl_rc(runl);
+		usleep_range(1, 2);
+	) < 0 ? -ETIMEDOUT : 0;
+}
+
+bool
+nvkm_runl_update_pending(struct nvkm_runl *runl)
+{
+	if (!runl->func->pending(runl))
+		return false;
+
+	nvkm_runl_rc(runl);
+	return true;
+}
+
+void
+nvkm_runl_update_locked(struct nvkm_runl *runl, bool wait)
+{
+	if (atomic_xchg(&runl->changed, 0) && runl->func->update) {
+		runl->func->update(runl);
+		if (wait)
+			runl->func->wait(runl);
+	}
+}
+
+void
+nvkm_runl_allow(struct nvkm_runl *runl)
+{
+	struct nvkm_fifo *fifo = runl->fifo;
+	unsigned long flags;
+
+	spin_lock_irqsave(&fifo->lock, flags);
+	if (!--runl->blocked) {
+		RUNL_TRACE(runl, "running");
+		runl->func->allow(runl, ~0);
+	}
+	spin_unlock_irqrestore(&fifo->lock, flags);
+}
+
+void
+nvkm_runl_block(struct nvkm_runl *runl)
+{
+	struct nvkm_fifo *fifo = runl->fifo;
+	unsigned long flags;
+
+	spin_lock_irqsave(&fifo->lock, flags);
+	if (!runl->blocked++) {
+		RUNL_TRACE(runl, "stopped");
+		runl->func->block(runl, ~0);
+	}
+	spin_unlock_irqrestore(&fifo->lock, flags);
+}
+
+void
+nvkm_runl_fini(struct nvkm_runl *runl)
+{
+	if (runl->func->fini)
+		runl->func->fini(runl);
+
+	flush_work(&runl->work);
+}
+
+void
+nvkm_runl_del(struct nvkm_runl *runl)
+{
+	struct nvkm_engn *engn, *engt;
+
+	nvkm_memory_unref(&runl->mem);
+
+	list_for_each_entry_safe(engn, engt, &runl->engns, head) {
+		list_del(&engn->head);
+		kfree(engn);
+	}
+
+	nvkm_chid_unref(&runl->chid);
+	nvkm_chid_unref(&runl->cgid);
+
+	list_del(&runl->head);
+	mutex_destroy(&runl->mutex);
+	kfree(runl);
+}
+
+struct nvkm_engn *
+nvkm_runl_add(struct nvkm_runl *runl, int engi, const struct nvkm_engn_func *func,
+	      enum nvkm_subdev_type type, int inst)
+{
+	struct nvkm_fifo *fifo = runl->fifo;
+	struct nvkm_device *device = fifo->engine.subdev.device;
+	struct nvkm_engine *engine;
+	struct nvkm_engn *engn;
+
+	engine = nvkm_device_engine(device, type, inst);
+	if (!engine) {
+		RUNL_DEBUG(runl, "engn %d.%d[%s] not found", engi, inst, nvkm_subdev_type[type]);
+		return NULL;
+	}
+
+	if (!(engn = kzalloc(sizeof(*engn), GFP_KERNEL)))
+		return NULL;
+
+	engn->func = func;
+	engn->runl = runl;
+	engn->id = engi;
+	engn->engine = engine;
+	engn->fault = -1;
+	list_add_tail(&engn->head, &runl->engns);
+
+	/* Lookup MMU engine ID for fault handling. */
+	if (device->top)
+		engn->fault = nvkm_top_fault_id(device, engine->subdev.type, engine->subdev.inst);
+
+	if (engn->fault < 0 && fifo->func->mmu_fault) {
+		const struct nvkm_enum *map = fifo->func->mmu_fault->engine;
+
+		while (map->name) {
+			if (map->data2 == engine->subdev.type && map->inst == engine->subdev.inst) {
+				engn->fault = map->value;
+				break;
+			}
+			map++;
+		}
+	}
+
+	return engn;
+}
+
+struct nvkm_runl *
+nvkm_runl_get(struct nvkm_fifo *fifo, int runi, u32 addr)
+{
+	struct nvkm_runl *runl;
+
+	nvkm_runl_foreach(runl, fifo) {
+		if ((runi >= 0 && runl->id == runi) || (runi < 0 && runl->addr == addr))
+			return runl;
+	}
+
+	return NULL;
+}
+
+struct nvkm_runl *
+nvkm_runl_new(struct nvkm_fifo *fifo, int runi, u32 addr, int id_nr)
+{
+	struct nvkm_subdev *subdev = &fifo->engine.subdev;
+	struct nvkm_runl *runl;
+	int ret;
+
+	if (!(runl = kzalloc(sizeof(*runl), GFP_KERNEL)))
+		return NULL;
+
+	runl->func = fifo->func->runl;
+	runl->fifo = fifo;
+	runl->id = runi;
+	runl->addr = addr;
+	INIT_LIST_HEAD(&runl->engns);
+	INIT_LIST_HEAD(&runl->cgrps);
+	atomic_set(&runl->changed, 0);
+	mutex_init(&runl->mutex);
+	INIT_WORK(&runl->work, nvkm_runl_work);
+	atomic_set(&runl->rc_triggered, 0);
+	atomic_set(&runl->rc_pending, 0);
+	list_add_tail(&runl->head, &fifo->runls);
+
+	if (!fifo->chid) {
+		if ((ret = nvkm_chid_new(&nvkm_chan_event, subdev, id_nr, 0, id_nr, &runl->cgid)) ||
+		    (ret = nvkm_chid_new(&nvkm_chan_event, subdev, id_nr, 0, id_nr, &runl->chid))) {
+			RUNL_ERROR(runl, "cgid/chid: %d", ret);
+			nvkm_runl_del(runl);
+			return NULL;
+		}
+	} else {
+		runl->cgid = nvkm_chid_ref(fifo->cgid);
+		runl->chid = nvkm_chid_ref(fifo->chid);
+	}
+
+	return runl;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.h
new file mode 100644
index 000000000000..c93d21bb7bd5
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runl.h
@@ -0,0 +1,125 @@
+#ifndef __NVKM_RUNL_H__
+#define __NVKM_RUNL_H__
+#include <core/intr.h>
+struct nvkm_cctx;
+struct nvkm_cgrp;
+struct nvkm_chan;
+struct nvkm_memory;
+struct nvkm_object;
+struct nvkm_vctx;
+enum nvkm_subdev_type;
+
+struct nvkm_engn {
+	const struct nvkm_engn_func {
+		bool (*chsw)(struct nvkm_engn *);
+		int (*cxid)(struct nvkm_engn *, bool *cgid);
+		void (*mmu_fault_trigger)(struct nvkm_engn *);
+		bool (*mmu_fault_triggered)(struct nvkm_engn *);
+		int (*ctor)(struct nvkm_engn *, struct nvkm_vctx *);
+		void (*bind)(struct nvkm_engn *, struct nvkm_cctx *, struct nvkm_chan *);
+		int (*ramht_add)(struct nvkm_engn *, struct nvkm_object *, struct nvkm_chan *);
+		void (*ramht_del)(struct nvkm_chan *, int hash);
+	} *func;
+	struct nvkm_runl *runl;
+	int id;
+
+	struct nvkm_engine *engine;
+
+	int fault;
+
+	struct list_head head;
+};
+
+#define ENGN_PRINT(e,l,p,f,a...)                                                           \
+	RUNL_PRINT((e)->runl, l, p, "%02d[%8s]:"f, (e)->id, (e)->engine->subdev.name, ##a)
+#define ENGN_DEBUG(e,f,a...) ENGN_PRINT((e), DEBUG,   info, " "f"\n", ##a)
+
+struct nvkm_runl {
+	const struct nvkm_runl_func {
+		void (*init)(struct nvkm_runl *);
+		void (*fini)(struct nvkm_runl *);
+		int runqs;
+		u8 size;
+		int (*update)(struct nvkm_runl *);
+		void (*insert_cgrp)(struct nvkm_cgrp *, struct nvkm_memory *, u64 offset);
+		void (*insert_chan)(struct nvkm_chan *, struct nvkm_memory *, u64 offset);
+		void (*commit)(struct nvkm_runl *, struct nvkm_memory *, u32 start, int count);
+		int (*wait)(struct nvkm_runl *);
+		bool (*pending)(struct nvkm_runl *);
+		void (*block)(struct nvkm_runl *, u32 engm);
+		void (*allow)(struct nvkm_runl *, u32 engm);
+		void (*fault_clear)(struct nvkm_runl *);
+		void (*preempt)(struct nvkm_runl *);
+		bool (*preempt_pending)(struct nvkm_runl *);
+	} *func;
+	struct nvkm_fifo *fifo;
+	int id;
+	u32 addr;
+	u32 chan;
+	u16 doorbell;
+
+	struct nvkm_chid *cgid;
+#define NVKM_CHAN_EVENT_ERRORED BIT(0)
+	struct nvkm_chid *chid;
+
+	struct list_head engns;
+
+	struct nvkm_runq *runq[2];
+	int runq_nr;
+
+	struct nvkm_inth inth;
+
+	struct list_head cgrps;
+	int cgrp_nr;
+	int chan_nr;
+	atomic_t changed;
+	struct nvkm_memory *mem;
+	u32 offset;
+	struct mutex mutex;
+
+	int blocked;
+
+	struct work_struct work;
+	atomic_t rc_triggered;
+	atomic_t rc_pending;
+
+	struct list_head head;
+};
+
+struct nvkm_runl *nvkm_runl_new(struct nvkm_fifo *, int runi, u32 addr, int id_nr);
+struct nvkm_runl *nvkm_runl_get(struct nvkm_fifo *, int runi, u32 addr);
+struct nvkm_engn *nvkm_runl_add(struct nvkm_runl *, int engi, const struct nvkm_engn_func *,
+				enum nvkm_subdev_type, int inst);
+void nvkm_runl_del(struct nvkm_runl *);
+void nvkm_runl_fini(struct nvkm_runl *);
+void nvkm_runl_block(struct nvkm_runl *);
+void nvkm_runl_allow(struct nvkm_runl *);
+void nvkm_runl_update_locked(struct nvkm_runl *, bool wait);
+bool nvkm_runl_update_pending(struct nvkm_runl *);
+int nvkm_runl_preempt_wait(struct nvkm_runl *);
+
+void nvkm_runl_rc_engn(struct nvkm_runl *, struct nvkm_engn *);
+void nvkm_runl_rc_cgrp(struct nvkm_cgrp *);
+
+struct nvkm_cgrp *nvkm_runl_cgrp_get_cgid(struct nvkm_runl *, int cgid, unsigned long *irqflags);
+struct nvkm_chan *nvkm_runl_chan_get_chid(struct nvkm_runl *, int chid, unsigned long *irqflags);
+struct nvkm_chan *nvkm_runl_chan_get_inst(struct nvkm_runl *, u64 inst, unsigned long *irqflags);
+
+#define nvkm_runl_find_engn(engn,runl,cond) nvkm_list_find(engn, &(runl)->engns, head, (cond))
+
+#define nvkm_runl_first(fifo) list_first_entry(&(fifo)->runls, struct nvkm_runl, head)
+#define nvkm_runl_foreach(runl,fifo) list_for_each_entry((runl), &(fifo)->runls, head)
+#define nvkm_runl_foreach_cond(runl,fifo,cond) nvkm_list_foreach(runl, &(fifo)->runls, head, (cond))
+#define nvkm_runl_foreach_engn(engn,runl) list_for_each_entry((engn), &(runl)->engns, head)
+#define nvkm_runl_foreach_engn_cond(engn,runl,cond) \
+	nvkm_list_foreach(engn, &(runl)->engns, head, (cond))
+#define nvkm_runl_foreach_cgrp(cgrp,runl) list_for_each_entry((cgrp), &(runl)->cgrps, head)
+#define nvkm_runl_foreach_cgrp_safe(cgrp,gtmp,runl) \
+	list_for_each_entry_safe((cgrp), (gtmp), &(runl)->cgrps, head)
+
+#define RUNL_PRINT(r,l,p,f,a...)                                                          \
+	nvkm_printk__(&(r)->fifo->engine.subdev, NV_DBG_##l, p, "%06x:"f, (r)->addr, ##a)
+#define RUNL_ERROR(r,f,a...) RUNL_PRINT((r), ERROR,    err, " "f"\n", ##a)
+#define RUNL_DEBUG(r,f,a...) RUNL_PRINT((r), DEBUG,   info, " "f"\n", ##a)
+#define RUNL_TRACE(r,f,a...) RUNL_PRINT((r), TRACE,   info, " "f"\n", ##a)
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.c
new file mode 100644
index 000000000000..33bcf5fb3ef0
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "runq.h"
+#include "priv.h"
+
+void
+nvkm_runq_del(struct nvkm_runq *runq)
+{
+	list_del(&runq->head);
+	kfree(runq);
+}
+
+struct nvkm_runq *
+nvkm_runq_new(struct nvkm_fifo *fifo, int pbid)
+{
+	struct nvkm_runq *runq;
+
+	if (!(runq = kzalloc(sizeof(*runq), GFP_KERNEL)))
+		return NULL;
+
+	runq->func = fifo->func->runq;
+	runq->fifo = fifo;
+	runq->id = pbid;
+	list_add_tail(&runq->head, &fifo->runqs);
+	return runq;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.h
new file mode 100644
index 000000000000..2cb4836e8b31
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/runq.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef __NVKM_RUNQ_H__
+#define __NVKM_RUNQ_H__
+#include <core/os.h>
+struct nvkm_runl;
+
+struct nvkm_runq {
+	const struct nvkm_runq_func {
+		void (*init)(struct nvkm_runq *);
+		bool (*intr)(struct nvkm_runq *, struct nvkm_runl *);
+		const struct nvkm_bitfield *intr_0_names;
+		bool (*intr_1_ctxnotvalid)(struct nvkm_runq *, int chid);
+		bool (*idle)(struct nvkm_runq *);
+	} *func;
+	struct nvkm_fifo *fifo;
+	int id;
+
+	struct list_head head;
+};
+
+struct nvkm_runq *nvkm_runq_new(struct nvkm_fifo *, int pbid);
+void nvkm_runq_del(struct nvkm_runq *);
+
+#define nvkm_runq_foreach(runq,fifo) list_for_each_entry((runq), &(fifo)->runqs, head)
+#define nvkm_runq_foreach_cond(runq,fifo,cond) nvkm_list_foreach(runq, &(fifo)->runqs, head, (cond))
+
+#define RUNQ_PRINT(r,l,p,f,a...)							   \
+	nvkm_printk__(&(r)->fifo->engine.subdev, NV_DBG_##l, p, "PBDMA%d:"f, (r)->id, ##a)
+#define RUNQ_ERROR(r,f,a...) RUNQ_PRINT((r), ERROR,    err, " "f"\n", ##a)
+#define RUNQ_DEBUG(r,f,a...) RUNQ_PRINT((r), DEBUG,   info, " "f"\n", ##a)
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c
index 260b197f81bc..ea9e151dbb48 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu102.c
@@ -19,46 +19,83 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-#include "gk104.h"
+#include "priv.h"
 #include "cgrp.h"
-#include "changk104.h"
-#include "user.h"
+#include "chan.h"
+#include "runl.h"
 
-#include <core/client.h>
-#include <core/gpuobj.h>
-#include <subdev/bar.h>
-#include <subdev/fault.h>
-#include <subdev/top.h>
-#include <subdev/timer.h>
-#include <engine/sw.h>
+#include <core/memory.h>
+#include <subdev/mc.h>
+#include <subdev/vfn.h>
 
 #include <nvif/class.h>
 
+static u32
+tu102_chan_doorbell_handle(struct nvkm_chan *chan)
+{
+	return (chan->cgrp->runl->id << 16) | chan->id;
+}
+
 static void
-tu102_fifo_runlist_commit(struct gk104_fifo *fifo, int runl,
-			  struct nvkm_memory *mem, int nr)
+tu102_chan_start(struct nvkm_chan *chan)
 {
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	u64 addr = nvkm_memory_addr(mem);
-	/*XXX: target? */
+	struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device;
+
+	gk104_chan_start(chan);
+	nvkm_wr32(device, device->vfn->addr.user + 0x0090, chan->func->doorbell_handle(chan));
+}
+
+static const struct nvkm_chan_func
+tu102_chan = {
+	.inst = &gf100_chan_inst,
+	.userd = &gv100_chan_userd,
+	.ramfc = &gv100_chan_ramfc,
+	.bind = gk104_chan_bind_inst,
+	.unbind = gk104_chan_unbind,
+	.start = tu102_chan_start,
+	.stop = gk104_chan_stop,
+	.preempt = gk110_chan_preempt,
+	.doorbell_handle = tu102_chan_doorbell_handle,
+};
+
+static bool
+tu102_runl_pending(struct nvkm_runl *runl)
+{
+	struct nvkm_device *device = runl->fifo->engine.subdev.device;
+
+	return nvkm_rd32(device, 0x002b0c + (runl->id * 0x10)) & 0x00008000;
+}
 
-	nvkm_wr32(device, 0x002b00 + (runl * 0x10), lower_32_bits(addr));
-	nvkm_wr32(device, 0x002b04 + (runl * 0x10), upper_32_bits(addr));
-	nvkm_wr32(device, 0x002b08 + (runl * 0x10), nr);
+static void
+tu102_runl_commit(struct nvkm_runl *runl, struct nvkm_memory *memory, u32 start, int count)
+{
+	struct nvkm_device *device = runl->fifo->engine.subdev.device;
+	u64 addr = nvkm_memory_addr(memory) + start;
+	/*XXX: target? */
 
-	/*XXX: how to wait? can you even wait? */
+	nvkm_wr32(device, 0x002b00 + (runl->id * 0x10), lower_32_bits(addr));
+	nvkm_wr32(device, 0x002b04 + (runl->id * 0x10), upper_32_bits(addr));
+	nvkm_wr32(device, 0x002b08 + (runl->id * 0x10), count);
 }
 
-static const struct gk104_fifo_runlist_func
-tu102_fifo_runlist = {
+static const struct nvkm_runl_func
+tu102_runl = {
+	.runqs = 2,
 	.size = 16,
-	.cgrp = gv100_fifo_runlist_cgrp,
-	.chan = gv100_fifo_runlist_chan,
-	.commit = tu102_fifo_runlist_commit,
+	.update = nv50_runl_update,
+	.insert_cgrp = gv100_runl_insert_cgrp,
+	.insert_chan = gv100_runl_insert_chan,
+	.commit = tu102_runl_commit,
+	.wait = nv50_runl_wait,
+	.pending = tu102_runl_pending,
+	.block = gk104_runl_block,
+	.allow = gk104_runl_allow,
+	.preempt = gv100_runl_preempt,
+	.preempt_pending = gf100_runl_preempt_pending,
 };
 
 static const struct nvkm_enum
-tu102_fifo_fault_engine[] = {
+tu102_fifo_mmu_fault_engine[] = {
 	{ 0x01, "DISPLAY" },
 	{ 0x03, "PTP" },
 	{ 0x06, "PWR_PMU" },
@@ -85,305 +122,82 @@ tu102_fifo_fault_engine[] = {
 	{}
 };
 
-static void
-tu102_fifo_pbdma_init(struct gk104_fifo *fifo)
-{
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	const u32 mask = (1 << fifo->pbdma_nr) - 1;
-	/*XXX: this is a bit of a guess at this point in time. */
-	nvkm_mask(device, 0xb65000, 0x80000fff, 0x80000000 | mask);
-}
-
-static const struct gk104_fifo_pbdma_func
-tu102_fifo_pbdma = {
-	.nr = gm200_fifo_pbdma_nr,
-	.init = tu102_fifo_pbdma_init,
-	.init_timeout = gk208_fifo_pbdma_init_timeout,
-};
-
-static const struct gk104_fifo_func
-tu102_fifo = {
-	.pbdma = &tu102_fifo_pbdma,
-	.fault.access = gv100_fifo_fault_access,
-	.fault.engine = tu102_fifo_fault_engine,
-	.fault.reason = gv100_fifo_fault_reason,
-	.fault.hubclient = gv100_fifo_fault_hubclient,
-	.fault.gpcclient = gv100_fifo_fault_gpcclient,
-	.runlist = &tu102_fifo_runlist,
-	.user = {{-1,-1,VOLTA_USERMODE_A       }, tu102_fifo_user_new   },
-	.chan = {{ 0, 0,TURING_CHANNEL_GPFIFO_A}, tu102_fifo_gpfifo_new },
-	.cgrp_force = true,
+const struct nvkm_fifo_func_mmu_fault
+tu102_fifo_mmu_fault = {
+	.recover = gf100_fifo_mmu_fault_recover,
+	.access = gv100_fifo_mmu_fault_access,
+	.engine = tu102_fifo_mmu_fault_engine,
+	.reason = gv100_fifo_mmu_fault_reason,
+	.hubclient = gv100_fifo_mmu_fault_hubclient,
+	.gpcclient = gv100_fifo_mmu_fault_gpcclient,
 };
 
-static void
-tu102_fifo_recover_work(struct work_struct *w)
+void
+tu102_fifo_intr_ctxsw_timeout_info(struct nvkm_engn *engn, u32 info)
 {
-	struct gk104_fifo *fifo = container_of(w, typeof(*fifo), recover.work);
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct nvkm_engine *engine;
+	struct nvkm_runl *runl = engn->runl;
+	struct nvkm_cgrp *cgrp;
 	unsigned long flags;
-	u32 engm, runm, todo;
-	int engn, runl;
-
-	spin_lock_irqsave(&fifo->base.lock, flags);
-	runm = fifo->recover.runm;
-	engm = fifo->recover.engm;
-	fifo->recover.engm = 0;
-	fifo->recover.runm = 0;
-	spin_unlock_irqrestore(&fifo->base.lock, flags);
-
-	nvkm_mask(device, 0x002630, runm, runm);
-
-	for (todo = engm; engn = __ffs(todo), todo; todo &= ~BIT(engn)) {
-		if ((engine = fifo->engine[engn].engine)) {
-			nvkm_subdev_fini(&engine->subdev, false);
-			WARN_ON(nvkm_subdev_init(&engine->subdev));
-		}
-	}
-
-	for (todo = runm; runl = __ffs(todo), todo; todo &= ~BIT(runl))
-		gk104_fifo_runlist_update(fifo, runl);
-
-	nvkm_mask(device, 0x002630, runm, 0x00000000);
-}
-
-static void tu102_fifo_recover_engn(struct gk104_fifo *fifo, int engn);
-
-static void
-tu102_fifo_recover_runl(struct gk104_fifo *fifo, int runl)
-{
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	const u32 runm = BIT(runl);
-
-	assert_spin_locked(&fifo->base.lock);
-	if (fifo->recover.runm & runm)
-		return;
-	fifo->recover.runm |= runm;
-
-	/* Block runlist to prevent channel assignment(s) from changing. */
-	nvkm_mask(device, 0x002630, runm, runm);
-
-	/* Schedule recovery. */
-	nvkm_warn(subdev, "runlist %d: scheduled for recovery\n", runl);
-	schedule_work(&fifo->recover.work);
-}
-
-static struct gk104_fifo_chan *
-tu102_fifo_recover_chid(struct gk104_fifo *fifo, int runl, int chid)
-{
-	struct gk104_fifo_chan *chan;
-	struct nvkm_fifo_cgrp *cgrp;
-
-	list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
-		if (chan->base.chid == chid) {
-			list_del_init(&chan->head);
-			return chan;
-		}
-	}
-
-	list_for_each_entry(cgrp, &fifo->runlist[runl].cgrp, head) {
-		if (cgrp->id == chid) {
-			chan = list_first_entry(&cgrp->chan, typeof(*chan), head);
-			list_del_init(&chan->head);
-			if (!--cgrp->chan_nr)
-				list_del_init(&cgrp->head);
-			return chan;
-		}
-	}
-
-	return NULL;
-}
 
-static void
-tu102_fifo_recover_chan(struct nvkm_fifo *base, int chid)
-{
-	struct gk104_fifo *fifo = gk104_fifo(base);
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	const u32  stat = nvkm_rd32(device, 0x800004 + (chid * 0x08));
-	const u32  runl = (stat & 0x000f0000) >> 16;
-	const bool used = (stat & 0x00000001);
-	unsigned long engn, engm = fifo->runlist[runl].engm;
-	struct gk104_fifo_chan *chan;
-
-	assert_spin_locked(&fifo->base.lock);
-	if (!used)
+	/* Check that engine hasn't become unstuck since timeout raised. */
+	ENGN_DEBUG(engn, "CTXSW_TIMEOUT %08x", info);
+	if (info & 0xc0000000)
 		return;
 
-	/* Lookup SW state for channel, and mark it as dead. */
-	chan = tu102_fifo_recover_chid(fifo, runl, chid);
-	if (chan) {
-		chan->killed = true;
-		nvkm_fifo_kevent(&fifo->base, chid);
-	}
-
-	/* Disable channel. */
-	nvkm_wr32(device, 0x800004 + (chid * 0x08), stat | 0x00000800);
-	nvkm_warn(subdev, "channel %d: killed\n", chid);
-
-	/* Block channel assignments from changing during recovery. */
-	tu102_fifo_recover_runl(fifo, runl);
-
-	/* Schedule recovery for any engines the channel is on. */
-	for_each_set_bit(engn, &engm, fifo->engine_nr) {
-		struct gk104_fifo_engine_status status;
-
-		gk104_fifo_engine_status(fifo, engn, &status);
-		if (!status.chan || status.chan->id != chid)
-			continue;
-		tu102_fifo_recover_engn(fifo, engn);
+	/* Determine channel group the engine is stuck on, and schedule recovery. */
+	switch (info & 0x0000c000) {
+	case 0x00004000: /* LOAD */
+		cgrp = nvkm_runl_cgrp_get_cgid(runl, info & 0x3fff0000, &flags);
+		break;
+	case 0x00008000: /* SAVE */
+	case 0x0000c000: /* SWITCH */
+		cgrp = nvkm_runl_cgrp_get_cgid(runl, info & 0x00003fff, &flags);
+		break;
+	default:
+		cgrp = NULL;
+		break;
 	}
-}
-
-static void
-tu102_fifo_recover_engn(struct gk104_fifo *fifo, int engn)
-{
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	const u32 runl = fifo->engine[engn].runl;
-	const u32 engm = BIT(engn);
-	struct gk104_fifo_engine_status status;
-
-	assert_spin_locked(&fifo->base.lock);
-	if (fifo->recover.engm & engm)
-		return;
-	fifo->recover.engm |= engm;
 
-	/* Block channel assignments from changing during recovery. */
-	tu102_fifo_recover_runl(fifo, runl);
-
-	/* Determine which channel (if any) is currently on the engine. */
-	gk104_fifo_engine_status(fifo, engn, &status);
-	if (status.chan) {
-		/* The channel is not longer viable, kill it. */
-		tu102_fifo_recover_chan(&fifo->base, status.chan->id);
+	if (!WARN_ON(!cgrp)) {
+		nvkm_runl_rc_cgrp(cgrp);
+		nvkm_cgrp_put(&cgrp, flags);
 	}
-
-	/* Preempt the runlist */
-	nvkm_wr32(device, 0x2638, BIT(runl));
-
-	/* Schedule recovery. */
-	nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn);
-	schedule_work(&fifo->recover.work);
 }
 
 static void
-tu102_fifo_fault(struct nvkm_fifo *base, struct nvkm_fault_data *info)
+tu102_fifo_intr_ctxsw_timeout(struct nvkm_fifo *fifo)
 {
-	struct gk104_fifo *fifo = gk104_fifo(base);
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	const struct nvkm_enum *er, *ee, *ec, *ea;
-	struct nvkm_engine *engine = NULL;
-	struct nvkm_fifo_chan *chan;
-	unsigned long flags;
-	const char *en = "";
-	char ct[8] = "HUB/";
-	int engn;
-
-	er = nvkm_enum_find(fifo->func->fault.reason, info->reason);
-	ee = nvkm_enum_find(fifo->func->fault.engine, info->engine);
-	if (info->hub) {
-		ec = nvkm_enum_find(fifo->func->fault.hubclient, info->client);
-	} else {
-		ec = nvkm_enum_find(fifo->func->fault.gpcclient, info->client);
-		snprintf(ct, sizeof(ct), "GPC%d/", info->gpc);
-	}
-	ea = nvkm_enum_find(fifo->func->fault.access, info->access);
-
-	if (ee && ee->data2) {
-		switch (ee->data2) {
-		case NVKM_SUBDEV_BAR:
-			nvkm_bar_bar1_reset(device);
-			break;
-		case NVKM_SUBDEV_INSTMEM:
-			nvkm_bar_bar2_reset(device);
-			break;
-		case NVKM_ENGINE_IFB:
-			nvkm_mask(device, 0x001718, 0x00000000, 0x00000000);
-			break;
-		default:
-			engine = nvkm_device_engine(device, ee->data2, 0);
-			break;
-		}
-	}
-
-	if (ee == NULL) {
-		struct nvkm_subdev *subdev = nvkm_top_fault(device, info->engine);
-		if (subdev) {
-			if (subdev->func == &nvkm_engine)
-				engine = container_of(subdev, typeof(*engine), subdev);
-			en = engine->subdev.name;
+	struct nvkm_device *device = fifo->engine.subdev.device;
+	struct nvkm_runl *runl;
+	struct nvkm_engn *engn;
+	u32 engm = nvkm_rd32(device, 0x002a30);
+	u32 info;
+
+	nvkm_runl_foreach(runl, fifo) {
+		nvkm_runl_foreach_engn_cond(engn, runl, engm & BIT(engn->id)) {
+			info = nvkm_rd32(device, 0x003200 + (engn->id * 4));
+			tu102_fifo_intr_ctxsw_timeout_info(engn, info);
 		}
-	} else {
-		en = ee->name;
 	}
 
-	spin_lock_irqsave(&fifo->base.lock, flags);
-	chan = nvkm_fifo_chan_inst_locked(&fifo->base, info->inst);
-
-	nvkm_error(subdev,
-		   "fault %02x [%s] at %016llx engine %02x [%s] client %02x "
-		   "[%s%s] reason %02x [%s] on channel %d [%010llx %s]\n",
-		   info->access, ea ? ea->name : "", info->addr,
-		   info->engine, ee ? ee->name : en,
-		   info->client, ct, ec ? ec->name : "",
-		   info->reason, er ? er->name : "", chan ? chan->chid : -1,
-		   info->inst, chan ? chan->object.client->name : "unknown");
-
-	/* Kill the channel that caused the fault. */
-	if (chan)
-		tu102_fifo_recover_chan(&fifo->base, chan->chid);
-
-	/* Channel recovery will probably have already done this for the
-	 * correct engine(s), but just in case we can't find the channel
-	 * information...
-	 */
-	for (engn = 0; engn < fifo->engine_nr && engine; engn++) {
-		if (fifo->engine[engn].engine == engine) {
-			tu102_fifo_recover_engn(fifo, engn);
-			break;
-		}
-	}
-
-	spin_unlock_irqrestore(&fifo->base.lock, flags);
-}
-
-static void
-tu102_fifo_intr_ctxsw_timeout(struct gk104_fifo *fifo)
-{
-	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	unsigned long flags, engm;
-	u32 engn;
-
-	spin_lock_irqsave(&fifo->base.lock, flags);
-
-	engm = nvkm_rd32(device, 0x2a30);
-	nvkm_wr32(device, 0x2a30, engm);
-
-	for_each_set_bit(engn, &engm, 32)
-		tu102_fifo_recover_engn(fifo, engn);
-
-	spin_unlock_irqrestore(&fifo->base.lock, flags);
+	nvkm_wr32(device, 0x002a30, engm);
 }
 
 static void
-tu102_fifo_intr_sched(struct gk104_fifo *fifo)
+tu102_fifo_intr_sched(struct nvkm_fifo *fifo)
 {
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
-	u32 intr = nvkm_rd32(device, 0x00254c);
+	struct nvkm_subdev *subdev = &fifo->engine.subdev;
+	u32 intr = nvkm_rd32(subdev->device, 0x00254c);
 	u32 code = intr & 0x000000ff;
 
 	nvkm_error(subdev, "SCHED_ERROR %02x\n", code);
 }
 
-static void
-tu102_fifo_intr(struct nvkm_fifo *base)
+static irqreturn_t
+tu102_fifo_intr(struct nvkm_inth *inth)
 {
-	struct gk104_fifo *fifo = gk104_fifo(base);
-	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_fifo *fifo = container_of(inth, typeof(*fifo), engine.subdev.inth);
+	struct nvkm_subdev *subdev = &fifo->engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	u32 mask = nvkm_rd32(device, 0x002140);
 	u32 stat = nvkm_rd32(device, 0x002100) & mask;
@@ -412,17 +226,8 @@ tu102_fifo_intr(struct nvkm_fifo *base)
 	}
 
 	if (stat & 0x20000000) {
-		u32 mask = nvkm_rd32(device, 0x0025a0);
-
-		while (mask) {
-			u32 unit = __ffs(mask);
-
-			gk104_fifo_intr_pbdma_0(fifo, unit);
-			gk104_fifo_intr_pbdma_1(fifo, unit);
-			nvkm_wr32(device, 0x0025a0, (1 << unit));
-			mask &= ~(1 << unit);
-		}
-		stat &= ~0x20000000;
+		if (gf100_fifo_intr_pbdma(fifo))
+			stat &= ~0x20000000;
 	}
 
 	if (stat & 0x40000000) {
@@ -432,46 +237,50 @@ tu102_fifo_intr(struct nvkm_fifo *base)
 
 	if (stat & 0x80000000) {
 		nvkm_wr32(device, 0x002100, 0x80000000);
-		gk104_fifo_intr_engine(fifo);
+		nvkm_event_ntfy(&fifo->nonstall.event, 0, NVKM_FIFO_NONSTALL_EVENT);
 		stat &= ~0x80000000;
 	}
 
 	if (stat) {
 		nvkm_error(subdev, "INTR %08x\n", stat);
+		spin_lock(&fifo->lock);
 		nvkm_mask(device, 0x002140, stat, 0x00000000);
+		spin_unlock(&fifo->lock);
 		nvkm_wr32(device, 0x002100, stat);
 	}
+
+	return IRQ_HANDLED;
+}
+
+static void
+tu102_fifo_init_pbdmas(struct nvkm_fifo *fifo, u32 mask)
+{
+	/* Not directly related to PBDMAs, but, enables doorbell to function. */
+	nvkm_mask(fifo->engine.subdev.device, 0xb65000, 0x80000000, 0x80000000);
 }
 
 static const struct nvkm_fifo_func
-tu102_fifo_ = {
-	.dtor = gk104_fifo_dtor,
-	.oneinit = gk104_fifo_oneinit,
-	.info = gk104_fifo_info,
+tu102_fifo = {
+	.chid_nr = gm200_fifo_chid_nr,
+	.chid_ctor = gk110_fifo_chid_ctor,
+	.runq_nr = gm200_fifo_runq_nr,
+	.runl_ctor = gk104_fifo_runl_ctor,
 	.init = gk104_fifo_init,
-	.fini = gk104_fifo_fini,
+	.init_pbdmas = tu102_fifo_init_pbdmas,
 	.intr = tu102_fifo_intr,
-	.fault = tu102_fifo_fault,
-	.engine_id = gk104_fifo_engine_id,
-	.id_engine = gk104_fifo_id_engine,
-	.uevent_init = gk104_fifo_uevent_init,
-	.uevent_fini = gk104_fifo_uevent_fini,
-	.recover_chan = tu102_fifo_recover_chan,
-	.class_get = gk104_fifo_class_get,
-	.class_new = gk104_fifo_class_new,
+	.mmu_fault = &tu102_fifo_mmu_fault,
+	.nonstall = &gf100_fifo_nonstall,
+	.runl = &tu102_runl,
+	.runq = &gv100_runq,
+	.engn = &gv100_engn,
+	.engn_ce = &gv100_engn_ce,
+	.cgrp = {{ 0, 0, KEPLER_CHANNEL_GROUP_A  }, &gk110_cgrp, .force = true },
+	.chan = {{ 0, 0, TURING_CHANNEL_GPFIFO_A }, &tu102_chan },
 };
 
 int
 tu102_fifo_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	       struct nvkm_fifo **pfifo)
 {
-	struct gk104_fifo *fifo;
-
-	if (!(fifo = kzalloc(sizeof(*fifo), GFP_KERNEL)))
-		return -ENOMEM;
-	fifo->func = &tu102_fifo;
-	INIT_WORK(&fifo->recover.work, tu102_fifo_recover_work);
-	*pfifo = &fifo->base;
-
-	return nvkm_fifo_ctor(&tu102_fifo_, device, type, inst, 4096, &fifo->base);
+	return nvkm_fifo_new_(&tu102_fifo, device, type, inst, pfifo);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ucgrp.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ucgrp.c
new file mode 100644
index 000000000000..52c594dfb1b8
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/ucgrp.c
@@ -0,0 +1,125 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#define nvkm_ucgrp(p) container_of((p), struct nvkm_ucgrp, object)
+#include "priv.h"
+#include "cgrp.h"
+#include "runl.h"
+
+#include <subdev/mmu.h>
+
+#include <nvif/if0021.h>
+
+struct nvkm_ucgrp {
+	struct nvkm_object object;
+	struct nvkm_cgrp *cgrp;
+};
+
+static int
+nvkm_ucgrp_chan_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nvkm_object **pobject)
+{
+	struct nvkm_cgrp *cgrp = nvkm_ucgrp(oclass->parent)->cgrp;
+
+	return nvkm_uchan_new(cgrp->runl->fifo, cgrp, oclass, argv, argc, pobject);
+}
+
+static int
+nvkm_ucgrp_sclass(struct nvkm_object *object, int index, struct nvkm_oclass *oclass)
+{
+	struct nvkm_cgrp *cgrp = nvkm_ucgrp(object)->cgrp;
+	struct nvkm_fifo *fifo = cgrp->runl->fifo;
+	const struct nvkm_fifo_func_chan *chan = &fifo->func->chan;
+	int c = 0;
+
+	/* *_CHANNEL_GPFIFO_* */
+	if (chan->user.oclass) {
+		if (c++ == index) {
+			oclass->base = chan->user;
+			oclass->ctor = nvkm_ucgrp_chan_new;
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
+
+static void *
+nvkm_ucgrp_dtor(struct nvkm_object *object)
+{
+	struct nvkm_ucgrp *ucgrp = nvkm_ucgrp(object);
+
+	nvkm_cgrp_unref(&ucgrp->cgrp);
+	return ucgrp;
+}
+
+static const struct nvkm_object_func
+nvkm_ucgrp = {
+	.dtor = nvkm_ucgrp_dtor,
+	.sclass = nvkm_ucgrp_sclass,
+};
+
+int
+nvkm_ucgrp_new(struct nvkm_fifo *fifo, const struct nvkm_oclass *oclass, void *argv, u32 argc,
+	       struct nvkm_object **pobject)
+{
+	union nvif_cgrp_args *args = argv;
+	struct nvkm_runl *runl;
+	struct nvkm_vmm *vmm;
+	struct nvkm_ucgrp *ucgrp;
+	int ret;
+
+	if (argc < sizeof(args->v0) || args->v0.version != 0)
+		return -ENOSYS;
+	argc -= sizeof(args->v0);
+
+	if (args->v0.namelen != argc)
+		return -EINVAL;
+
+	/* Lookup objects referenced in args. */
+	runl = nvkm_runl_get(fifo, args->v0.runlist, 0);
+	if (!runl)
+		return -EINVAL;
+
+	vmm = nvkm_uvmm_search(oclass->client, args->v0.vmm);
+	if (IS_ERR(vmm))
+		return PTR_ERR(vmm);
+
+	/* Allocate channel group. */
+	if (!(ucgrp = kzalloc(sizeof(*ucgrp), GFP_KERNEL))) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	nvkm_object_ctor(&nvkm_ucgrp, oclass, &ucgrp->object);
+	*pobject = &ucgrp->object;
+
+	ret = nvkm_cgrp_new(runl, args->v0.name, vmm, true, &ucgrp->cgrp);
+	if (ret)
+		goto done;
+
+	/* Return channel group info to caller. */
+	args->v0.cgid = ucgrp->cgrp->id;
+
+done:
+	nvkm_vmm_unref(&vmm);
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/uchan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/uchan.c
new file mode 100644
index 000000000000..1dac95ae7b43
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/uchan.c
@@ -0,0 +1,409 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#define nvkm_uchan(p) container_of((p), struct nvkm_uchan, object)
+#include "priv.h"
+#include "cgrp.h"
+#include "chan.h"
+#include "chid.h"
+#include "runl.h"
+
+#include <core/gpuobj.h>
+#include <core/oproxy.h>
+#include <subdev/mmu.h>
+#include <engine/dma.h>
+
+#include <nvif/if0020.h>
+
+struct nvkm_uchan {
+	struct nvkm_object object;
+	struct nvkm_chan *chan;
+};
+
+static int
+nvkm_uchan_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_uevent *uevent)
+{
+	struct nvkm_chan *chan = nvkm_uchan(object)->chan;
+	struct nvkm_runl *runl = chan->cgrp->runl;
+	union nvif_chan_event_args *args = argv;
+
+	if (!uevent)
+		return 0;
+	if (argc != sizeof(args->v0) || args->v0.version != 0)
+		return -ENOSYS;
+
+	switch (args->v0.type) {
+	case NVIF_CHAN_EVENT_V0_NON_STALL_INTR:
+		return nvkm_uevent_add(uevent, &runl->fifo->nonstall.event, 0,
+				       NVKM_FIFO_NONSTALL_EVENT, NULL);
+	case NVIF_CHAN_EVENT_V0_KILLED:
+		return nvkm_uevent_add(uevent, &runl->chid->event, chan->id,
+				       NVKM_CHAN_EVENT_ERRORED, NULL);
+	default:
+		break;
+	}
+
+	return -ENOSYS;
+}
+
+struct nvkm_uobj {
+	struct nvkm_oproxy oproxy;
+	struct nvkm_chan *chan;
+	struct nvkm_cctx *cctx;
+	int hash;
+};
+
+static int
+nvkm_uchan_object_fini_1(struct nvkm_oproxy *oproxy, bool suspend)
+{
+	struct nvkm_uobj *uobj = container_of(oproxy, typeof(*uobj), oproxy);
+	struct nvkm_chan *chan = uobj->chan;
+	struct nvkm_cctx *cctx = uobj->cctx;
+	struct nvkm_ectx *ectx = cctx->vctx->ectx;
+
+	if (!ectx->object)
+		return 0;
+
+	/* Unbind engine context from channel, if no longer required. */
+	if (refcount_dec_and_mutex_lock(&cctx->uses, &chan->cgrp->mutex)) {
+		nvkm_chan_cctx_bind(chan, ectx->engn, NULL);
+
+		if (refcount_dec_and_test(&ectx->uses))
+			nvkm_object_fini(ectx->object, false);
+		mutex_unlock(&chan->cgrp->mutex);
+	}
+
+	return 0;
+}
+
+static int
+nvkm_uchan_object_init_0(struct nvkm_oproxy *oproxy)
+{
+	struct nvkm_uobj *uobj = container_of(oproxy, typeof(*uobj), oproxy);
+	struct nvkm_chan *chan = uobj->chan;
+	struct nvkm_cctx *cctx = uobj->cctx;
+	struct nvkm_ectx *ectx = cctx->vctx->ectx;
+	int ret = 0;
+
+	if (!ectx->object)
+		return 0;
+
+	/* Bind engine context to channel, if it hasn't been already. */
+	if (!refcount_inc_not_zero(&cctx->uses)) {
+		mutex_lock(&chan->cgrp->mutex);
+		if (!refcount_inc_not_zero(&cctx->uses)) {
+			if (!refcount_inc_not_zero(&ectx->uses)) {
+				ret = nvkm_object_init(ectx->object);
+				if (ret == 0)
+					refcount_set(&ectx->uses, 1);
+			}
+
+			if (ret == 0) {
+				nvkm_chan_cctx_bind(chan, ectx->engn, cctx);
+				refcount_set(&cctx->uses, 1);
+			}
+		}
+		mutex_unlock(&chan->cgrp->mutex);
+	}
+
+	return ret;
+}
+
+static void
+nvkm_uchan_object_dtor(struct nvkm_oproxy *oproxy)
+{
+	struct nvkm_uobj *uobj = container_of(oproxy, typeof(*uobj), oproxy);
+	struct nvkm_engn *engn;
+
+	if (!uobj->cctx)
+		return;
+
+	engn = uobj->cctx->vctx->ectx->engn;
+	if (engn->func->ramht_del)
+		engn->func->ramht_del(uobj->chan, uobj->hash);
+
+	nvkm_chan_cctx_put(uobj->chan, &uobj->cctx);
+}
+
+static const struct nvkm_oproxy_func
+nvkm_uchan_object = {
+	.dtor[1] = nvkm_uchan_object_dtor,
+	.init[0] = nvkm_uchan_object_init_0,
+	.fini[1] = nvkm_uchan_object_fini_1,
+};
+
+static int
+nvkm_uchan_object_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		      struct nvkm_object **pobject)
+{
+	struct nvkm_chan *chan = nvkm_uchan(oclass->parent)->chan;
+	struct nvkm_cgrp *cgrp = chan->cgrp;
+	struct nvkm_engn *engn;
+	struct nvkm_uobj *uobj;
+	int ret;
+
+	/* Lookup host engine state for target engine. */
+	engn = nvkm_runl_find_engn(engn, cgrp->runl, engn->engine == oclass->engine);
+	if (WARN_ON(!engn))
+		return -EINVAL;
+
+	/* Allocate SW object. */
+	if (!(uobj = kzalloc(sizeof(*uobj), GFP_KERNEL)))
+		return -ENOMEM;
+
+	nvkm_oproxy_ctor(&nvkm_uchan_object, oclass, &uobj->oproxy);
+	uobj->chan = chan;
+	*pobject = &uobj->oproxy.base;
+
+	/* Ref. channel context for target engine.*/
+	ret = nvkm_chan_cctx_get(chan, engn, &uobj->cctx, oclass->client);
+	if (ret)
+		return ret;
+
+	/* Allocate HW object. */
+	ret = oclass->base.ctor(&(const struct nvkm_oclass) {
+					.base = oclass->base,
+					.engn = oclass->engn,
+					.handle = oclass->handle,
+					.object = oclass->object,
+					.client = oclass->client,
+					.parent = uobj->cctx->vctx->ectx->object ?: oclass->parent,
+					.engine = engn->engine,
+				 }, argv, argc, &uobj->oproxy.object);
+	if (ret)
+		return ret;
+
+	if (engn->func->ramht_add) {
+		uobj->hash = engn->func->ramht_add(engn, uobj->oproxy.object, uobj->chan);
+		if (uobj->hash < 0)
+			return uobj->hash;
+	}
+
+	return 0;
+}
+
+static int
+nvkm_uchan_sclass(struct nvkm_object *object, int index, struct nvkm_oclass *oclass)
+{
+	struct nvkm_chan *chan = nvkm_uchan(object)->chan;
+	struct nvkm_engn *engn;
+	int ret, runq = 0;
+
+	nvkm_runl_foreach_engn(engn, chan->cgrp->runl) {
+		struct nvkm_engine *engine = engn->engine;
+		int c = 0;
+
+		/* Each runqueue, on runlists with multiple, has its own LCE. */
+		if (engn->runl->func->runqs) {
+			if (engine->subdev.type == NVKM_ENGINE_CE) {
+				if (chan->runq != runq++)
+					continue;
+			}
+		}
+
+		oclass->engine = engine;
+		oclass->base.oclass = 0;
+
+		if (engine->func->fifo.sclass) {
+			ret = engine->func->fifo.sclass(oclass, index);
+			if (oclass->base.oclass) {
+				if (!oclass->base.ctor)
+					oclass->base.ctor = nvkm_object_new;
+				oclass->ctor = nvkm_uchan_object_new;
+				return 0;
+			}
+
+			index -= ret;
+			continue;
+		}
+
+		while (engine->func->sclass[c].oclass) {
+			if (c++ == index) {
+				oclass->base = engine->func->sclass[index];
+				if (!oclass->base.ctor)
+					oclass->base.ctor = nvkm_object_new;
+				oclass->ctor = nvkm_uchan_object_new;
+				return 0;
+			}
+		}
+
+		index -= c;
+	}
+
+	return -EINVAL;
+}
+
+static int
+nvkm_uchan_map(struct nvkm_object *object, void *argv, u32 argc,
+	       enum nvkm_object_map *type, u64 *addr, u64 *size)
+{
+	struct nvkm_chan *chan = nvkm_uchan(object)->chan;
+	struct nvkm_device *device = chan->cgrp->runl->fifo->engine.subdev.device;
+
+	if (chan->func->userd->bar < 0)
+		return -ENOSYS;
+
+	*type = NVKM_OBJECT_MAP_IO;
+	*addr = device->func->resource_addr(device, chan->func->userd->bar) +
+		chan->func->userd->base + chan->userd.base;
+	*size = chan->func->userd->size;
+	return 0;
+}
+
+static int
+nvkm_uchan_fini(struct nvkm_object *object, bool suspend)
+{
+	struct nvkm_chan *chan = nvkm_uchan(object)->chan;
+
+	nvkm_chan_block(chan);
+	nvkm_chan_remove(chan, true);
+
+	if (chan->func->unbind)
+		chan->func->unbind(chan);
+
+	return 0;
+}
+
+static int
+nvkm_uchan_init(struct nvkm_object *object)
+{
+	struct nvkm_chan *chan = nvkm_uchan(object)->chan;
+
+	if (atomic_read(&chan->errored))
+		return 0;
+
+	if (chan->func->bind)
+		chan->func->bind(chan);
+
+	nvkm_chan_allow(chan);
+	nvkm_chan_insert(chan);
+	return 0;
+}
+
+static void *
+nvkm_uchan_dtor(struct nvkm_object *object)
+{
+	struct nvkm_uchan *uchan = nvkm_uchan(object);
+
+	nvkm_chan_del(&uchan->chan);
+	return uchan;
+}
+
+static const struct nvkm_object_func
+nvkm_uchan = {
+	.dtor = nvkm_uchan_dtor,
+	.init = nvkm_uchan_init,
+	.fini = nvkm_uchan_fini,
+	.map = nvkm_uchan_map,
+	.sclass = nvkm_uchan_sclass,
+	.uevent = nvkm_uchan_uevent,
+};
+
+int
+nvkm_uchan_new(struct nvkm_fifo *fifo, struct nvkm_cgrp *cgrp, const struct nvkm_oclass *oclass,
+	       void *argv, u32 argc, struct nvkm_object **pobject)
+{
+	union nvif_chan_args *args = argv;
+	struct nvkm_runl *runl;
+	struct nvkm_vmm *vmm = NULL;
+	struct nvkm_dmaobj *ctxdma = NULL;
+	struct nvkm_memory *userd = NULL;
+	struct nvkm_uchan *uchan;
+	struct nvkm_chan *chan;
+	int ret;
+
+	if (argc < sizeof(args->v0) || args->v0.version != 0)
+		return -ENOSYS;
+	argc -= sizeof(args->v0);
+
+	if (args->v0.namelen != argc)
+		return -EINVAL;
+
+	/* Lookup objects referenced in args. */
+	runl = nvkm_runl_get(fifo, args->v0.runlist, 0);
+	if (!runl)
+		return -EINVAL;
+
+	if (args->v0.vmm) {
+		vmm = nvkm_uvmm_search(oclass->client, args->v0.vmm);
+		if (IS_ERR(vmm))
+			return PTR_ERR(vmm);
+	}
+
+	if (args->v0.ctxdma) {
+		ctxdma = nvkm_dmaobj_search(oclass->client, args->v0.ctxdma);
+		if (IS_ERR(ctxdma)) {
+			ret = PTR_ERR(ctxdma);
+			goto done;
+		}
+	}
+
+	if (args->v0.huserd) {
+		userd = nvkm_umem_search(oclass->client, args->v0.huserd);
+		if (IS_ERR(userd)) {
+			ret = PTR_ERR(userd);
+			userd = NULL;
+			goto done;
+		}
+	}
+
+	/* Allocate channel. */
+	if (!(uchan = kzalloc(sizeof(*uchan), GFP_KERNEL))) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	nvkm_object_ctor(&nvkm_uchan, oclass, &uchan->object);
+	*pobject = &uchan->object;
+
+	ret = nvkm_chan_new_(fifo->func->chan.func, runl, args->v0.runq, cgrp, args->v0.name,
+			     args->v0.priv != 0, args->v0.devm, vmm, ctxdma, args->v0.offset,
+			     args->v0.length, userd, args->v0.ouserd, &uchan->chan);
+	if (ret)
+		goto done;
+
+	chan = uchan->chan;
+
+	/* Return channel info to caller. */
+	if (chan->func->doorbell_handle)
+		args->v0.token = chan->func->doorbell_handle(chan);
+	else
+		args->v0.token = ~0;
+
+	args->v0.chid = chan->id;
+
+	switch (nvkm_memory_target(chan->inst->memory)) {
+	case NVKM_MEM_TARGET_INST: args->v0.aper = NVIF_CHAN_V0_INST_APER_INST; break;
+	case NVKM_MEM_TARGET_VRAM: args->v0.aper = NVIF_CHAN_V0_INST_APER_VRAM; break;
+	case NVKM_MEM_TARGET_HOST: args->v0.aper = NVIF_CHAN_V0_INST_APER_HOST; break;
+	case NVKM_MEM_TARGET_NCOH: args->v0.aper = NVIF_CHAN_V0_INST_APER_NCOH; break;
+	default:
+		WARN_ON(1);
+		ret = -EFAULT;
+		break;
+	}
+
+	args->v0.inst = nvkm_memory_addr(chan->inst->memory);
+done:
+	nvkm_memory_unref(&userd);
+	nvkm_vmm_unref(&vmm);
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h
deleted file mode 100644
index 54a3a3092cc0..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h
+++ /dev/null
@@ -1,8 +0,0 @@
-#ifndef __NVKM_FIFO_USER_H__
-#define __NVKM_FIFO_USER_H__
-#include "priv.h"
-int gv100_fifo_user_new(const struct nvkm_oclass *, void *, u32,
-			struct nvkm_object **);
-int tu102_fifo_user_new(const struct nvkm_oclass *, void *, u32,
-			struct nvkm_object **);
-#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
index 558c86fd8e82..b5418f05ccd8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/Kbuild
@@ -40,6 +40,7 @@ nvkm-y += nvkm/engine/gr/gp108.o
 nvkm-y += nvkm/engine/gr/gp10b.o
 nvkm-y += nvkm/engine/gr/gv100.o
 nvkm-y += nvkm/engine/gr/tu102.o
+nvkm-y += nvkm/engine/gr/ga102.o
 
 nvkm-y += nvkm/engine/gr/ctxnv40.o
 nvkm-y += nvkm/engine/gr/ctxnv50.o
@@ -63,3 +64,4 @@ nvkm-y += nvkm/engine/gr/ctxgp104.o
 nvkm-y += nvkm/engine/gr/ctxgp107.o
 nvkm-y += nvkm/engine/gr/ctxgv100.o
 nvkm-y += nvkm/engine/gr/ctxtu102.o
+nvkm-y += nvkm/engine/gr/ctxga102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
index 61759f54406e..71b824e6da9d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
@@ -136,6 +136,17 @@ nvkm_gr_oneinit(struct nvkm_engine *engine)
 }
 
 static int
+nvkm_gr_reset(struct nvkm_engine *engine)
+{
+	struct nvkm_gr *gr = nvkm_gr(engine);
+
+	if (gr->func->reset)
+		return gr->func->reset(gr);
+
+	return -ENOSYS;
+}
+
+static int
 nvkm_gr_init(struct nvkm_engine *engine)
 {
 	struct nvkm_gr *gr = nvkm_gr(engine);
@@ -166,6 +177,7 @@ nvkm_gr = {
 	.oneinit = nvkm_gr_oneinit,
 	.init = nvkm_gr_init,
 	.fini = nvkm_gr_fini,
+	.reset = nvkm_gr_reset,
 	.intr = nvkm_gr_intr,
 	.tile = nvkm_gr_tile,
 	.chsw_load = nvkm_gr_chsw_load,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxga102.c
new file mode 100644
index 000000000000..11461adf5036
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxga102.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2019 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ctxgf100.h"
+
+static void
+ga102_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	tpc = gv100_gr_nonpes_aware_tpc(gr, gpc, tpc);
+
+	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x608), sm);
+}
+
+static void
+ga102_grctx_generate_unkn(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	nvkm_mask(device, 0x41980c, 0x00000010, 0x00000010);
+	nvkm_mask(device, 0x41be08, 0x00000004, 0x00000004);
+}
+
+static void
+ga102_grctx_generate_r419ea8(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	nvkm_wr32(device, 0x419ea8, nvkm_rd32(device, 0x504728) | 0x08000000);
+}
+
+const struct gf100_grctx_func
+ga102_grctx = {
+	.main = gf100_grctx_generate_main,
+	.unkn = ga102_grctx_generate_unkn,
+	.bundle = gm107_grctx_generate_bundle,
+	.bundle_size = 0x3000,
+	.bundle_min_gpm_fifo_depth = 0x180,
+	.bundle_token_limit = 0x1140,
+	.pagepool = gp100_grctx_generate_pagepool,
+	.pagepool_size = 0x20000,
+	.attrib_cb_size = gp102_grctx_generate_attrib_cb_size,
+	.attrib_cb = gv100_grctx_generate_attrib_cb,
+	.attrib = gv100_grctx_generate_attrib,
+	.attrib_nr_max = 0x800,
+	.attrib_nr = 0x4a1,
+	.alpha_nr_max = 0xc00,
+	.alpha_nr = 0x800,
+	.unknown_size = 0x80000,
+	.unknown = tu102_grctx_generate_unknown,
+	.gfxp_nr = 0xd28,
+	.sm_id = ga102_grctx_generate_sm_id,
+	.skip_pd_num_tpc_per_gpc = true,
+	.rop_mapping = gv100_grctx_generate_rop_mapping,
+	.r406500 = gm200_grctx_generate_r406500,
+	.r400088 = gv100_grctx_generate_r400088,
+	.r419ea8 = ga102_grctx_generate_r419ea8,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
index 297915719bf2..cb390e0134a2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.c
@@ -26,6 +26,7 @@
 #include <subdev/fb.h>
 #include <subdev/mc.h>
 #include <subdev/timer.h>
+#include <engine/fifo.h>
 
 /*******************************************************************************
  * PGRAPH context register lists
@@ -990,43 +991,16 @@ gf100_grctx_pack_tpc[] = {
  * PGRAPH context implementation
  ******************************************************************************/
 
-int
-gf100_grctx_mmio_data(struct gf100_grctx *info, u32 size, u32 align, bool priv)
-{
-	if (info->data) {
-		info->buffer[info->buffer_nr] = round_up(info->addr, align);
-		info->addr = info->buffer[info->buffer_nr] + size;
-		info->data->size = size;
-		info->data->align = align;
-		info->data->priv = priv;
-		info->data++;
-		return info->buffer_nr++;
-	}
-	return -1;
-}
-
 void
-gf100_grctx_mmio_item(struct gf100_grctx *info, u32 addr, u32 data,
-		      int shift, int buffer)
+gf100_grctx_patch_wr32(struct gf100_gr_chan *chan, u32 addr, u32 data)
 {
-	struct nvkm_device *device = info->gr->base.engine.subdev.device;
-	if (info->data) {
-		if (shift >= 0) {
-			info->mmio->addr = addr;
-			info->mmio->data = data;
-			info->mmio->shift = shift;
-			info->mmio->buffer = buffer;
-			if (buffer >= 0)
-				data |= info->buffer[buffer] >> shift;
-			info->mmio++;
-		} else
-			return;
-	} else {
-		if (buffer >= 0)
-			return;
+	if (unlikely(!chan->mmio)) {
+		nvkm_wr32(chan->gr->base.engine.subdev.device, addr, data);
+		return;
 	}
 
-	nvkm_wr32(device, addr, data);
+	nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr);
+	nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
 }
 
 void
@@ -1037,56 +1011,60 @@ gf100_grctx_generate_r419cb8(struct gf100_gr *gr)
 }
 
 void
-gf100_grctx_generate_bundle(struct gf100_grctx *info)
+gf100_grctx_generate_bundle(struct gf100_gr_chan *chan, u64 addr, u32 size)
 {
-	const struct gf100_grctx_func *grctx = info->gr->func->grctx;
-	const int s = 8;
-	const int b = mmio_vram(info, grctx->bundle_size, (1 << s), true);
-	mmio_refn(info, 0x408004, 0x00000000, s, b);
-	mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s));
-	mmio_refn(info, 0x418808, 0x00000000, s, b);
-	mmio_wr32(info, 0x41880c, 0x80000000 | (grctx->bundle_size >> s));
+	gf100_grctx_patch_wr32(chan, 0x408004, addr >> 8);
+	gf100_grctx_patch_wr32(chan, 0x408008, 0x80000000 | (size >> 8));
+	gf100_grctx_patch_wr32(chan, 0x418808, addr >> 8);
+	gf100_grctx_patch_wr32(chan, 0x41880c, 0x80000000 | (size >> 8));
 }
 
 void
-gf100_grctx_generate_pagepool(struct gf100_grctx *info)
+gf100_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr)
 {
-	const struct gf100_grctx_func *grctx = info->gr->func->grctx;
-	const int s = 8;
-	const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true);
-	mmio_refn(info, 0x40800c, 0x00000000, s, b);
-	mmio_wr32(info, 0x408010, 0x80000000);
-	mmio_refn(info, 0x419004, 0x00000000, s, b);
-	mmio_wr32(info, 0x419008, 0x00000000);
+	gf100_grctx_patch_wr32(chan, 0x40800c, addr >> 8);
+	gf100_grctx_patch_wr32(chan, 0x408010, 0x80000000);
+	gf100_grctx_patch_wr32(chan, 0x419004, addr >> 8);
+	gf100_grctx_patch_wr32(chan, 0x419008, 0x00000000);
 }
 
 void
-gf100_grctx_generate_attrib(struct gf100_grctx *info)
+gf100_grctx_generate_attrib(struct gf100_gr_chan *chan)
 {
-	struct gf100_gr *gr = info->gr;
+	struct gf100_gr *gr = chan->gr;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	const u32 attrib = grctx->attrib_nr;
-	const u32   size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
-	const int s = 12;
-	const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false);
 	int gpc, tpc;
 	u32 bo = 0;
 
-	mmio_refn(info, 0x418810, 0x80000000, s, b);
-	mmio_refn(info, 0x419848, 0x10000000, s, b);
-	mmio_wr32(info, 0x405830, (attrib << 16));
+	gf100_grctx_patch_wr32(chan, 0x405830, (attrib << 16));
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
 			const u32 o = TPC_UNIT(gpc, tpc, 0x0520);
-			mmio_skip(info, o, (attrib << 16) | ++bo);
-			mmio_wr32(info, o, (attrib << 16) | --bo);
+
+			gf100_grctx_patch_wr32(chan, o, (attrib << 16) | bo);
 			bo += grctx->attrib_nr_max;
 		}
 	}
 }
 
 void
+gf100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size)
+{
+	gf100_grctx_patch_wr32(chan, 0x418810, 0x80000000 | addr >> 12);
+	gf100_grctx_patch_wr32(chan, 0x419848, 0x10000000 | addr >> 12);
+}
+
+u32
+gf100_grctx_generate_attrib_cb_size(struct gf100_gr *gr)
+{
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+
+	return 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max) * gr->tpc_total;
+}
+
+void
 gf100_grctx_generate_unkn(struct gf100_gr *gr)
 {
 }
@@ -1361,8 +1339,9 @@ gf100_grctx_generate_floorsweep(struct gf100_gr *gr)
 }
 
 void
-gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
+gf100_grctx_generate_main(struct gf100_gr_chan *chan)
 {
+	struct gf100_gr *gr = chan->gr;
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	u32 idle_timeout;
@@ -1380,15 +1359,23 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 		gf100_gr_mmio(gr, gr->sw_ctx);
 	}
 
+	if (gr->func->init_419bd8)
+		gr->func->init_419bd8(gr);
+	if (grctx->r419ea8)
+		grctx->r419ea8(gr);
+
 	gf100_gr_wait_idle(gr);
 
 	idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
 
-	grctx->pagepool(info);
-	grctx->bundle(info);
-	grctx->attrib(info);
+	grctx->pagepool(chan, chan->pagepool->addr);
+	grctx->bundle(chan, chan->bundle_cb->addr, grctx->bundle_size);
+	grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr));
+	grctx->attrib(chan);
 	if (grctx->patch_ltc)
-		grctx->patch_ltc(info);
+		grctx->patch_ltc(chan);
+	if (grctx->unknown_size)
+		grctx->unknown(chan, chan->unknown->addr, grctx->unknown_size);
 	grctx->unkn(gr);
 
 	gf100_grctx_generate_floorsweep(gr);
@@ -1396,12 +1383,23 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	gf100_gr_wait_idle(gr);
 
 	if (grctx->r400088) grctx->r400088(gr, false);
+
 	if (gr->bundle)
 		gf100_gr_icmd(gr, gr->bundle);
 	else
 		gf100_gr_icmd(gr, grctx->icmd);
-	if (grctx->sw_veid_bundle_init)
+
+	if (gr->bundle_veid)
+		gf100_gr_icmd(gr, gr->bundle_veid);
+	else
 		gf100_gr_icmd(gr, grctx->sw_veid_bundle_init);
+
+	if (gr->bundle64)
+		gf100_gr_icmd(gr, gr->bundle64);
+	else
+	if (grctx->sw_bundle64_init)
+		gf100_gr_icmd(gr, grctx->sw_bundle64_init);
+
 	if (grctx->r400088) grctx->r400088(gr, true);
 
 	nvkm_wr32(device, 0x404154, idle_timeout);
@@ -1428,21 +1426,20 @@ gf100_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 		grctx->r408840(gr);
 	if (grctx->r419c0c)
 		grctx->r419c0c(gr);
+
+	gf100_gr_wait_idle(gr);
 }
 
 #define CB_RESERVED 0x80000
 
 int
-gf100_grctx_generate(struct gf100_gr *gr)
+gf100_grctx_generate(struct gf100_gr *gr, struct gf100_gr_chan *chan, struct nvkm_gpuobj *inst)
 {
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	struct nvkm_memory *inst = NULL;
 	struct nvkm_memory *data = NULL;
-	struct nvkm_vmm *vmm = NULL;
 	struct nvkm_vma *ctx = NULL;
-	struct gf100_grctx info;
 	int ret, i;
 	u64 addr;
 
@@ -1457,72 +1454,47 @@ gf100_grctx_generate(struct gf100_gr *gr)
 		grctx->unkn88c(gr, true);
 
 	/* Reset FECS. */
-	nvkm_wr32(device, 0x409614, 0x00000070);
-	nvkm_usec(device, 10, NVKM_DELAY);
-	nvkm_mask(device, 0x409614, 0x00000700, 0x00000700);
-	nvkm_usec(device, 10, NVKM_DELAY);
-	nvkm_rd32(device, 0x409614);
+	gr->func->fecs.reset(gr);
 
 	if (grctx->unkn88c)
 		grctx->unkn88c(gr, false);
 
 	/* NV_PGRAPH_FE_PWR_MODE_AUTO. */
 	nvkm_wr32(device, 0x404170, 0x00000010);
+	nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, 0x404170) & 0x00000010))
+			break;
+	);
 
 	/* Init SCC RAM. */
 	nvkm_wr32(device, 0x40802c, 0x00000001);
 
-	/* Allocate memory to for a "channel", which we'll use to generate
-	 * the default context values.
-	 */
-	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
-			      0x1000, 0x1000, true, &inst);
-	if (ret)
-		goto done;
-
-	ret = nvkm_vmm_new(device, 0, 0, NULL, 0, NULL, "grctx", &vmm);
-	if (ret)
-		goto done;
-
-	vmm->debug = subdev->debug;
-
-	ret = nvkm_vmm_join(vmm, inst);
-	if (ret)
-		goto done;
-
+	/* Allocate memory to store context, and dummy global context buffers. */
 	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
 			      CB_RESERVED + gr->size, 0, true, &data);
 	if (ret)
 		goto done;
 
-	ret = nvkm_vmm_get(vmm, 0, nvkm_memory_size(data), &ctx);
+	ret = nvkm_vmm_get(chan->vmm, 0, nvkm_memory_size(data), &ctx);
 	if (ret)
 		goto done;
 
-	ret = nvkm_memory_map(data, 0, vmm, ctx, NULL, 0);
+	ret = nvkm_memory_map(data, 0, chan->vmm, ctx, NULL, 0);
 	if (ret)
 		goto done;
 
-
 	/* Setup context pointer. */
 	nvkm_kmap(inst);
 	nvkm_wo32(inst, 0x0210, lower_32_bits(ctx->addr + CB_RESERVED) | 4);
 	nvkm_wo32(inst, 0x0214, upper_32_bits(ctx->addr + CB_RESERVED));
 	nvkm_done(inst);
 
-	/* Setup default state for mmio list construction. */
-	info.gr = gr;
-	info.data = gr->mmio_data;
-	info.mmio = gr->mmio_list;
-	info.addr = ctx->addr;
-	info.buffer_nr = 0;
-
 	/* Make channel current. */
-	addr = nvkm_memory_addr(inst) >> 12;
+	addr = inst->addr >> 12;
 	if (gr->firmware) {
 		ret = gf100_gr_fecs_bind_pointer(gr, 0x80000000 | addr);
 		if (ret)
-			goto done;
+			goto done_inst;
 
 		nvkm_kmap(data);
 		nvkm_wo32(data, 0x1c, 1);
@@ -1540,19 +1512,27 @@ gf100_grctx_generate(struct gf100_gr *gr)
 		);
 	}
 
-	grctx->main(gr, &info);
+	grctx->main(chan);
 
-	/* Trigger a context unload by unsetting the "next channel valid" bit
-	 * and faking a context switch interrupt.
-	 */
-	nvkm_mask(device, 0x409b04, 0x80000000, 0x00000000);
-	nvkm_wr32(device, 0x409000, 0x00000100);
-	if (nvkm_msec(device, 2000,
-		if (!(nvkm_rd32(device, 0x409b00) & 0x80000000))
-			break;
-	) < 0) {
-		ret = -EBUSY;
-		goto done;
+	if (!gr->firmware) {
+		/* Trigger a context unload by unsetting the "next channel valid" bit
+		 * and faking a context switch interrupt.
+		 */
+		nvkm_mask(device, 0x409b04, 0x80000000, 0x00000000);
+		nvkm_wr32(device, 0x409000, 0x00000100);
+		if (nvkm_msec(device, 2000,
+			if (!(nvkm_rd32(device, 0x409b00) & 0x80000000))
+				break;
+		) < 0) {
+			ret = -EBUSY;
+			goto done_inst;
+		}
+	} else {
+		ret = gf100_gr_fecs_wfi_golden_save(gr, 0x80000000 | addr);
+		if (ret)
+			goto done_inst;
+
+		nvkm_mask(device, 0x409b00, 0x80000000, 0x00000000);
 	}
 
 	gr->data = kmalloc(gr->size, GFP_KERNEL);
@@ -1566,12 +1546,14 @@ gf100_grctx_generate(struct gf100_gr *gr)
 		ret = -ENOMEM;
 	}
 
+done_inst:
+	nvkm_kmap(inst);
+	nvkm_wo32(inst, 0x0210, 0);
+	nvkm_wo32(inst, 0x0214, 0);
+	nvkm_done(inst);
 done:
-	nvkm_vmm_put(vmm, &ctx);
+	nvkm_vmm_put(chan->vmm, &ctx);
 	nvkm_memory_unref(&data);
-	nvkm_vmm_part(vmm, inst);
-	nvkm_vmm_unref(&vmm);
-	nvkm_memory_unref(&inst);
 	return ret;
 }
 
@@ -1590,6 +1572,8 @@ gf100_grctx = {
 	.bundle_size = 0x1800,
 	.pagepool = gf100_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf100_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
index 32bbddc0993e..00dbeda7e346 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf100.h
@@ -3,27 +3,12 @@
 #define __NVKM_GRCTX_NVC0_H__
 #include "gf100.h"
 
-struct gf100_grctx {
-	struct gf100_gr *gr;
-	struct gf100_gr_data *data;
-	struct gf100_gr_mmio *mmio;
-	int buffer_nr;
-	u64 buffer[4];
-	u64 addr;
-};
-
-int  gf100_grctx_mmio_data(struct gf100_grctx *, u32 size, u32 align, bool priv);
-void gf100_grctx_mmio_item(struct gf100_grctx *, u32 addr, u32 data, int s, int);
-
-#define mmio_vram(a,b,c,d) gf100_grctx_mmio_data((a), (b), (c), (d))
-#define mmio_refn(a,b,c,d,e) gf100_grctx_mmio_item((a), (b), (c), (d), (e))
-#define mmio_skip(a,b,c) mmio_refn((a), (b), (c), -1, -1)
-#define mmio_wr32(a,b,c) mmio_refn((a), (b), (c),  0, -1)
+void gf100_grctx_patch_wr32(struct gf100_gr_chan *, u32 addr, u32 data);
 
 struct gf100_grctx_func {
 	void (*unkn88c)(struct gf100_gr *, bool on);
 	/* main context generation function */
-	void  (*main)(struct gf100_gr *, struct gf100_grctx *);
+	void  (*main)(struct gf100_gr_chan *);
 	/* context-specific modify-on-first-load list generation function */
 	void  (*unkn)(struct gf100_gr *);
 	/* mmio context data */
@@ -37,23 +22,29 @@ struct gf100_grctx_func {
 	const struct gf100_gr_pack *icmd;
 	const struct gf100_gr_pack *mthd;
 	const struct gf100_gr_pack *sw_veid_bundle_init;
+	const struct gf100_gr_pack *sw_bundle64_init;
 	/* bundle circular buffer */
-	void (*bundle)(struct gf100_grctx *);
+	void (*bundle)(struct gf100_gr_chan *, u64 addr, u32 size);
 	u32 bundle_size;
 	u32 bundle_min_gpm_fifo_depth;
 	u32 bundle_token_limit;
 	/* pagepool */
-	void (*pagepool)(struct gf100_grctx *);
+	void (*pagepool)(struct gf100_gr_chan *, u64 addr);
 	u32 pagepool_size;
 	/* attribute(/alpha) circular buffer */
-	void (*attrib)(struct gf100_grctx *);
+	u32 (*attrib_cb_size)(struct gf100_gr *);
+	void (*attrib_cb)(struct gf100_gr_chan *, u64 addr, u32 size);
+	void (*attrib)(struct gf100_gr_chan *);
 	u32 attrib_nr_max;
 	u32 attrib_nr;
 	u32 alpha_nr_max;
 	u32 alpha_nr;
 	u32 gfxp_nr;
+	/* some other context buffer */
+	void (*unknown)(struct gf100_gr_chan *, u64 addr, u32 size);
+	u32 unknown_size;
 	/* other patch buffer stuff */
-	void (*patch_ltc)(struct gf100_grctx *);
+	void (*patch_ltc)(struct gf100_gr_chan *);
 	/* floorsweeping */
 	void (*sm_id)(struct gf100_gr *, int gpc, int tpc, int sm);
 	void (*tpc_nr)(struct gf100_gr *, int gpc);
@@ -78,14 +69,17 @@ struct gf100_grctx_func {
 	void (*r419a3c)(struct gf100_gr *);
 	void (*r408840)(struct gf100_gr *);
 	void (*r419c0c)(struct gf100_gr *);
+	void (*r419ea8)(struct gf100_gr *);
 };
 
 extern const struct gf100_grctx_func gf100_grctx;
-int  gf100_grctx_generate(struct gf100_gr *);
-void gf100_grctx_generate_main(struct gf100_gr *, struct gf100_grctx *);
-void gf100_grctx_generate_bundle(struct gf100_grctx *);
-void gf100_grctx_generate_pagepool(struct gf100_grctx *);
-void gf100_grctx_generate_attrib(struct gf100_grctx *);
+int  gf100_grctx_generate(struct gf100_gr *, struct gf100_gr_chan *, struct nvkm_gpuobj *inst);
+void gf100_grctx_generate_main(struct gf100_gr_chan *);
+void gf100_grctx_generate_pagepool(struct gf100_gr_chan *, u64);
+void gf100_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32);
+u32 gf100_grctx_generate_attrib_cb_size(struct gf100_gr *);
+void gf100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32);
+void gf100_grctx_generate_attrib(struct gf100_gr_chan *);
 void gf100_grctx_generate_unkn(struct gf100_gr *);
 void gf100_grctx_generate_floorsweep(struct gf100_gr *);
 void gf100_grctx_generate_sm_id(struct gf100_gr *, int, int, int);
@@ -97,14 +91,14 @@ void gf100_grctx_generate_max_ways_evict(struct gf100_gr *);
 void gf100_grctx_generate_r419cb8(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gf108_grctx;
-void gf108_grctx_generate_attrib(struct gf100_grctx *);
+void gf108_grctx_generate_attrib(struct gf100_gr_chan *);
 void gf108_grctx_generate_unkn(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gf104_grctx;
 extern const struct gf100_grctx_func gf110_grctx;
 
 extern const struct gf100_grctx_func gf117_grctx;
-void gf117_grctx_generate_attrib(struct gf100_grctx *);
+void gf117_grctx_generate_attrib(struct gf100_gr_chan *);
 void gf117_grctx_generate_rop_mapping(struct gf100_gr *);
 void gf117_grctx_generate_dist_skip_table(struct gf100_gr *);
 
@@ -115,9 +109,9 @@ void gk104_grctx_generate_alpha_beta_tables(struct gf100_gr *);
 void gk104_grctx_generate_gpc_tpc_nr(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gk20a_grctx;
-void gk104_grctx_generate_bundle(struct gf100_grctx *);
-void gk104_grctx_generate_pagepool(struct gf100_grctx *);
-void gk104_grctx_generate_patch_ltc(struct gf100_grctx *);
+void gk104_grctx_generate_pagepool(struct gf100_gr_chan *, u64);
+void gk104_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32);
+void gk104_grctx_generate_patch_ltc(struct gf100_gr_chan *);
 void gk104_grctx_generate_unkn(struct gf100_gr *);
 void gk104_grctx_generate_r418800(struct gf100_gr *);
 
@@ -128,9 +122,10 @@ extern const struct gf100_grctx_func gk110b_grctx;
 extern const struct gf100_grctx_func gk208_grctx;
 
 extern const struct gf100_grctx_func gm107_grctx;
-void gm107_grctx_generate_bundle(struct gf100_grctx *);
-void gm107_grctx_generate_pagepool(struct gf100_grctx *);
-void gm107_grctx_generate_attrib(struct gf100_grctx *);
+void gm107_grctx_generate_pagepool(struct gf100_gr_chan *, u64);
+void gm107_grctx_generate_bundle(struct gf100_gr_chan *, u64, u32);
+void gm107_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32);
+void gm107_grctx_generate_attrib(struct gf100_gr_chan *);
 void gm107_grctx_generate_sm_id(struct gf100_gr *, int, int, int);
 
 extern const struct gf100_grctx_func gm200_grctx;
@@ -143,11 +138,13 @@ void gm200_grctx_generate_r419a3c(struct gf100_gr *);
 extern const struct gf100_grctx_func gm20b_grctx;
 
 extern const struct gf100_grctx_func gp100_grctx;
-void gp100_grctx_generate_pagepool(struct gf100_grctx *);
+void gp100_grctx_generate_pagepool(struct gf100_gr_chan *, u64);
+void gp100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32);
 void gp100_grctx_generate_smid_config(struct gf100_gr *);
 
 extern const struct gf100_grctx_func gp102_grctx;
-void gp102_grctx_generate_attrib(struct gf100_grctx *);
+u32 gp102_grctx_generate_attrib_cb_size(struct gf100_gr *);
+void gp102_grctx_generate_attrib(struct gf100_gr_chan *);
 
 extern const struct gf100_grctx_func gp104_grctx;
 
@@ -158,11 +155,15 @@ extern const struct gf100_grctx_func gv100_grctx;
 extern const struct gf100_grctx_func tu102_grctx;
 void gv100_grctx_unkn88c(struct gf100_gr *, bool);
 void gv100_grctx_generate_unkn(struct gf100_gr *);
-extern const struct gf100_gr_init gv100_grctx_init_sw_veid_bundle_init_0[];
-void gv100_grctx_generate_attrib(struct gf100_grctx *);
+void gv100_grctx_generate_attrib_cb(struct gf100_gr_chan *, u64, u32);
+void gv100_grctx_generate_attrib(struct gf100_gr_chan *);
 void gv100_grctx_generate_rop_mapping(struct gf100_gr *);
 void gv100_grctx_generate_r400088(struct gf100_gr *, bool);
 
+void tu102_grctx_generate_unknown(struct gf100_gr_chan *, u64, u32);
+
+extern const struct gf100_grctx_func ga102_grctx;
+
 /* context init value lists */
 
 extern const struct gf100_gr_pack gf100_grctx_pack_icmd[];
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c
index 7a0564b6e3c7..ba63a3b46518 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf104.c
@@ -94,6 +94,8 @@ gf104_grctx = {
 	.bundle_size = 0x1800,
 	.pagepool = gf100_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf100_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c
index dda2c32e6232..0bc2eab6ad98 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf108.c
@@ -733,25 +733,20 @@ gf108_grctx_pack_tpc[] = {
  ******************************************************************************/
 
 void
-gf108_grctx_generate_attrib(struct gf100_grctx *info)
+gf108_grctx_generate_attrib(struct gf100_gr_chan *chan)
 {
-	struct gf100_gr *gr = info->gr;
+	struct gf100_gr *gr = chan->gr;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	const u32  alpha = grctx->alpha_nr;
 	const u32   beta = grctx->attrib_nr;
-	const u32   size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
-	const int s = 12;
-	const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false);
 	const int timeslice_mode = 1;
 	const int max_batches = 0xffff;
 	u32 bo = 0;
 	u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total;
 	int gpc, tpc;
 
-	mmio_refn(info, 0x418810, 0x80000000, s, b);
-	mmio_refn(info, 0x419848, 0x10000000, s, b);
-	mmio_wr32(info, 0x405830, (beta << 16) | alpha);
-	mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
+	gf100_grctx_patch_wr32(chan, 0x405830, (beta << 16) | alpha);
+	gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
@@ -759,10 +754,10 @@ gf108_grctx_generate_attrib(struct gf100_grctx *info)
 			const u32 b =  beta;
 			const u32 t = timeslice_mode;
 			const u32 o = TPC_UNIT(gpc, tpc, 0x500);
-			mmio_skip(info, o + 0x20, (t << 28) | (b << 16) | ++bo);
-			mmio_wr32(info, o + 0x20, (t << 28) | (b << 16) | --bo);
+
+			gf100_grctx_patch_wr32(chan, o + 0x20, (t << 28) | (b << 16) | bo);
 			bo += grctx->attrib_nr_max;
-			mmio_wr32(info, o + 0x44, (a << 16) | ao);
+			gf100_grctx_patch_wr32(chan, o + 0x44, (a << 16) | ao);
 			ao += grctx->alpha_nr_max;
 		}
 	}
@@ -795,6 +790,8 @@ gf108_grctx = {
 	.bundle_size = 0x1800,
 	.pagepool = gf100_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf108_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c
index f5cca5e6a4f2..64b723b0afb5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf110.c
@@ -342,6 +342,8 @@ gf110_grctx = {
 	.bundle_size = 0x1800,
 	.pagepool = gf100_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf100_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
index 276c282d19aa..e34c5da2a9ff 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
@@ -241,38 +241,34 @@ gf117_grctx_generate_rop_mapping(struct gf100_gr *gr)
 }
 
 void
-gf117_grctx_generate_attrib(struct gf100_grctx *info)
+gf117_grctx_generate_attrib(struct gf100_gr_chan *chan)
 {
-	struct gf100_gr *gr = info->gr;
+	struct gf100_gr *gr = chan->gr;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	const u32  alpha = grctx->alpha_nr;
 	const u32   beta = grctx->attrib_nr;
-	const u32   size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
-	const int s = 12;
-	const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false);
 	const int timeslice_mode = 1;
 	const int max_batches = 0xffff;
 	u32 bo = 0;
 	u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total;
 	int gpc, ppc;
 
-	mmio_refn(info, 0x418810, 0x80000000, s, b);
-	mmio_refn(info, 0x419848, 0x10000000, s, b);
-	mmio_wr32(info, 0x405830, (beta << 16) | alpha);
-	mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
+	gf100_grctx_patch_wr32(chan, 0x405830, (beta << 16) | alpha);
+	gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) {
+		for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) {
 			const u32 a = alpha * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 b =  beta * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 t = timeslice_mode;
 			const u32 o = PPC_UNIT(gpc, ppc, 0);
+
 			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
 				continue;
-			mmio_skip(info, o + 0xc0, (t << 28) | (b << 16) | ++bo);
-			mmio_wr32(info, o + 0xc0, (t << 28) | (b << 16) | --bo);
+
+			gf100_grctx_patch_wr32(chan, o + 0xc0, (t << 28) | (b << 16) | bo);
 			bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc];
-			mmio_wr32(info, o + 0xe4, (a << 16) | ao);
+			gf100_grctx_patch_wr32(chan, o + 0xe4, (a << 16) | ao);
 			ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
 		}
 	}
@@ -294,6 +290,8 @@ gf117_grctx = {
 	.bundle_size = 0x1800,
 	.pagepool = gf100_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf117_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c
index 0cfe46366af6..426ad1b8d426 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf119.c
@@ -510,6 +510,8 @@ gf119_grctx = {
 	.bundle_size = 0x1800,
 	.pagepool = gf100_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf108_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
index 304e9d268bad..94233d0119df 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk104.c
@@ -861,43 +861,33 @@ gk104_grctx_generate_r418800(struct gf100_gr *gr)
 }
 
 void
-gk104_grctx_generate_patch_ltc(struct gf100_grctx *info)
+gk104_grctx_generate_patch_ltc(struct gf100_gr_chan *chan)
 {
-	struct nvkm_device *device = info->gr->base.engine.subdev.device;
+	struct nvkm_device *device = chan->gr->base.engine.subdev.device;
 	u32 data0 = nvkm_rd32(device, 0x17e91c);
 	u32 data1 = nvkm_rd32(device, 0x17e920);
+
 	/*XXX: Figure out how to modify this correctly! */
-	mmio_wr32(info, 0x17e91c, data0);
-	mmio_wr32(info, 0x17e920, data1);
+	gf100_grctx_patch_wr32(chan, 0x17e91c, data0);
+	gf100_grctx_patch_wr32(chan, 0x17e920, data1);
 }
 
 void
-gk104_grctx_generate_bundle(struct gf100_grctx *info)
+gk104_grctx_generate_bundle(struct gf100_gr_chan *chan, u64 addr, u32 size)
 {
-	const struct gf100_grctx_func *grctx = info->gr->func->grctx;
-	const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth,
-				    grctx->bundle_size / 0x20);
+	const struct gf100_grctx_func *grctx = chan->gr->func->grctx;
+	const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth, size / 0x20);
 	const u32 token_limit = grctx->bundle_token_limit;
-	const int s = 8;
-	const int b = mmio_vram(info, grctx->bundle_size, (1 << s), true);
-	mmio_refn(info, 0x408004, 0x00000000, s, b);
-	mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s));
-	mmio_refn(info, 0x418808, 0x00000000, s, b);
-	mmio_wr32(info, 0x41880c, 0x80000000 | (grctx->bundle_size >> s));
-	mmio_wr32(info, 0x4064c8, (state_limit << 16) | token_limit);
+
+	gf100_grctx_generate_bundle(chan, addr, size);
+	gf100_grctx_patch_wr32(chan, 0x4064c8, (state_limit << 16) | token_limit);
 }
 
 void
-gk104_grctx_generate_pagepool(struct gf100_grctx *info)
+gk104_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr)
 {
-	const struct gf100_grctx_func *grctx = info->gr->func->grctx;
-	const int s = 8;
-	const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true);
-	mmio_refn(info, 0x40800c, 0x00000000, s, b);
-	mmio_wr32(info, 0x408010, 0x80000000);
-	mmio_refn(info, 0x419004, 0x00000000, s, b);
-	mmio_wr32(info, 0x419008, 0x00000000);
-	mmio_wr32(info, 0x4064cc, 0x80000000);
+	gf100_grctx_generate_pagepool(chan, addr);
+	gf100_grctx_patch_wr32(chan, 0x4064cc, 0x80000000);
 }
 
 void
@@ -991,6 +981,8 @@ gk104_grctx = {
 	.bundle_token_limit = 0x600,
 	.pagepool = gk104_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf117_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
index 86547cfc38dc..4391458e1fb2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110.c
@@ -838,6 +838,8 @@ gk110_grctx = {
 	.bundle_token_limit = 0x7c0,
 	.pagepool = gk104_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf117_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
index ebb947bd1446..7b9a34f9ec3c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk110b.c
@@ -87,6 +87,8 @@ gk110b_grctx = {
 	.bundle_token_limit = 0x600,
 	.pagepool = gk104_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf117_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
index 4d40512b5c99..c78d07a8bb7d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk208.c
@@ -553,6 +553,8 @@ gk208_grctx = {
 	.bundle_token_limit = 0x200,
 	.pagepool = gk104_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf117_grctx_generate_attrib,
 	.attrib_nr_max = 0x324,
 	.attrib_nr = 0x218,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
index c0d36bc601f9..ac5fdcb5cd3f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgk20a.c
@@ -25,8 +25,9 @@
 #include <subdev/mc.h>
 
 static void
-gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
+gk20a_grctx_generate_main(struct gf100_gr_chan *chan)
 {
+	struct gf100_gr *gr = chan->gr;
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	u32 idle_timeout;
@@ -38,7 +39,8 @@ gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 
 	idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
 
-	grctx->attrib(info);
+	grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr));
+	grctx->attrib(chan);
 
 	grctx->unkn(gr);
 
@@ -60,8 +62,8 @@ gk20a_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	gf100_gr_wait_idle(gr);
 
 	gf100_gr_icmd(gr, gr->bundle);
-	grctx->pagepool(info);
-	grctx->bundle(info);
+	grctx->pagepool(chan, chan->pagepool->addr);
+	grctx->bundle(chan, chan->bundle_cb->addr, grctx->bundle_size);
 }
 
 const struct gf100_grctx_func
@@ -74,6 +76,8 @@ gk20a_grctx = {
 	.bundle_token_limit = 0x100,
 	.pagepool = gk104_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gf100_grctx_generate_attrib_cb,
 	.attrib = gf117_grctx_generate_attrib,
 	.attrib_nr_max = 0x240,
 	.attrib_nr = 0x240,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
index 0b3964e6b36e..beac66eb2a80 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
@@ -876,75 +876,70 @@ gm107_grctx_generate_r419e00(struct gf100_gr *gr)
 }
 
 void
-gm107_grctx_generate_bundle(struct gf100_grctx *info)
+gm107_grctx_generate_bundle(struct gf100_gr_chan *chan, u64 addr, u32 size)
 {
-	const struct gf100_grctx_func *grctx = info->gr->func->grctx;
-	const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth,
-				    grctx->bundle_size / 0x20);
+	const struct gf100_grctx_func *grctx = chan->gr->func->grctx;
+	const u32 state_limit = min(grctx->bundle_min_gpm_fifo_depth, size / 0x20);
 	const u32 token_limit = grctx->bundle_token_limit;
-	const int s = 8;
-	const int b = mmio_vram(info, grctx->bundle_size, (1 << s), true);
-	mmio_refn(info, 0x408004, 0x00000000, s, b);
-	mmio_wr32(info, 0x408008, 0x80000000 | (grctx->bundle_size >> s));
-	mmio_refn(info, 0x418e24, 0x00000000, s, b);
-	mmio_wr32(info, 0x418e28, 0x80000000 | (grctx->bundle_size >> s));
-	mmio_wr32(info, 0x4064c8, (state_limit << 16) | token_limit);
+
+	gf100_grctx_patch_wr32(chan, 0x408004, addr >> 8);
+	gf100_grctx_patch_wr32(chan, 0x408008, 0x80000000 | (size >> 8));
+	gf100_grctx_patch_wr32(chan, 0x418e24, addr >> 8);
+	gf100_grctx_patch_wr32(chan, 0x418e28, 0x80000000 | (size >> 8));
+	gf100_grctx_patch_wr32(chan, 0x4064c8, (state_limit << 16) | token_limit);
 }
 
 void
-gm107_grctx_generate_pagepool(struct gf100_grctx *info)
+gm107_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr)
 {
-	const struct gf100_grctx_func *grctx = info->gr->func->grctx;
-	const int s = 8;
-	const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true);
-	mmio_refn(info, 0x40800c, 0x00000000, s, b);
-	mmio_wr32(info, 0x408010, 0x80000000);
-	mmio_refn(info, 0x419004, 0x00000000, s, b);
-	mmio_wr32(info, 0x419008, 0x00000000);
-	mmio_wr32(info, 0x4064cc, 0x80000000);
-	mmio_wr32(info, 0x418e30, 0x80000000); /* guess at it being related */
+	gk104_grctx_generate_pagepool(chan, addr);
+	gf100_grctx_patch_wr32(chan, 0x418e30, 0x80000000);
 }
 
 void
-gm107_grctx_generate_attrib(struct gf100_grctx *info)
+gm107_grctx_generate_attrib(struct gf100_gr_chan *chan)
 {
-	struct gf100_gr *gr = info->gr;
+	struct gf100_gr *gr = chan->gr;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	const u32  alpha = grctx->alpha_nr;
 	const u32 attrib = grctx->attrib_nr;
-	const u32   size = 0x20 * (grctx->attrib_nr_max + grctx->alpha_nr_max);
-	const int s = 12;
-	const int b = mmio_vram(info, size * gr->tpc_total, (1 << s), false);
 	const int max_batches = 0xffff;
 	u32 bo = 0;
 	u32 ao = bo + grctx->attrib_nr_max * gr->tpc_total;
 	int gpc, ppc, n = 0;
 
-	mmio_refn(info, 0x418810, 0x80000000, s, b);
-	mmio_refn(info, 0x419848, 0x10000000, s, b);
-	mmio_refn(info, 0x419c2c, 0x10000000, s, b);
-	mmio_wr32(info, 0x405830, (attrib << 16) | alpha);
-	mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
+	gf100_grctx_patch_wr32(chan, 0x405830, (attrib << 16) | alpha);
+	gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
+		for (ppc = 0; ppc < gr->func->ppc_nr; ppc++, n++) {
 			const u32 as =  alpha * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 bs = attrib * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 u = 0x418ea0 + (n * 0x04);
 			const u32 o = PPC_UNIT(gpc, ppc, 0);
+
 			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
 				continue;
-			mmio_wr32(info, o + 0xc0, bs);
-			mmio_wr32(info, o + 0xf4, bo);
+
+			gf100_grctx_patch_wr32(chan, o + 0xc0, bs);
+			gf100_grctx_patch_wr32(chan, o + 0xf4, bo);
 			bo += grctx->attrib_nr_max * gr->ppc_tpc_nr[gpc][ppc];
-			mmio_wr32(info, o + 0xe4, as);
-			mmio_wr32(info, o + 0xf8, ao);
+			gf100_grctx_patch_wr32(chan, o + 0xe4, as);
+			gf100_grctx_patch_wr32(chan, o + 0xf8, ao);
 			ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
-			mmio_wr32(info, u, ((bs / 3) << 16) | bs);
+			gf100_grctx_patch_wr32(chan, u, ((bs / 3) << 16) | bs);
 		}
 	}
 }
 
+void
+gm107_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size)
+{
+	gf100_grctx_generate_attrib_cb(chan, addr, size);
+
+	gf100_grctx_patch_wr32(chan, 0x419c2c, 0x10000000 | addr >> 12);
+}
+
 static void
 gm107_grctx_generate_r406500(struct gf100_gr *gr)
 {
@@ -978,6 +973,8 @@ gm107_grctx = {
 	.bundle_token_limit = 0x2c0,
 	.pagepool = gm107_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gm107_grctx_generate_attrib_cb,
 	.attrib = gm107_grctx_generate_attrib,
 	.attrib_nr_max = 0xff0,
 	.attrib_nr = 0xaa0,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
index 013d05a0f0f6..175da8ac656c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm200.c
@@ -87,7 +87,7 @@ gm200_grctx_generate_dist_skip_table(struct gf100_gr *gr)
 	int gpc, ppc, i;
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) {
+		for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) {
 			u8 ppc_tpcs = gr->ppc_tpc_nr[gpc][ppc];
 			u8 ppc_tpcm = gr->ppc_tpc_mask[gpc][ppc];
 			while (ppc_tpcs-- > gr->ppc_tpc_min)
@@ -111,6 +111,8 @@ gm200_grctx = {
 	.bundle_token_limit = 0x780,
 	.pagepool = gm107_grctx_generate_pagepool,
 	.pagepool_size = 0x20000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gm107_grctx_generate_attrib_cb,
 	.attrib = gm107_grctx_generate_attrib,
 	.attrib_nr_max = 0x600,
 	.attrib_nr = 0x400,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
index 6b92f8aa18a3..b8edccfada58 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm20b.c
@@ -22,8 +22,9 @@
 #include "ctxgf100.h"
 
 static void
-gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
+gm20b_grctx_generate_main(struct gf100_gr_chan *chan)
 {
+	struct gf100_gr *gr = chan->gr;
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	u32 idle_timeout;
@@ -35,7 +36,8 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 
 	idle_timeout = nvkm_mask(device, 0x404154, 0xffffffff, 0x00000000);
 
-	grctx->attrib(info);
+	grctx->attrib_cb(chan, chan->attrib_cb->addr, grctx->attrib_cb_size(gr));
+	grctx->attrib(chan);
 
 	grctx->unkn(gr);
 
@@ -63,8 +65,8 @@ gm20b_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 	gf100_gr_wait_idle(gr);
 
 	gf100_gr_icmd(gr, gr->bundle);
-	grctx->pagepool(info);
-	grctx->bundle(info);
+	grctx->pagepool(chan, chan->pagepool->addr);
+	grctx->bundle(chan, chan->bundle_cb->addr, grctx->bundle_size);
 }
 
 const struct gf100_grctx_func
@@ -77,6 +79,8 @@ gm20b_grctx = {
 	.bundle_token_limit = 0x1c0,
 	.pagepool = gm107_grctx_generate_pagepool,
 	.pagepool_size = 0x8000,
+	.attrib_cb_size = gf100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gm107_grctx_generate_attrib_cb,
 	.attrib = gm107_grctx_generate_attrib,
 	.attrib_nr_max = 0x600,
 	.attrib_nr = 0x400,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
index 0b3326262e12..8485aaeae7a9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp100.c
@@ -30,66 +30,76 @@
  ******************************************************************************/
 
 void
-gp100_grctx_generate_pagepool(struct gf100_grctx *info)
+gp100_grctx_generate_pagepool(struct gf100_gr_chan *chan, u64 addr)
 {
-	const struct gf100_grctx_func *grctx = info->gr->func->grctx;
-	const int s = 8;
-	const int b = mmio_vram(info, grctx->pagepool_size, (1 << s), true);
-	mmio_refn(info, 0x40800c, 0x00000000, s, b);
-	mmio_wr32(info, 0x408010, 0x8007d800);
-	mmio_refn(info, 0x419004, 0x00000000, s, b);
-	mmio_wr32(info, 0x419008, 0x00000000);
+	gf100_grctx_patch_wr32(chan, 0x40800c, addr >> 8);
+	gf100_grctx_patch_wr32(chan, 0x408010, 0x8007d800);
+	gf100_grctx_patch_wr32(chan, 0x419004, addr >> 8);
+	gf100_grctx_patch_wr32(chan, 0x419008, 0x00000000);
 }
 
 static void
-gp100_grctx_generate_attrib(struct gf100_grctx *info)
+gp100_grctx_generate_attrib(struct gf100_gr_chan *chan)
 {
-	struct gf100_gr *gr = info->gr;
+	struct gf100_gr *gr = chan->gr;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	const u32  alpha = grctx->alpha_nr;
 	const u32 attrib = grctx->attrib_nr;
-	const int s = 12;
 	const int max_batches = 0xffff;
 	u32 size = grctx->alpha_nr_max * gr->tpc_total;
 	u32 ao = 0;
 	u32 bo = ao + size;
-	int gpc, ppc, b, n = 0;
+	int gpc, ppc, n = 0;
 
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++)
-		size += grctx->attrib_nr_max * gr->ppc_nr[gpc] * gr->ppc_tpc_max;
-	size = ((size * 0x20) + 128) & ~127;
-	b = mmio_vram(info, size, (1 << s), false);
-
-	mmio_refn(info, 0x418810, 0x80000000, s, b);
-	mmio_refn(info, 0x419848, 0x10000000, s, b);
-	mmio_refn(info, 0x419c2c, 0x10000000, s, b);
-	mmio_refn(info, 0x419b00, 0x00000000, s, b);
-	mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7);
-	mmio_wr32(info, 0x405830, attrib);
-	mmio_wr32(info, 0x40585c, alpha);
-	mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
+	gf100_grctx_patch_wr32(chan, 0x405830, attrib);
+	gf100_grctx_patch_wr32(chan, 0x40585c, alpha);
+	gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
+		for (ppc = 0; ppc < gr->func->ppc_nr; ppc++, n++) {
 			const u32 as =  alpha * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 bs = attrib * gr->ppc_tpc_max;
 			const u32 u = 0x418ea0 + (n * 0x04);
 			const u32 o = PPC_UNIT(gpc, ppc, 0);
+
 			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
 				continue;
-			mmio_wr32(info, o + 0xc0, bs);
-			mmio_wr32(info, o + 0xf4, bo);
-			mmio_wr32(info, o + 0xf0, bs);
+
+			gf100_grctx_patch_wr32(chan, o + 0xc0, bs);
+			gf100_grctx_patch_wr32(chan, o + 0xf4, bo);
+			gf100_grctx_patch_wr32(chan, o + 0xf0, bs);
 			bo += grctx->attrib_nr_max * gr->ppc_tpc_max;
-			mmio_wr32(info, o + 0xe4, as);
-			mmio_wr32(info, o + 0xf8, ao);
+			gf100_grctx_patch_wr32(chan, o + 0xe4, as);
+			gf100_grctx_patch_wr32(chan, o + 0xf8, ao);
 			ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
-			mmio_wr32(info, u, bs);
+			gf100_grctx_patch_wr32(chan, u, bs);
 		}
 	}
 
-	mmio_wr32(info, 0x418eec, 0x00000000);
-	mmio_wr32(info, 0x41befc, 0x00000000);
+	gf100_grctx_patch_wr32(chan, 0x418eec, 0x00000000);
+	gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000000);
+}
+
+void
+gp100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size)
+{
+	gm107_grctx_generate_attrib_cb(chan, addr, size);
+
+	gf100_grctx_patch_wr32(chan, 0x419b00, 0x00000000 | addr >> 12);
+	gf100_grctx_patch_wr32(chan, 0x419b04, 0x80000000 | size >> 7);
+}
+
+static u32
+gp100_grctx_generate_attrib_cb_size(struct gf100_gr *gr)
+{
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+	u32 size = grctx->alpha_nr_max * gr->tpc_total;
+	int gpc;
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++)
+		size += grctx->attrib_nr_max * gr->func->ppc_nr * gr->ppc_tpc_max;
+
+	return ((size * 0x20) + 128) & ~127;
 }
 
 void
@@ -123,6 +133,8 @@ gp100_grctx = {
 	.bundle_token_limit = 0x1080,
 	.pagepool = gp100_grctx_generate_pagepool,
 	.pagepool_size = 0x20000,
+	.attrib_cb_size = gp100_grctx_generate_attrib_cb_size,
+	.attrib_cb = gp100_grctx_generate_attrib_cb,
 	.attrib = gp100_grctx_generate_attrib,
 	.attrib_nr_max = 0x660,
 	.attrib_nr = 0x440,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c
index daee17bf7d0d..7537979a5492 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp102.c
@@ -37,58 +37,62 @@ gp102_grctx_generate_r408840(struct gf100_gr *gr)
 }
 
 void
-gp102_grctx_generate_attrib(struct gf100_grctx *info)
+gp102_grctx_generate_attrib(struct gf100_gr_chan *chan)
 {
-	struct gf100_gr *gr = info->gr;
+	struct gf100_gr *gr = chan->gr;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	const u32  alpha = grctx->alpha_nr;
 	const u32 attrib = grctx->attrib_nr;
 	const u32   gfxp = grctx->gfxp_nr;
-	const int s = 12;
 	const int max_batches = 0xffff;
 	u32 size = grctx->alpha_nr_max * gr->tpc_total;
 	u32 ao = 0;
 	u32 bo = ao + size;
-	int gpc, ppc, b, n = 0;
+	int gpc, ppc, n = 0;
 
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++)
-		size += grctx->gfxp_nr * gr->ppc_nr[gpc] * gr->ppc_tpc_max;
-	size = ((size * 0x20) + 128) & ~127;
-	b = mmio_vram(info, size, (1 << s), false);
-
-	mmio_refn(info, 0x418810, 0x80000000, s, b);
-	mmio_refn(info, 0x419848, 0x10000000, s, b);
-	mmio_refn(info, 0x419c2c, 0x10000000, s, b);
-	mmio_refn(info, 0x419b00, 0x00000000, s, b);
-	mmio_wr32(info, 0x419b04, 0x80000000 | size >> 7);
-	mmio_wr32(info, 0x405830, attrib);
-	mmio_wr32(info, 0x40585c, alpha);
-	mmio_wr32(info, 0x4064c4, ((alpha / 4) << 16) | max_batches);
+	gf100_grctx_patch_wr32(chan, 0x405830, attrib);
+	gf100_grctx_patch_wr32(chan, 0x40585c, alpha);
+	gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
+		for (ppc = 0; ppc < gr->func->ppc_nr; ppc++, n++) {
 			const u32 as =  alpha * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 bs = attrib * gr->ppc_tpc_max;
 			const u32 gs =   gfxp * gr->ppc_tpc_max;
 			const u32 u = 0x418ea0 + (n * 0x04);
 			const u32 o = PPC_UNIT(gpc, ppc, 0);
 			const u32 p = GPC_UNIT(gpc, 0xc44 + (ppc * 4));
+
 			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
 				continue;
-			mmio_wr32(info, o + 0xc0, gs);
-			mmio_wr32(info, p, bs);
-			mmio_wr32(info, o + 0xf4, bo);
-			mmio_wr32(info, o + 0xf0, bs);
+
+			gf100_grctx_patch_wr32(chan, o + 0xc0, gs);
+			gf100_grctx_patch_wr32(chan, p, bs);
+			gf100_grctx_patch_wr32(chan, o + 0xf4, bo);
+			gf100_grctx_patch_wr32(chan, o + 0xf0, bs);
 			bo += gs;
-			mmio_wr32(info, o + 0xe4, as);
-			mmio_wr32(info, o + 0xf8, ao);
+			gf100_grctx_patch_wr32(chan, o + 0xe4, as);
+			gf100_grctx_patch_wr32(chan, o + 0xf8, ao);
 			ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
-			mmio_wr32(info, u, bs);
+			gf100_grctx_patch_wr32(chan, u, bs);
 		}
 	}
 
-	mmio_wr32(info, 0x4181e4, 0x00000100);
-	mmio_wr32(info, 0x41befc, 0x00000100);
+	gf100_grctx_patch_wr32(chan, 0x4181e4, 0x00000100);
+	gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000100);
+}
+
+u32
+gp102_grctx_generate_attrib_cb_size(struct gf100_gr *gr)
+{
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+	u32 size = grctx->alpha_nr_max * gr->tpc_total;
+	int gpc;
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++)
+		size += grctx->gfxp_nr * gr->func->ppc_nr * gr->ppc_tpc_max;
+
+	return ((size * 0x20) + 127) & ~127;
 }
 
 const struct gf100_grctx_func
@@ -101,6 +105,8 @@ gp102_grctx = {
 	.bundle_token_limit = 0x900,
 	.pagepool = gp100_grctx_generate_pagepool,
 	.pagepool_size = 0x20000,
+	.attrib_cb_size = gp102_grctx_generate_attrib_cb_size,
+	.attrib_cb = gp100_grctx_generate_attrib_cb,
 	.attrib = gp102_grctx_generate_attrib,
 	.attrib_nr_max = 0x4b0,
 	.attrib_nr = 0x320,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c
index 3b85e3d326b2..90b5f793e567 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp104.c
@@ -31,6 +31,8 @@ gp104_grctx = {
 	.bundle_token_limit = 0x900,
 	.pagepool = gp100_grctx_generate_pagepool,
 	.pagepool_size = 0x20000,
+	.attrib_cb_size = gp102_grctx_generate_attrib_cb_size,
+	.attrib_cb = gp100_grctx_generate_attrib_cb,
 	.attrib = gp102_grctx_generate_attrib,
 	.attrib_nr_max = 0x4b0,
 	.attrib_nr = 0x320,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c
index 5060c5ee5ce0..d191761a0471 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgp107.c
@@ -39,6 +39,8 @@ gp107_grctx = {
 	.bundle_token_limit = 0x300,
 	.pagepool = gp100_grctx_generate_pagepool,
 	.pagepool_size = 0x20000,
+	.attrib_cb_size = gp102_grctx_generate_attrib_cb_size,
+	.attrib_cb = gp100_grctx_generate_attrib_cb,
 	.attrib = gp102_grctx_generate_attrib,
 	.attrib_nr_max = 0x15de,
 	.attrib_nr = 0x540,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c
index 39553d55d3f3..957ea9d6bad4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgv100.c
@@ -25,7 +25,7 @@
  * PGRAPH context implementation
  ******************************************************************************/
 
-const struct gf100_gr_init
+static const struct gf100_gr_init
 gv100_grctx_init_sw_veid_bundle_init_0[] = {
 	{ 0x00001000, 64, 0x00100000, 0x00000008 },
 	{ 0x00000941, 64, 0x00100000, 0x00000000 },
@@ -59,67 +59,70 @@ gv100_grctx_pack_sw_veid_bundle_init[] = {
 };
 
 void
-gv100_grctx_generate_attrib(struct gf100_grctx *info)
+gv100_grctx_generate_attrib(struct gf100_gr_chan *chan)
 {
-	struct gf100_gr *gr = info->gr;
+	struct gf100_gr *gr = chan->gr;
 	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	const u32  alpha = grctx->alpha_nr;
 	const u32 attrib = grctx->attrib_nr;
 	const u32   gfxp = grctx->gfxp_nr;
-	const int s = 12;
+	const int max_batches = 0xffff;
 	u32 size = grctx->alpha_nr_max * gr->tpc_total;
 	u32 ao = 0;
 	u32 bo = ao + size;
-	int gpc, ppc, b, n = 0;
+	int gpc, ppc, n = 0;
 
-	for (gpc = 0; gpc < gr->gpc_nr; gpc++)
-		size += grctx->gfxp_nr * gr->ppc_nr[gpc] * gr->ppc_tpc_max;
-	size = ((size * 0x20) + 127) & ~127;
-	b = mmio_vram(info, size, (1 << s), false);
-
-	mmio_refn(info, 0x418810, 0x80000000, s, b);
-	mmio_refn(info, 0x419848, 0x10000000, s, b);
-	mmio_refn(info, 0x419c2c, 0x10000000, s, b);
-	mmio_refn(info, 0x419e00, 0x00000000, s, b);
-	mmio_wr32(info, 0x419e04, 0x80000000 | size >> 7);
-	mmio_wr32(info, 0x405830, attrib);
-	mmio_wr32(info, 0x40585c, alpha);
+	gf100_grctx_patch_wr32(chan, 0x405830, attrib);
+	gf100_grctx_patch_wr32(chan, 0x40585c, alpha);
+	gf100_grctx_patch_wr32(chan, 0x4064c4, ((alpha / 4) << 16) | max_batches);
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++, n++) {
+		for (ppc = 0; ppc < gr->func->ppc_nr; ppc++, n++) {
 			const u32 as =  alpha * gr->ppc_tpc_nr[gpc][ppc];
 			const u32 bs = attrib * gr->ppc_tpc_max;
 			const u32 gs =   gfxp * gr->ppc_tpc_max;
 			const u32 u = 0x418ea0 + (n * 0x04);
 			const u32 o = PPC_UNIT(gpc, ppc, 0);
+
 			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
 				continue;
-			mmio_wr32(info, o + 0xc0, gs);
-			mmio_wr32(info, o + 0xf4, bo);
-			mmio_wr32(info, o + 0xf0, bs);
+
+			gf100_grctx_patch_wr32(chan, o + 0xc0, gs);
+			gf100_grctx_patch_wr32(chan, o + 0xf4, bo);
+			gf100_grctx_patch_wr32(chan, o + 0xf0, bs);
 			bo += gs;
-			mmio_wr32(info, o + 0xe4, as);
-			mmio_wr32(info, o + 0xf8, ao);
+			gf100_grctx_patch_wr32(chan, o + 0xe4, as);
+			gf100_grctx_patch_wr32(chan, o + 0xf8, ao);
 			ao += grctx->alpha_nr_max * gr->ppc_tpc_nr[gpc][ppc];
-			mmio_wr32(info, u, bs);
+			gf100_grctx_patch_wr32(chan, u, bs);
 		}
 	}
 
-	mmio_wr32(info, 0x4181e4, 0x00000100);
-	mmio_wr32(info, 0x41befc, 0x00000100);
+	gf100_grctx_patch_wr32(chan, 0x4181e4, 0x00000100);
+	gf100_grctx_patch_wr32(chan, 0x41befc, 0x00000100);
+}
+
+void
+gv100_grctx_generate_attrib_cb(struct gf100_gr_chan *chan, u64 addr, u32 size)
+{
+	gm107_grctx_generate_attrib_cb(chan, addr, size);
+
+	gf100_grctx_patch_wr32(chan, 0x419e00, 0x00000000 | addr >> 12);
+	gf100_grctx_patch_wr32(chan, 0x419e04, 0x80000000 | size >> 7);
 }
 
 void
 gv100_grctx_generate_rop_mapping(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
+	const u32 mapregs = DIV_ROUND_UP(gr->func->gpc_nr * gr->func->tpc_nr, 6);
 	u32 data;
 	int i, j;
 
 	/* Pack tile map into register format. */
 	nvkm_wr32(device, 0x418bb8, (gr->tpc_total << 8) |
 				     gr->screen_tile_row_offset);
-	for (i = 0; i < 11; i++) {
+	for (i = 0; i < mapregs; i++) {
 		for (data = 0, j = 0; j < 6; j++)
 			data |= (gr->tile[i * 6 + j] & 0x1f) << (j * 5);
 		nvkm_wr32(device, 0x418b08 + (i * 4), data);
@@ -157,6 +160,9 @@ static void
 gv100_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	tpc = gv100_gr_nonpes_aware_tpc(gr, gpc, tpc);
+
 	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x608), sm);
 	nvkm_wr32(device, GPC_UNIT(gpc, 0x0c10 + tpc * 4), sm);
 	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm);
@@ -198,6 +204,8 @@ gv100_grctx = {
 	.bundle_token_limit = 0x1680,
 	.pagepool = gp100_grctx_generate_pagepool,
 	.pagepool_size = 0x20000,
+	.attrib_cb_size = gp102_grctx_generate_attrib_cb_size,
+	.attrib_cb = gv100_grctx_generate_attrib_cb,
 	.attrib = gv100_grctx_generate_attrib,
 	.attrib_nr_max = 0x6c0,
 	.attrib_nr = 0x480,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c
index 2299ca07d04a..542ab0c78be6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxtu102.c
@@ -34,6 +34,9 @@ static void
 tu102_grctx_generate_sm_id(struct gf100_gr *gr, int gpc, int tpc, int sm)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	tpc = gv100_gr_nonpes_aware_tpc(gr, gpc, tpc);
+
 	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x608), sm);
 	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x088), sm);
 }
@@ -47,42 +50,38 @@ tu102_grctx_init_unknown_bundle_init_0[] = {
 };
 
 static const struct gf100_gr_pack
-tu102_grctx_pack_sw_veid_bundle_init[] = {
-	{ gv100_grctx_init_sw_veid_bundle_init_0 },
-	{ tu102_grctx_init_unknown_bundle_init_0 },
+tu102_grctx_pack_sw_bundle64_init[] = {
+	{ tu102_grctx_init_unknown_bundle_init_0, .type = 64 },
 	{}
 };
 
-static void
-tu102_grctx_generate_attrib(struct gf100_grctx *info)
+void
+tu102_grctx_generate_unknown(struct gf100_gr_chan *chan, u64 addr, u32 size)
 {
-	const u64 size = 0x80000; /*XXX: educated guess */
-	const int s = 8;
-	const int b = mmio_vram(info, size, (1 << s), true);
-
-	gv100_grctx_generate_attrib(info);
-
-	mmio_refn(info, 0x408070, 0x00000000, s, b);
-	mmio_wr32(info, 0x408074, size >> s); /*XXX: guess */
-	mmio_refn(info, 0x419034, 0x00000000, s, b);
-	mmio_wr32(info, 0x408078, 0x00000000);
+	gf100_grctx_patch_wr32(chan, 0x408070, addr >> 8);
+	gf100_grctx_patch_wr32(chan, 0x408074, size >> 8); /*XXX: guess */
+	gf100_grctx_patch_wr32(chan, 0x419034, addr >> 8);
+	gf100_grctx_patch_wr32(chan, 0x408078, 0x00000000);
 }
 
 const struct gf100_grctx_func
 tu102_grctx = {
-	.unkn88c = gv100_grctx_unkn88c,
 	.main = gf100_grctx_generate_main,
 	.unkn = gv100_grctx_generate_unkn,
-	.sw_veid_bundle_init = tu102_grctx_pack_sw_veid_bundle_init,
+	.sw_bundle64_init = tu102_grctx_pack_sw_bundle64_init,
 	.bundle = gm107_grctx_generate_bundle,
 	.bundle_size = 0x3000,
 	.bundle_min_gpm_fifo_depth = 0x180,
 	.bundle_token_limit = 0xa80,
 	.pagepool = gp100_grctx_generate_pagepool,
 	.pagepool_size = 0x20000,
-	.attrib = tu102_grctx_generate_attrib,
+	.attrib_cb_size = gp102_grctx_generate_attrib_cb_size,
+	.attrib_cb = gv100_grctx_generate_attrib_cb,
+	.attrib = gv100_grctx_generate_attrib,
 	.attrib_nr_max = 0x800,
 	.attrib_nr = 0x700,
+	.unknown_size = 0x80000,
+	.unknown = tu102_grctx_generate_unknown,
 	.alpha_nr_max = 0xc00,
 	.alpha_nr = 0x800,
 	.gfxp_nr = 0xfa8,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ga102.c
new file mode 100644
index 000000000000..a5b5ac2755a2
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ga102.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright 2019 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "gf100.h"
+#include "ctxgf100.h"
+
+#include <core/firmware.h>
+#include <subdev/acr.h>
+#include <subdev/timer.h>
+#include <subdev/vfn.h>
+
+#include <nvfw/flcn.h>
+
+#include <nvif/class.h>
+
+static void
+ga102_gr_zbc_clear_color(struct gf100_gr *gr, int zbc)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	u32 invalid[] = { 0, 0, 0, 0 }, *color;
+
+	if (gr->zbc_color[zbc].format)
+		color = gr->zbc_color[zbc].l2;
+	else
+		color = invalid;
+
+	nvkm_mask(device, 0x41bcb4, 0x0000001f, zbc);
+	nvkm_wr32(device, 0x41bcec, color[0]);
+	nvkm_wr32(device, 0x41bcf0, color[1]);
+	nvkm_wr32(device, 0x41bcf4, color[2]);
+	nvkm_wr32(device, 0x41bcf8, color[3]);
+}
+
+static const struct gf100_gr_func_zbc
+ga102_gr_zbc = {
+	.clear_color = ga102_gr_zbc_clear_color,
+	.clear_depth = gp100_gr_zbc_clear_depth,
+	.stencil_get = gp102_gr_zbc_stencil_get,
+	.clear_stencil = gp102_gr_zbc_clear_stencil,
+};
+
+static void
+ga102_gr_gpccs_reset(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	nvkm_wr32(device, 0x41a610, 0x00000000);
+	nvkm_msec(device, 1, NVKM_DELAY);
+	nvkm_wr32(device, 0x41a610, 0x00000001);
+}
+
+static const struct nvkm_acr_lsf_func
+ga102_gr_gpccs_acr = {
+	.flags = NVKM_ACR_LSF_FORCE_PRIV_LOAD,
+	.bl_entry = 0x3400,
+	.bld_size = sizeof(struct flcn_bl_dmem_desc_v2),
+	.bld_write = gp108_gr_acr_bld_write,
+	.bld_patch = gp108_gr_acr_bld_patch,
+};
+
+static void
+ga102_gr_fecs_reset(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	nvkm_wr32(device, 0x409614, 0x00000010);
+	nvkm_wr32(device, 0x41a614, 0x00000020);
+	nvkm_usec(device, 10, NVKM_DELAY);
+	nvkm_wr32(device, 0x409614, 0x00000110);
+	nvkm_wr32(device, 0x41a614, 0x00000a20);
+	nvkm_usec(device, 10, NVKM_DELAY);
+	nvkm_rd32(device, 0x409614);
+	nvkm_rd32(device, 0x41a614);
+}
+
+static const struct nvkm_acr_lsf_func
+ga102_gr_fecs_acr = {
+	.bl_entry = 0x7e00,
+	.bld_size = sizeof(struct flcn_bl_dmem_desc_v2),
+	.bld_write = gp108_gr_acr_bld_write,
+	.bld_patch = gp108_gr_acr_bld_patch,
+};
+
+static void
+ga102_gr_init_rop_exceptions(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	nvkm_wr32(device, 0x41bcbc, 0x40000000);
+	nvkm_wr32(device, 0x41bc38, 0x40000000);
+	nvkm_wr32(device, 0x41ac94, nvkm_rd32(device, 0x502c94));
+}
+
+static void
+ga102_gr_init_40a790(struct gf100_gr *gr)
+{
+	nvkm_wr32(gr->base.engine.subdev.device, 0x40a790, 0xc0000000);
+}
+
+static void
+ga102_gr_init_gpc_mmu(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	nvkm_wr32(device, 0x418880, nvkm_rd32(device, 0x100c80) & 0xf8001fff);
+	nvkm_wr32(device, 0x418894, 0x00000000);
+
+	nvkm_wr32(device, 0x4188b4, nvkm_rd32(device, 0x100cc8));
+	nvkm_wr32(device, 0x4188b8, nvkm_rd32(device, 0x100ccc));
+	nvkm_wr32(device, 0x4188b0, nvkm_rd32(device, 0x100cc4));
+}
+
+static struct nvkm_intr *
+ga102_gr_oneinit_intr(struct gf100_gr *gr, enum nvkm_intr_type *pvector)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	*pvector = nvkm_rd32(device, 0x400154) & 0x00000fff;
+	return &device->vfn->intr;
+}
+
+static const struct gf100_gr_func
+ga102_gr = {
+	.oneinit_intr = ga102_gr_oneinit_intr,
+	.oneinit_tiles = gm200_gr_oneinit_tiles,
+	.oneinit_sm_id = gv100_gr_oneinit_sm_id,
+	.init = gf100_gr_init,
+	.init_419bd8 = gv100_gr_init_419bd8,
+	.init_gpc_mmu = ga102_gr_init_gpc_mmu,
+	.init_vsc_stream_master = gk104_gr_init_vsc_stream_master,
+	.init_zcull = tu102_gr_init_zcull,
+	.init_num_active_ltcs = gf100_gr_init_num_active_ltcs,
+	.init_swdx_pes_mask = gp102_gr_init_swdx_pes_mask,
+	.init_fs = tu102_gr_init_fs,
+	.init_fecs_exceptions = tu102_gr_init_fecs_exceptions,
+	.init_40a790 = ga102_gr_init_40a790,
+	.init_ds_hww_esr_2 = gm200_gr_init_ds_hww_esr_2,
+	.init_sked_hww_esr = gk104_gr_init_sked_hww_esr,
+	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
+	.init_504430 = gv100_gr_init_504430,
+	.init_shader_exceptions = gv100_gr_init_shader_exceptions,
+	.init_rop_exceptions = ga102_gr_init_rop_exceptions,
+	.init_4188a4 = gv100_gr_init_4188a4,
+	.trap_mp = gv100_gr_trap_mp,
+	.fecs.reset = ga102_gr_fecs_reset,
+	.gpccs.reset = ga102_gr_gpccs_reset,
+	.rops = gm200_gr_rops,
+	.gpc_nr = 7,
+	.tpc_nr = 6,
+	.ppc_nr = 3,
+	.grctx = &ga102_grctx,
+	.zbc = &ga102_gr_zbc,
+	.sclass = {
+		{ -1, -1, FERMI_TWOD_A },
+		{ -1, -1, KEPLER_INLINE_TO_MEMORY_B },
+		{ -1, -1, AMPERE_B, &gf100_fermi },
+		{ -1, -1, AMPERE_COMPUTE_B },
+		{}
+	}
+};
+
+MODULE_FIRMWARE("nvidia/ga102/gr/fecs_bl.bin");
+MODULE_FIRMWARE("nvidia/ga102/gr/fecs_sig.bin");
+MODULE_FIRMWARE("nvidia/ga102/gr/gpccs_bl.bin");
+MODULE_FIRMWARE("nvidia/ga102/gr/gpccs_sig.bin");
+MODULE_FIRMWARE("nvidia/ga102/gr/NET_img.bin");
+
+MODULE_FIRMWARE("nvidia/ga103/gr/fecs_bl.bin");
+MODULE_FIRMWARE("nvidia/ga103/gr/fecs_sig.bin");
+MODULE_FIRMWARE("nvidia/ga103/gr/gpccs_bl.bin");
+MODULE_FIRMWARE("nvidia/ga103/gr/gpccs_sig.bin");
+MODULE_FIRMWARE("nvidia/ga103/gr/NET_img.bin");
+
+MODULE_FIRMWARE("nvidia/ga104/gr/fecs_bl.bin");
+MODULE_FIRMWARE("nvidia/ga104/gr/fecs_sig.bin");
+MODULE_FIRMWARE("nvidia/ga104/gr/gpccs_bl.bin");
+MODULE_FIRMWARE("nvidia/ga104/gr/gpccs_sig.bin");
+MODULE_FIRMWARE("nvidia/ga104/gr/NET_img.bin");
+
+MODULE_FIRMWARE("nvidia/ga106/gr/fecs_bl.bin");
+MODULE_FIRMWARE("nvidia/ga106/gr/fecs_sig.bin");
+MODULE_FIRMWARE("nvidia/ga106/gr/gpccs_bl.bin");
+MODULE_FIRMWARE("nvidia/ga106/gr/gpccs_sig.bin");
+MODULE_FIRMWARE("nvidia/ga106/gr/NET_img.bin");
+
+MODULE_FIRMWARE("nvidia/ga107/gr/fecs_bl.bin");
+MODULE_FIRMWARE("nvidia/ga107/gr/fecs_sig.bin");
+MODULE_FIRMWARE("nvidia/ga107/gr/gpccs_bl.bin");
+MODULE_FIRMWARE("nvidia/ga107/gr/gpccs_sig.bin");
+MODULE_FIRMWARE("nvidia/ga107/gr/NET_img.bin");
+
+struct netlist_region {
+	u32 region_id;
+	u32 data_size;
+	u32 data_offset;
+};
+
+struct netlist_image_header {
+	u32 version;
+	u32 regions;
+};
+
+struct netlist_image {
+	struct netlist_image_header header;
+	struct netlist_region regions[];
+};
+
+struct netlist_av64 {
+	u32 addr;
+	u32 data_hi;
+	u32 data_lo;
+};
+
+static int
+ga102_gr_av64_to_init(struct nvkm_blob *blob, struct gf100_gr_pack **ppack)
+{
+	struct gf100_gr_init *init;
+	struct gf100_gr_pack *pack;
+	int nent;
+	int i;
+
+	nent = (blob->size / sizeof(struct netlist_av64));
+	pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1)));
+	if (!pack)
+		return -ENOMEM;
+
+	init = (void *)(pack + 2);
+	pack[0].init = init;
+	pack[0].type = 64;
+
+	for (i = 0; i < nent; i++) {
+		struct gf100_gr_init *ent = &init[i];
+		struct netlist_av64 *av = &((struct netlist_av64 *)blob->data)[i];
+
+		ent->addr = av->addr;
+		ent->data = ((u64)av->data_hi << 32) | av->data_lo;
+		ent->count = 1;
+		ent->pitch = 1;
+	}
+
+	*ppack = pack;
+	return 0;
+}
+
+static int
+ga102_gr_load(struct gf100_gr *gr, int ver, const struct gf100_gr_fwif *fwif)
+{
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+	const struct firmware *fw;
+	const struct netlist_image *net;
+	const struct netlist_region *fecs_inst = NULL;
+	const struct netlist_region *fecs_data = NULL;
+	const struct netlist_region *gpccs_inst = NULL;
+	const struct netlist_region *gpccs_data = NULL;
+	int ret, i;
+
+	ret = nvkm_firmware_get(subdev, "gr/NET_img", 0, &fw);
+	if (ret)
+		return ret;
+
+	net = (const void *)fw->data;
+	nvkm_debug(subdev, "netlist version %d, %d regions\n",
+		   net->header.version, net->header.regions);
+
+	for (i = 0; i < net->header.regions; i++) {
+		const struct netlist_region *reg = &net->regions[i];
+		struct nvkm_blob blob = {
+			.data = (void *)fw->data + reg->data_offset,
+			.size = reg->data_size,
+		};
+
+		nvkm_debug(subdev, "\t%2d: %08x %08x\n",
+			   reg->region_id, reg->data_offset, reg->data_size);
+
+		switch (reg->region_id) {
+		case  0: fecs_data = reg; break;
+		case  1: fecs_inst = reg; break;
+		case  2: gpccs_data = reg; break;
+		case  3: gpccs_inst = reg; break;
+		case  4: gk20a_gr_av_to_init(&blob, &gr->bundle); break;
+		case  5: gk20a_gr_aiv_to_init(&blob, &gr->sw_ctx); break;
+		case  7: gk20a_gr_av_to_method(&blob, &gr->method); break;
+		case 28: tu102_gr_av_to_init_veid(&blob, &gr->bundle_veid); break;
+		case 34: ga102_gr_av64_to_init(&blob, &gr->bundle64); break;
+		case 48: gk20a_gr_av_to_init(&blob, &gr->sw_nonctx1); break;
+		case 49: gk20a_gr_av_to_init(&blob, &gr->sw_nonctx2); break;
+		case 50: gk20a_gr_av_to_init(&blob, &gr->sw_nonctx3); break;
+		case 51: gk20a_gr_av_to_init(&blob, &gr->sw_nonctx4); break;
+		default:
+			break;
+		}
+	}
+
+	ret = nvkm_acr_lsfw_load_bl_sig_net(subdev, &gr->fecs.falcon, NVKM_ACR_LSF_FECS,
+					    "gr/fecs_", ver, fwif->fecs,
+					    fw->data + fecs_inst->data_offset,
+						       fecs_inst->data_size,
+					    fw->data + fecs_data->data_offset,
+						       fecs_data->data_size);
+	if (ret)
+		return ret;
+
+	ret = nvkm_acr_lsfw_load_bl_sig_net(subdev, &gr->gpccs.falcon, NVKM_ACR_LSF_GPCCS,
+					    "gr/gpccs_", ver, fwif->gpccs,
+					    fw->data + gpccs_inst->data_offset,
+						       gpccs_inst->data_size,
+					    fw->data + gpccs_data->data_offset,
+						       gpccs_data->data_size);
+	if (ret)
+		return ret;
+
+	gr->firmware = true;
+
+	nvkm_firmware_put(fw);
+	return 0;
+}
+
+static const struct gf100_gr_fwif
+ga102_gr_fwif[] = {
+	{  0, ga102_gr_load, &ga102_gr, &ga102_gr_fecs_acr, &ga102_gr_gpccs_acr },
+	{ -1, gm200_gr_nofw },
+	{}
+};
+
+int
+ga102_gr_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr)
+{
+	return gf100_gr_new_(ga102_gr_fwif, device, type, inst, pgr);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index f16eabf4f642..5f20079c3660 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -67,7 +67,7 @@ gf100_gr_zbc_color_get(struct gf100_gr *gr, int format,
 	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
 	int zbc = -ENOSPC, i;
 
-	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
+	for (i = ltc->zbc_color_min; i <= ltc->zbc_color_max; i++) {
 		if (gr->zbc_color[i].format) {
 			if (gr->zbc_color[i].format != format)
 				continue;
@@ -114,7 +114,7 @@ gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format,
 	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
 	int zbc = -ENOSPC, i;
 
-	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
+	for (i = ltc->zbc_depth_min; i <= ltc->zbc_depth_max; i++) {
 		if (gr->zbc_depth[i].format) {
 			if (gr->zbc_depth[i].format != format)
 				continue;
@@ -355,15 +355,14 @@ static void *
 gf100_gr_chan_dtor(struct nvkm_object *object)
 {
 	struct gf100_gr_chan *chan = gf100_gr_chan(object);
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(chan->data); i++) {
-		nvkm_vmm_put(chan->vmm, &chan->data[i].vma);
-		nvkm_memory_unref(&chan->data[i].mem);
-	}
 
 	nvkm_vmm_put(chan->vmm, &chan->mmio_vma);
 	nvkm_memory_unref(&chan->mmio);
+
+	nvkm_vmm_put(chan->vmm, &chan->attrib_cb);
+	nvkm_vmm_put(chan->vmm, &chan->unknown);
+	nvkm_vmm_put(chan->vmm, &chan->bundle_cb);
+	nvkm_vmm_put(chan->vmm, &chan->pagepool);
 	nvkm_vmm_unref(&chan->vmm);
 	return chan;
 }
@@ -380,12 +379,10 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
 		  struct nvkm_object **pobject)
 {
 	struct gf100_gr *gr = gf100_gr(base);
-	struct gf100_gr_data *data = gr->mmio_data;
-	struct gf100_gr_mmio *mmio = gr->mmio_list;
 	struct gf100_gr_chan *chan;
 	struct gf100_vmm_map_v0 args = { .priv = 1 };
 	struct nvkm_device *device = gr->base.engine.subdev.device;
-	int ret, i;
+	int ret;
 
 	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
 		return -ENOMEM;
@@ -394,63 +391,91 @@ gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
 	chan->vmm = nvkm_vmm_ref(fifoch->vmm);
 	*pobject = &chan->object;
 
-	/* allocate memory for a "mmio list" buffer that's used by the HUB
-	 * fuc to modify some per-context register settings on first load
-	 * of the context.
-	 */
-	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100,
-			      false, &chan->mmio);
+	/* Map pagepool. */
+	ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->pagepool), &chan->pagepool);
 	if (ret)
 		return ret;
 
-	ret = nvkm_vmm_get(fifoch->vmm, 12, 0x1000, &chan->mmio_vma);
+	ret = nvkm_memory_map(gr->pagepool, 0, chan->vmm, chan->pagepool, &args, sizeof(args));
 	if (ret)
 		return ret;
 
-	ret = nvkm_memory_map(chan->mmio, 0, fifoch->vmm,
-			      chan->mmio_vma, &args, sizeof(args));
+	/* Map bundle circular buffer. */
+	ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->bundle_cb), &chan->bundle_cb);
+	if (ret)
+		return ret;
+
+	ret = nvkm_memory_map(gr->bundle_cb, 0, chan->vmm, chan->bundle_cb, &args, sizeof(args));
+	if (ret)
+		return ret;
+
+	/* Map attribute circular buffer. */
+	ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->attrib_cb), &chan->attrib_cb);
 	if (ret)
 		return ret;
 
-	/* allocate buffers referenced by mmio list */
-	for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) {
-		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
-				      data->size, data->align, false,
-				      &chan->data[i].mem);
+	if (device->card_type < GP100) {
+		ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb, NULL, 0);
 		if (ret)
 			return ret;
-
-		ret = nvkm_vmm_get(fifoch->vmm, 12,
-				   nvkm_memory_size(chan->data[i].mem),
-				   &chan->data[i].vma);
+	} else {
+		ret = nvkm_memory_map(gr->attrib_cb, 0, chan->vmm, chan->attrib_cb,
+				      &args, sizeof(args));;
 		if (ret)
 			return ret;
+	}
 
-		args.priv = data->priv;
+	/* Map some context buffer of unknown purpose. */
+	if (gr->func->grctx->unknown_size) {
+		ret = nvkm_vmm_get(chan->vmm, 12, nvkm_memory_size(gr->unknown), &chan->unknown);
+		if (ret)
+			return ret;
 
-		ret = nvkm_memory_map(chan->data[i].mem, 0, chan->vmm,
-				      chan->data[i].vma, &args, sizeof(args));
+		ret = nvkm_memory_map(gr->unknown, 0, chan->vmm, chan->unknown,
+				      &args, sizeof(args));
 		if (ret)
 			return ret;
+	}
 
-		data++;
+	/* Generate golden context image. */
+	mutex_lock(&gr->fecs.mutex);
+	if (gr->data == NULL) {
+		ret = gf100_grctx_generate(gr, chan, fifoch->inst);
+		if (ret) {
+			nvkm_error(&base->engine.subdev, "failed to construct context\n");
+			return ret;
+		}
 	}
+	mutex_unlock(&gr->fecs.mutex);
 
-	/* finally, fill in the mmio list and point the context at it */
-	nvkm_kmap(chan->mmio);
-	for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) {
-		u32 addr = mmio->addr;
-		u32 data = mmio->data;
+	/* allocate memory for a "mmio list" buffer that's used by the HUB
+	 * fuc to modify some per-context register settings on first load
+	 * of the context.
+	 */
+	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100,
+			      false, &chan->mmio);
+	if (ret)
+		return ret;
 
-		if (mmio->buffer >= 0) {
-			u64 info = chan->data[mmio->buffer].vma->addr;
-			data |= info >> mmio->shift;
-		}
+	ret = nvkm_vmm_get(fifoch->vmm, 12, 0x1000, &chan->mmio_vma);
+	if (ret)
+		return ret;
 
-		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr);
-		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
-		mmio++;
-	}
+	ret = nvkm_memory_map(chan->mmio, 0, fifoch->vmm,
+			      chan->mmio_vma, &args, sizeof(args));
+	if (ret)
+		return ret;
+
+	/* finally, fill in the mmio list and point the context at it */
+	nvkm_kmap(chan->mmio);
+	gr->func->grctx->pagepool(chan, chan->pagepool->addr);
+	gr->func->grctx->bundle(chan, chan->bundle_cb->addr, gr->func->grctx->bundle_size);
+	gr->func->grctx->attrib_cb(chan, chan->attrib_cb->addr, gr->func->grctx->attrib_cb_size(gr));
+	gr->func->grctx->attrib(chan);
+	if (gr->func->grctx->patch_ltc)
+		gr->func->grctx->patch_ltc(chan);
+	if (gr->func->grctx->unknown_size)
+		gr->func->grctx->unknown(chan, chan->unknown->addr, gr->func->grctx->unknown_size);
 	nvkm_done(chan->mmio);
 	return 0;
 }
@@ -727,7 +752,7 @@ gf100_gr_fecs_ctrl_ctxsw(struct gf100_gr *gr, u32 mthd)
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 
 	nvkm_wr32(device, 0x409804, 0xffffffff);
-	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409800, 0x00000000);
 	nvkm_wr32(device, 0x409500, 0xffffffff);
 	nvkm_wr32(device, 0x409504, mthd);
 	nvkm_msec(device, 2000,
@@ -771,12 +796,45 @@ gf100_gr_fecs_stop_ctxsw(struct nvkm_gr *base)
 	return ret;
 }
 
+static int
+gf100_gr_fecs_halt_pipeline(struct gf100_gr *gr)
+{
+	int ret = 0;
+
+	if (gr->firmware) {
+		mutex_lock(&gr->fecs.mutex);
+		ret = gf100_gr_fecs_ctrl_ctxsw(gr, 0x04);
+		mutex_unlock(&gr->fecs.mutex);
+	}
+
+	return ret;
+}
+
+int
+gf100_gr_fecs_wfi_golden_save(struct gf100_gr *gr, u32 inst)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	nvkm_mask(device, 0x409800, 0x00000003, 0x00000000);
+	nvkm_wr32(device, 0x409500, inst);
+	nvkm_wr32(device, 0x409504, 0x00000009);
+	nvkm_msec(device, 2000,
+		u32 stat = nvkm_rd32(device, 0x409800);
+		if (stat & 0x00000002)
+			return -EIO;
+		if (stat & 0x00000001)
+			return 0;
+	);
+
+	return -ETIMEDOUT;
+}
+
 int
 gf100_gr_fecs_bind_pointer(struct gf100_gr *gr, u32 inst)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 
-	nvkm_wr32(device, 0x409840, 0x00000030);
+	nvkm_mask(device, 0x409800, 0x00000030, 0x00000000);
 	nvkm_wr32(device, 0x409500, inst);
 	nvkm_wr32(device, 0x409504, 0x00000003);
 	nvkm_msec(device, 2000,
@@ -867,7 +925,7 @@ gf100_gr_fecs_discover_pm_image_size(struct gf100_gr *gr, u32 *psize)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 
-	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409800, 0x00000000);
 	nvkm_wr32(device, 0x409500, 0x00000000);
 	nvkm_wr32(device, 0x409504, 0x00000025);
 	nvkm_msec(device, 2000,
@@ -883,7 +941,7 @@ gf100_gr_fecs_discover_zcull_image_size(struct gf100_gr *gr, u32 *psize)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 
-	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409800, 0x00000000);
 	nvkm_wr32(device, 0x409500, 0x00000000);
 	nvkm_wr32(device, 0x409504, 0x00000016);
 	nvkm_msec(device, 2000,
@@ -899,7 +957,7 @@ gf100_gr_fecs_discover_image_size(struct gf100_gr *gr, u32 *psize)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 
-	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409800, 0x00000000);
 	nvkm_wr32(device, 0x409500, 0x00000000);
 	nvkm_wr32(device, 0x409504, 0x00000010);
 	nvkm_msec(device, 2000,
@@ -915,7 +973,7 @@ gf100_gr_fecs_set_watchdog_timeout(struct gf100_gr *gr, u32 timeout)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 
-	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409800, 0x00000000);
 	nvkm_wr32(device, 0x409500, timeout);
 	nvkm_wr32(device, 0x409504, 0x00000021);
 }
@@ -955,7 +1013,7 @@ gf100_gr_zbc_init(struct gf100_gr *gr)
 	const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
 			      0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 };
 	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
-	int index, c = ltc->zbc_min, d = ltc->zbc_min, s = ltc->zbc_min;
+	int index, c = ltc->zbc_color_min, d = ltc->zbc_depth_min, s = ltc->zbc_depth_min;
 
 	if (!gr->zbc_color[0].format) {
 		gf100_gr_zbc_color_get(gr, 1,  & zero[0],   &zero[4]); c++;
@@ -971,13 +1029,13 @@ gf100_gr_zbc_init(struct gf100_gr *gr)
 		}
 	}
 
-	for (index = c; index <= ltc->zbc_max; index++)
+	for (index = c; index <= ltc->zbc_color_max; index++)
 		gr->func->zbc->clear_color(gr, index);
-	for (index = d; index <= ltc->zbc_max; index++)
+	for (index = d; index <= ltc->zbc_depth_max; index++)
 		gr->func->zbc->clear_depth(gr, index);
 
 	if (gr->func->zbc->clear_stencil) {
-		for (index = s; index <= ltc->zbc_max; index++)
+		for (index = s; index <= ltc->zbc_depth_max; index++)
 			gr->func->zbc->clear_stencil(gr, index);
 	}
 }
@@ -1003,7 +1061,7 @@ gf100_gr_wait_idle(struct gf100_gr *gr)
 		nvkm_rd32(device, 0x400700);
 
 		gr_enabled = nvkm_rd32(device, 0x200) & 0x1000;
-		ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000;
+		ctxsw_active = nvkm_fifo_ctxsw_in_progress(&gr->base.engine);
 		gr_busy = nvkm_rd32(device, 0x40060c) & 0x1;
 
 		if (!gr_enabled || (!gr_busy && !ctxsw_active))
@@ -1039,7 +1097,7 @@ gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const struct gf100_gr_pack *pack;
 	const struct gf100_gr_init *init;
-	u32 data = 0;
+	u64 data = 0;
 
 	nvkm_wr32(device, 0x400208, 0x80000000);
 
@@ -1049,6 +1107,8 @@ gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
 
 		if ((pack == p && init == p->init) || data != init->data) {
 			nvkm_wr32(device, 0x400204, init->data);
+			if (pack->type == 64)
+				nvkm_wr32(device, 0x40020c, upper_32_bits(init->data));
 			data = init->data;
 		}
 
@@ -1542,13 +1602,13 @@ gf100_gr_ctxctl_isr(struct gf100_gr *gr)
 	}
 }
 
-static void
-gf100_gr_intr(struct nvkm_gr *base)
+static irqreturn_t
+gf100_gr_intr(struct nvkm_inth *inth)
 {
-	struct gf100_gr *gr = gf100_gr(base);
+	struct gf100_gr *gr = container_of(inth, typeof(*gr), base.engine.subdev.inth);
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	struct nvkm_fifo_chan *chan;
+	struct nvkm_chan *chan;
 	unsigned long flags;
 	u64 inst = nvkm_rd32(device, 0x409b00) & 0x0fffffff;
 	u32 stat = nvkm_rd32(device, 0x400100);
@@ -1561,10 +1621,10 @@ gf100_gr_intr(struct nvkm_gr *base)
 	const char *name = "unknown";
 	int chid = -1;
 
-	chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags);
+	chan = nvkm_chan_get_inst(&gr->base.engine, (u64)inst << 12, &flags);
 	if (chan) {
-		name = chan->object.client->name;
-		chid = chan->chid;
+		name = chan->name;
+		chid = chan->id;
 	}
 
 	if (device->card_type < NV_E0 || subc < 4)
@@ -1631,7 +1691,8 @@ gf100_gr_intr(struct nvkm_gr *base)
 	}
 
 	nvkm_wr32(device, 0x400500, 0x00010001);
-	nvkm_fifo_chan_put(device->fifo, flags, &chan);
+	nvkm_chan_put(&chan, flags);
+	return IRQ_HANDLED;
 }
 
 static void
@@ -1721,7 +1782,7 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr)
 	nvkm_mc_unk260(device, 1);
 
 	/* start both of them running */
-	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409800, 0x00000000);
 	nvkm_wr32(device, 0x41a10c, 0x00000000);
 	nvkm_wr32(device, 0x40910c, 0x00000000);
 
@@ -1763,15 +1824,6 @@ gf100_gr_init_ctxctl_ext(struct gf100_gr *gr)
 			return ret;
 	}
 
-	/* Generate golden context image. */
-	if (gr->data == NULL) {
-		int ret = gf100_grctx_generate(gr);
-		if (ret) {
-			nvkm_error(subdev, "failed to construct context\n");
-			return ret;
-		}
-	}
-
 	return 0;
 }
 
@@ -1823,14 +1875,6 @@ gf100_gr_init_ctxctl_int(struct gf100_gr *gr)
 	}
 
 	gr->size = nvkm_rd32(device, 0x409804);
-	if (gr->data == NULL) {
-		int ret = gf100_grctx_generate(gr);
-		if (ret) {
-			nvkm_error(subdev, "failed to construct context\n");
-			return ret;
-		}
-	}
-
 	return 0;
 }
 
@@ -1847,10 +1891,11 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 	return ret;
 }
 
-void
+int
 gf100_gr_oneinit_sm_id(struct gf100_gr *gr)
 {
 	int tpc, gpc;
+
 	for (tpc = 0; tpc < gr->tpc_max; tpc++) {
 		for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
 			if (tpc < gr->tpc_nr[gpc]) {
@@ -1860,6 +1905,8 @@ gf100_gr_oneinit_sm_id(struct gf100_gr *gr)
 			}
 		}
 	}
+
+	return 0;
 }
 
 void
@@ -1944,7 +1991,17 @@ gf100_gr_oneinit(struct nvkm_gr *base)
 	struct gf100_gr *gr = gf100_gr(base);
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	int i, j;
+	struct nvkm_intr *intr = &device->mc->intr;
+	enum nvkm_intr_type intr_type = NVKM_INTR_SUBDEV;
+	int ret, i, j;
+
+	if (gr->func->oneinit_intr)
+		intr = gr->func->oneinit_intr(gr, &intr_type);
+
+	ret = nvkm_inth_add(intr, intr_type, NVKM_INTR_PRIO_NORMAL, &gr->base.engine.subdev,
+			    gf100_gr_intr, &gr->base.engine.subdev.inth);
+	if (ret)
+		return ret;
 
 	nvkm_pmu_pgob(device->pmu, false);
 
@@ -1954,12 +2011,14 @@ gf100_gr_oneinit(struct nvkm_gr *base)
 		gr->tpc_nr[i]  = nvkm_rd32(device, GPC_UNIT(i, 0x2608));
 		gr->tpc_max = max(gr->tpc_max, gr->tpc_nr[i]);
 		gr->tpc_total += gr->tpc_nr[i];
-		gr->ppc_nr[i]  = gr->func->ppc_nr;
-		for (j = 0; j < gr->ppc_nr[i]; j++) {
+		for (j = 0; j < gr->func->ppc_nr; j++) {
 			gr->ppc_tpc_mask[i][j] =
 				nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
 			if (gr->ppc_tpc_mask[i][j] == 0)
 				continue;
+
+			gr->ppc_nr[i]++;
+
 			gr->ppc_mask[i] |= (1 << j);
 			gr->ppc_tpc_nr[i][j] = hweight8(gr->ppc_tpc_mask[i][j]);
 			if (gr->ppc_tpc_min == 0 ||
@@ -1968,12 +2027,37 @@ gf100_gr_oneinit(struct nvkm_gr *base)
 			if (gr->ppc_tpc_max < gr->ppc_tpc_nr[i][j])
 				gr->ppc_tpc_max = gr->ppc_tpc_nr[i][j];
 		}
+
+		gr->ppc_total += gr->ppc_nr[i];
+	}
+
+	/* Allocate global context buffers. */
+	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->pagepool_size,
+			      0x100, false, &gr->pagepool);
+	if (ret)
+		return ret;
+
+	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->bundle_size,
+			      0x100, false, &gr->bundle_cb);
+	if (ret)
+		return ret;
+
+	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->attrib_cb_size(gr),
+			      0x1000, false, &gr->attrib_cb);
+	if (ret)
+		return ret;
+
+	if (gr->func->grctx->unknown_size) {
+		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, gr->func->grctx->unknown_size,
+				      0x100, false, &gr->unknown);
+		if (ret)
+			return ret;
 	}
 
 	memset(gr->tile, 0xff, sizeof(gr->tile));
 	gr->func->oneinit_tiles(gr);
-	gr->func->oneinit_sm_id(gr);
-	return 0;
+
+	return gr->func->oneinit_sm_id(gr);
 }
 
 static int
@@ -1983,7 +2067,7 @@ gf100_gr_init_(struct nvkm_gr *base)
 	struct nvkm_subdev *subdev = &base->engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	bool reset = device->chipset == 0x137 || device->chipset == 0x138;
-	u32 ret;
+	int ret;
 
 	/* On certain GP107/GP108 boards, we trigger a weird issue where
 	 * GR will stop responding to PRI accesses after we've asked the
@@ -2019,7 +2103,12 @@ gf100_gr_init_(struct nvkm_gr *base)
 	if (ret)
 		return ret;
 
-	return gr->func->init(gr);
+	ret = gr->func->init(gr);
+	if (ret)
+		return ret;
+
+	nvkm_inth_allow(&subdev->inth);
+	return 0;
 }
 
 static int
@@ -2027,6 +2116,9 @@ gf100_gr_fini(struct nvkm_gr *base, bool suspend)
 {
 	struct gf100_gr *gr = gf100_gr(base);
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+
+	nvkm_inth_block(&subdev->inth);
+
 	nvkm_falcon_put(&gr->gpccs.falcon, subdev);
 	nvkm_falcon_put(&gr->fecs.falcon, subdev);
 	return 0;
@@ -2039,6 +2131,11 @@ gf100_gr_dtor(struct nvkm_gr *base)
 
 	kfree(gr->data);
 
+	nvkm_memory_unref(&gr->unknown);
+	nvkm_memory_unref(&gr->attrib_cb);
+	nvkm_memory_unref(&gr->bundle_cb);
+	nvkm_memory_unref(&gr->pagepool);
+
 	nvkm_falcon_dtor(&gr->gpccs.falcon);
 	nvkm_falcon_dtor(&gr->fecs.falcon);
 
@@ -2047,81 +2144,27 @@ gf100_gr_dtor(struct nvkm_gr *base)
 	nvkm_blob_dtor(&gr->gpccs.inst);
 	nvkm_blob_dtor(&gr->gpccs.data);
 
+	vfree(gr->bundle64);
+	vfree(gr->bundle_veid);
 	vfree(gr->bundle);
 	vfree(gr->method);
 	vfree(gr->sw_ctx);
 	vfree(gr->sw_nonctx);
+	vfree(gr->sw_nonctx1);
+	vfree(gr->sw_nonctx2);
+	vfree(gr->sw_nonctx3);
+	vfree(gr->sw_nonctx4);
 
 	return gr;
 }
 
-static const struct nvkm_gr_func
-gf100_gr_ = {
-	.dtor = gf100_gr_dtor,
-	.oneinit = gf100_gr_oneinit,
-	.init = gf100_gr_init_,
-	.fini = gf100_gr_fini,
-	.intr = gf100_gr_intr,
-	.units = gf100_gr_units,
-	.chan_new = gf100_gr_chan_new,
-	.object_get = gf100_gr_object_get,
-	.chsw_load = gf100_gr_chsw_load,
-	.ctxsw.pause = gf100_gr_fecs_stop_ctxsw,
-	.ctxsw.resume = gf100_gr_fecs_start_ctxsw,
-	.ctxsw.inst = gf100_gr_ctxsw_inst,
-};
-
 static const struct nvkm_falcon_func
 gf100_gr_flcn = {
-	.fbif = 0x600,
 	.load_imem = nvkm_falcon_v1_load_imem,
 	.load_dmem = nvkm_falcon_v1_load_dmem,
-	.read_dmem = nvkm_falcon_v1_read_dmem,
-	.bind_context = nvkm_falcon_v1_bind_context,
-	.wait_for_halt = nvkm_falcon_v1_wait_for_halt,
-	.clear_interrupt = nvkm_falcon_v1_clear_interrupt,
-	.set_start_addr = nvkm_falcon_v1_set_start_addr,
 	.start = nvkm_falcon_v1_start,
-	.enable = nvkm_falcon_v1_enable,
-	.disable = nvkm_falcon_v1_disable,
 };
 
-int
-gf100_gr_new_(const struct gf100_gr_fwif *fwif, struct nvkm_device *device,
-	      enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr)
-{
-	struct gf100_gr *gr;
-	int ret;
-
-	if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
-		return -ENOMEM;
-	*pgr = &gr->base;
-
-	ret = nvkm_gr_ctor(&gf100_gr_, device, type, inst, true, &gr->base);
-	if (ret)
-		return ret;
-
-	fwif = nvkm_firmware_load(&gr->base.engine.subdev, fwif, "Gr", gr);
-	if (IS_ERR(fwif))
-		return PTR_ERR(fwif);
-
-	gr->func = fwif->func;
-
-	ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev,
-			       "fecs", 0x409000, &gr->fecs.falcon);
-	if (ret)
-		return ret;
-
-	mutex_init(&gr->fecs.mutex);
-
-	ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev,
-			       "gpccs", 0x41a000, &gr->gpccs.falcon);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
 void
 gf100_gr_init_num_tpc_per_gpc(struct gf100_gr *gr, bool pd, bool ds)
 {
@@ -2146,6 +2189,29 @@ gf100_gr_init_400054(struct gf100_gr *gr)
 }
 
 void
+gf100_gr_init_exception2(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	nvkm_wr32(device, 0x40011c, 0xffffffff);
+	nvkm_wr32(device, 0x400134, 0xffffffff);
+}
+
+void
+gf100_gr_init_rop_exceptions(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+	int rop;
+
+	for (rop = 0; rop < gr->rop_nr; rop++) {
+		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
+		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
+	}
+}
+
+void
 gf100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -2252,21 +2318,47 @@ gf100_gr_init_vsc_stream_master(struct gf100_gr *gr)
 	nvkm_mask(device, TPC_UNIT(0, 0, 0x05c), 0x00000001, 0x00000001);
 }
 
+static int
+gf100_gr_reset(struct nvkm_gr *base)
+{
+	struct nvkm_subdev *subdev = &base->engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	struct gf100_gr *gr = gf100_gr(base);
+
+	nvkm_mask(device, 0x400500, 0x00000001, 0x00000000);
+
+	WARN_ON(gf100_gr_fecs_halt_pipeline(gr));
+
+	subdev->func->fini(subdev, false);
+	nvkm_mc_disable(device, subdev->type, subdev->inst);
+	if (gr->func->gpccs.reset)
+		gr->func->gpccs.reset(gr);
+
+	nvkm_mc_enable(device, subdev->type, subdev->inst);
+	return subdev->func->init(subdev);
+}
+
 int
 gf100_gr_init(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
-	int gpc, tpc, rop;
+	int gpc, tpc;
 
-	if (gr->func->init_419bd8)
-		gr->func->init_419bd8(gr);
+	nvkm_mask(device, 0x400500, 0x00010001, 0x00000000);
 
 	gr->func->init_gpc_mmu(gr);
 
-	if (gr->sw_nonctx)
+	if (gr->sw_nonctx1) {
+		gf100_gr_mmio(gr, gr->sw_nonctx1);
+		gf100_gr_mmio(gr, gr->sw_nonctx2);
+		gf100_gr_mmio(gr, gr->sw_nonctx3);
+		gf100_gr_mmio(gr, gr->sw_nonctx4);
+	} else
+	if (gr->sw_nonctx) {
 		gf100_gr_mmio(gr, gr->sw_nonctx);
-	else
+	} else {
 		gf100_gr_mmio(gr, gr->func->mmio);
+	}
 
 	gf100_gr_wait_idle(gr);
 
@@ -2298,6 +2390,10 @@ gf100_gr_init(struct gf100_gr *gr)
 	nvkm_wr32(device, 0x400124, 0x00000002);
 
 	gr->func->init_fecs_exceptions(gr);
+
+	if (gr->func->init_40a790)
+		gr->func->init_40a790(gr);
+
 	if (gr->func->init_ds_hww_esr_2)
 		gr->func->init_ds_hww_esr_2(gr);
 
@@ -2346,19 +2442,14 @@ gf100_gr_init(struct gf100_gr *gr)
 		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
 	}
 
-	for (rop = 0; rop < gr->rop_nr; rop++) {
-		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0x40000000);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0x40000000);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
-		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
-	}
+	gr->func->init_rop_exceptions(gr);
 
 	nvkm_wr32(device, 0x400108, 0xffffffff);
 	nvkm_wr32(device, 0x400138, 0xffffffff);
 	nvkm_wr32(device, 0x400118, 0xffffffff);
 	nvkm_wr32(device, 0x400130, 0xffffffff);
-	nvkm_wr32(device, 0x40011c, 0xffffffff);
-	nvkm_wr32(device, 0x400134, 0xffffffff);
+	if (gr->func->init_exception2)
+		gr->func->init_exception2(gr);
 
 	if (gr->func->init_400054)
 		gr->func->init_400054(gr);
@@ -2371,6 +2462,18 @@ gf100_gr_init(struct gf100_gr *gr)
 	return gf100_gr_init_ctxctl(gr);
 }
 
+void
+gf100_gr_fecs_reset(struct gf100_gr *gr)
+{
+	struct nvkm_device *device = gr->base.engine.subdev.device;
+
+	nvkm_wr32(device, 0x409614, 0x00000070);
+	nvkm_usec(device, 10, NVKM_DELAY);
+	nvkm_mask(device, 0x409614, 0x00000700, 0x00000700);
+	nvkm_usec(device, 10, NVKM_DELAY);
+	nvkm_rd32(device, 0x409614);
+}
+
 #include "fuc/hubgf100.fuc3.h"
 
 struct gf100_gr_ucode
@@ -2391,6 +2494,22 @@ gf100_gr_gpccs_ucode = {
 	.data.size = sizeof(gf100_grgpc_data),
 };
 
+static const struct nvkm_gr_func
+gf100_gr_ = {
+	.dtor = gf100_gr_dtor,
+	.oneinit = gf100_gr_oneinit,
+	.init = gf100_gr_init_,
+	.fini = gf100_gr_fini,
+	.reset = gf100_gr_reset,
+	.units = gf100_gr_units,
+	.chan_new = gf100_gr_chan_new,
+	.object_get = gf100_gr_object_get,
+	.chsw_load = gf100_gr_chsw_load,
+	.ctxsw.pause = gf100_gr_fecs_stop_ctxsw,
+	.ctxsw.resume = gf100_gr_fecs_start_ctxsw,
+	.ctxsw.inst = gf100_gr_ctxsw_inst,
+};
+
 static const struct gf100_gr_func
 gf100_gr = {
 	.oneinit_tiles = gf100_gr_oneinit_tiles,
@@ -2406,10 +2525,13 @@ gf100_gr = {
 	.init_419eb4 = gf100_gr_init_419eb4,
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.init_400054 = gf100_gr_init_400054,
 	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gf100_gr_pack_mmio,
 	.fecs.ucode = &gf100_gr_fecs_ucode,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.gpccs.ucode = &gf100_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.grctx = &gf100_grctx,
@@ -2483,6 +2605,42 @@ gf100_gr_fwif[] = {
 };
 
 int
+gf100_gr_new_(const struct gf100_gr_fwif *fwif, struct nvkm_device *device,
+	      enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr)
+{
+	struct gf100_gr *gr;
+	int ret;
+
+	if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
+		return -ENOMEM;
+	*pgr = &gr->base;
+
+	ret = nvkm_gr_ctor(&gf100_gr_, device, type, inst, true, &gr->base);
+	if (ret)
+		return ret;
+
+	fwif = nvkm_firmware_load(&gr->base.engine.subdev, fwif, "Gr", gr);
+	if (IS_ERR(fwif))
+		return PTR_ERR(fwif);
+
+	gr->func = fwif->func;
+
+	ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev,
+			       "fecs", 0x409000, &gr->fecs.falcon);
+	if (ret)
+		return ret;
+
+	mutex_init(&gr->fecs.mutex);
+
+	ret = nvkm_falcon_ctor(&gf100_gr_flcn, &gr->base.engine.subdev,
+			       "gpccs", 0x41a000, &gr->gpccs.falcon);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+int
 gf100_gr_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_gr **pgr)
 {
 	return gf100_gr_new_(gf100_gr_fwif, device, type, inst, pgr);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
index c0038f906135..94ca7ac16acf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -44,19 +44,6 @@ struct nvkm_acr_lsfw;
 #define PPC_UNIT(t, m, r) (0x503000 + (t) * 0x8000 + (m) * 0x200 + (r))
 #define TPC_UNIT(t, m, r) (0x504000 + (t) * 0x8000 + (m) * 0x800 + (r))
 
-struct gf100_gr_data {
-	u32 size;
-	u32 align;
-	bool priv;
-};
-
-struct gf100_gr_mmio {
-	u32 addr;
-	u32 data;
-	u32 shift;
-	int buffer;
-};
-
 struct gf100_gr_zbc_color {
 	u32 format;
 	u32 ds[4];
@@ -101,13 +88,19 @@ struct gf100_gr {
 	 * using hardcoded arrays. To be allocated with vzalloc().
 	 */
 	struct gf100_gr_pack *sw_nonctx;
+	struct gf100_gr_pack *sw_nonctx1;
+	struct gf100_gr_pack *sw_nonctx2;
+	struct gf100_gr_pack *sw_nonctx3;
+	struct gf100_gr_pack *sw_nonctx4;
 	struct gf100_gr_pack *sw_ctx;
 	struct gf100_gr_pack *bundle;
+	struct gf100_gr_pack *bundle_veid;
+	struct gf100_gr_pack *bundle64;
 	struct gf100_gr_pack *method;
 
-	struct gf100_gr_zbc_color zbc_color[NVKM_LTC_MAX_ZBC_CNT];
-	struct gf100_gr_zbc_depth zbc_depth[NVKM_LTC_MAX_ZBC_CNT];
-	struct gf100_gr_zbc_stencil zbc_stencil[NVKM_LTC_MAX_ZBC_CNT];
+	struct gf100_gr_zbc_color zbc_color[NVKM_LTC_MAX_ZBC_COLOR_CNT];
+	struct gf100_gr_zbc_depth zbc_depth[NVKM_LTC_MAX_ZBC_DEPTH_CNT];
+	struct gf100_gr_zbc_stencil zbc_stencil[NVKM_LTC_MAX_ZBC_DEPTH_CNT];
 
 	u8 rop_nr;
 	u8 gpc_nr;
@@ -120,6 +113,12 @@ struct gf100_gr {
 	u8 ppc_tpc_nr[GPC_MAX][4];
 	u8 ppc_tpc_min;
 	u8 ppc_tpc_max;
+	u8 ppc_total;
+
+	struct nvkm_memory *pagepool;
+	struct nvkm_memory *bundle_cb;
+	struct nvkm_memory *attrib_cb;
+	struct nvkm_memory *unknown;
 
 	u8 screen_tile_row_offset;
 	u8 tile[TPC_MAX];
@@ -130,8 +129,6 @@ struct gf100_gr {
 	} sm[TPC_MAX];
 	u8 sm_nr;
 
-	struct gf100_gr_data mmio_data[4];
-	struct gf100_gr_mmio mmio_list[4096/8];
 	u32  size;
 	u32 *data;
 	u32 size_zcull;
@@ -139,6 +136,7 @@ struct gf100_gr {
 };
 
 int gf100_gr_fecs_bind_pointer(struct gf100_gr *, u32 inst);
+int gf100_gr_fecs_wfi_golden_save(struct gf100_gr *, u32 inst);
 
 struct gf100_gr_func_zbc {
 	void (*clear_color)(struct gf100_gr *, int zbc);
@@ -149,8 +147,9 @@ struct gf100_gr_func_zbc {
 };
 
 struct gf100_gr_func {
+	struct nvkm_intr *(*oneinit_intr)(struct gf100_gr *, enum nvkm_intr_type *);
 	void (*oneinit_tiles)(struct gf100_gr *);
-	void (*oneinit_sm_id)(struct gf100_gr *);
+	int (*oneinit_sm_id)(struct gf100_gr *);
 	int (*init)(struct gf100_gr *);
 	void (*init_419bd8)(struct gf100_gr *);
 	void (*init_gpc_mmu)(struct gf100_gr *);
@@ -164,6 +163,7 @@ struct gf100_gr_func {
 	void (*init_swdx_pes_mask)(struct gf100_gr *);
 	void (*init_fs)(struct gf100_gr *);
 	void (*init_fecs_exceptions)(struct gf100_gr *);
+	void (*init_40a790)(struct gf100_gr *);
 	void (*init_ds_hww_esr_2)(struct gf100_gr *);
 	void (*init_40601c)(struct gf100_gr *);
 	void (*init_sked_hww_esr)(struct gf100_gr *);
@@ -174,6 +174,8 @@ struct gf100_gr_func {
 	void (*init_tex_hww_esr)(struct gf100_gr *, int gpc, int tpc);
 	void (*init_504430)(struct gf100_gr *, int gpc, int tpc);
 	void (*init_shader_exceptions)(struct gf100_gr *, int gpc, int tpc);
+	void (*init_rop_exceptions)(struct gf100_gr *);
+	void (*init_exception2)(struct gf100_gr *);
 	void (*init_400054)(struct gf100_gr *);
 	void (*init_4188a4)(struct gf100_gr *);
 	void (*trap_mp)(struct gf100_gr *, int gpc, int tpc);
@@ -181,9 +183,11 @@ struct gf100_gr_func {
 	const struct gf100_gr_pack *mmio;
 	struct {
 		struct gf100_gr_ucode *ucode;
+		void (*reset)(struct gf100_gr *);
 	} fecs;
 	struct {
 		struct gf100_gr_ucode *ucode;
+		void (*reset)(struct gf100_gr *);
 	} gpccs;
 	int (*rops)(struct gf100_gr *);
 	int gpc_nr;
@@ -197,7 +201,7 @@ struct gf100_gr_func {
 
 int gf100_gr_rops(struct gf100_gr *);
 void gf100_gr_oneinit_tiles(struct gf100_gr *);
-void gf100_gr_oneinit_sm_id(struct gf100_gr *);
+int gf100_gr_oneinit_sm_id(struct gf100_gr *);
 int gf100_gr_init(struct gf100_gr *);
 void gf100_gr_init_vsc_stream_master(struct gf100_gr *);
 void gf100_gr_init_zcull(struct gf100_gr *);
@@ -208,9 +212,12 @@ void gf100_gr_init_419cc0(struct gf100_gr *);
 void gf100_gr_init_419eb4(struct gf100_gr *);
 void gf100_gr_init_tex_hww_esr(struct gf100_gr *, int, int);
 void gf100_gr_init_shader_exceptions(struct gf100_gr *, int, int);
+void gf100_gr_init_rop_exceptions(struct gf100_gr *);
+void gf100_gr_init_exception2(struct gf100_gr *);
 void gf100_gr_init_400054(struct gf100_gr *);
 void gf100_gr_init_num_tpc_per_gpc(struct gf100_gr *, bool, bool);
 extern const struct gf100_gr_func_zbc gf100_gr_zbc;
+void gf100_gr_fecs_reset(struct gf100_gr *);
 
 void gf117_gr_init_zcull(struct gf100_gr *);
 
@@ -226,9 +233,13 @@ void gm107_gr_init_shader_exceptions(struct gf100_gr *, int, int);
 void gm107_gr_init_400054(struct gf100_gr *);
 
 int gk20a_gr_init(struct gf100_gr *);
+int gk20a_gr_av_to_init_(struct nvkm_blob *, u8 count, u32 pitch, struct gf100_gr_pack **);
+int gk20a_gr_av_to_init(struct nvkm_blob *, struct gf100_gr_pack **);
+int gk20a_gr_aiv_to_init(struct nvkm_blob *, struct gf100_gr_pack **);
+int gk20a_gr_av_to_method(struct nvkm_blob *, struct gf100_gr_pack **);
 
 void gm200_gr_oneinit_tiles(struct gf100_gr *);
-void gm200_gr_oneinit_sm_id(struct gf100_gr *);
+int gm200_gr_oneinit_sm_id(struct gf100_gr *);
 int gm200_gr_rops(struct gf100_gr *);
 void gm200_gr_init_num_active_ltcs(struct gf100_gr *);
 void gm200_gr_init_ds_hww_esr_2(struct gf100_gr *);
@@ -242,14 +253,24 @@ extern const struct gf100_gr_func_zbc gp100_gr_zbc;
 
 void gp102_gr_init_swdx_pes_mask(struct gf100_gr *);
 extern const struct gf100_gr_func_zbc gp102_gr_zbc;
+int gp102_gr_zbc_stencil_get(struct gf100_gr *, int, const u32, const u32);
+void gp102_gr_zbc_clear_stencil(struct gf100_gr *, int);
 
 extern const struct gf100_gr_func gp107_gr;
 
+int gv100_gr_oneinit_sm_id(struct gf100_gr *);
+u32 gv100_gr_nonpes_aware_tpc(struct gf100_gr *gr, u32 gpc, u32 tpc);
 void gv100_gr_init_419bd8(struct gf100_gr *);
 void gv100_gr_init_504430(struct gf100_gr *, int, int);
 void gv100_gr_init_shader_exceptions(struct gf100_gr *, int, int);
+void gv100_gr_init_4188a4(struct gf100_gr *);
 void gv100_gr_trap_mp(struct gf100_gr *, int, int);
 
+int tu102_gr_av_to_init_veid(struct nvkm_blob *, struct gf100_gr_pack **);
+void tu102_gr_init_zcull(struct gf100_gr *);
+void tu102_gr_init_fs(struct gf100_gr *);
+void tu102_gr_init_fecs_exceptions(struct gf100_gr *);
+
 #define gf100_gr_chan(p) container_of((p), struct gf100_gr_chan, object)
 #include <core/object.h>
 
@@ -258,14 +279,14 @@ struct gf100_gr_chan {
 	struct gf100_gr *gr;
 	struct nvkm_vmm *vmm;
 
+	struct nvkm_vma *pagepool;
+	struct nvkm_vma *bundle_cb;
+	struct nvkm_vma *attrib_cb;
+	struct nvkm_vma *unknown;
+
 	struct nvkm_memory *mmio;
 	struct nvkm_vma *mmio_vma;
 	int mmio_nr;
-
-	struct {
-		struct nvkm_memory *mem;
-		struct nvkm_vma *vma;
-	} data[4];
 };
 
 void gf100_gr_ctxctl_debug(struct gf100_gr *);
@@ -279,7 +300,7 @@ struct gf100_gr_init {
 	u32 addr;
 	u8  count;
 	u32 pitch;
-	u32 data;
+	u64 data;
 };
 
 struct gf100_gr_pack {
@@ -403,6 +424,9 @@ int gf100_gr_load(struct gf100_gr *, int, const struct gf100_gr_fwif *);
 int gf100_gr_nofw(struct gf100_gr *, int, const struct gf100_gr_fwif *);
 
 int gk20a_gr_load_sw(struct gf100_gr *, const char *path, int ver);
+int gk20a_gr_load_net(struct gf100_gr *, const char *, const char *, int,
+		      int (*)(struct nvkm_blob *, struct gf100_gr_pack **),
+		      struct gf100_gr_pack **);
 
 int gm200_gr_nofw(struct gf100_gr *, int, const struct gf100_gr_fwif *);
 int gm200_gr_load(struct gf100_gr *, int, const struct gf100_gr_fwif *);
@@ -415,6 +439,8 @@ void gm20b_gr_acr_bld_patch(struct nvkm_acr *, u32, s64);
 
 extern const struct nvkm_acr_lsf_func gp108_gr_gpccs_acr;
 extern const struct nvkm_acr_lsf_func gp108_gr_fecs_acr;
+void gp108_gr_acr_bld_write(struct nvkm_acr *, u32, struct nvkm_acr_lsfw *);
+void gp108_gr_acr_bld_patch(struct nvkm_acr *, u32, s64);
 
 int gf100_gr_new_(const struct gf100_gr_fwif *, struct nvkm_device *, enum nvkm_subdev_type, int,
 		  struct nvkm_gr **);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c
index 3acd99c306f2..63bd29c22fe1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf104.c
@@ -127,10 +127,13 @@ gf104_gr = {
 	.init_419eb4 = gf100_gr_init_419eb4,
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.init_400054 = gf100_gr_init_400054,
 	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gf104_gr_pack_mmio,
 	.fecs.ucode = &gf100_gr_fecs_ucode,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.gpccs.ucode = &gf100_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.grctx = &gf104_grctx,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c
index ab3760e804b8..495a844f925f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf108.c
@@ -125,10 +125,13 @@ gf108_gr = {
 	.init_419eb4 = gf100_gr_init_419eb4,
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.init_400054 = gf100_gr_init_400054,
 	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gf108_gr_pack_mmio,
 	.fecs.ucode = &gf100_gr_fecs_ucode,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.gpccs.ucode = &gf100_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.grctx = &gf108_grctx,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c
index 616e2def1865..70fad235d161 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf110.c
@@ -99,10 +99,13 @@ gf110_gr = {
 	.init_419eb4 = gf100_gr_init_419eb4,
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.init_400054 = gf100_gr_init_400054,
 	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gf110_gr_pack_mmio,
 	.fecs.ucode = &gf100_gr_fecs_ucode,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.gpccs.ucode = &gf100_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.grctx = &gf110_grctx,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
index 669e7536970e..f12728248048 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
@@ -125,7 +125,9 @@ gf117_gr_init_zcull(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
-	const u8 tile_nr = ALIGN(gr->tpc_total, 32);
+	/*TODO: fill in litter vals for gf117-gm2xx */
+	const u8 tile_nr = !gr->func->gpc_nr ? ALIGN(gr->tpc_total, 32) :
+			   (gr->func->gpc_nr * gr->func->tpc_nr);
 	u8 bank[GPC_MAX] = {}, gpc, i, j;
 	u32 data;
 
@@ -163,10 +165,13 @@ gf117_gr = {
 	.init_419eb4 = gf100_gr_init_419eb4,
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.init_400054 = gf100_gr_init_400054,
 	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gf117_gr_pack_mmio,
 	.fecs.ucode = &gf117_gr_fecs_ucode,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.gpccs.ucode = &gf117_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.ppc_nr = 1,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c
index 5b09bda8110c..75ceb514c06e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf119.c
@@ -190,10 +190,13 @@ gf119_gr = {
 	.init_419eb4 = gf100_gr_init_419eb4,
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.init_400054 = gf100_gr_init_400054,
 	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gf119_gr_pack_mmio,
 	.fecs.ucode = &gf100_gr_fecs_ucode,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.gpccs.ucode = &gf100_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.grctx = &gf119_grctx,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
index b680eaa0f350..e53ade24ad23 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk104.c
@@ -418,7 +418,7 @@ gk104_gr_init_ppc_exceptions(struct gf100_gr *gr)
 	int gpc, ppc;
 
 	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
-		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) {
+		for (ppc = 0; ppc < gr->func->ppc_nr; ppc++) {
 			if (!(gr->ppc_mask[gpc] & (1 << ppc)))
 				continue;
 			nvkm_wr32(device, PPC_UNIT(gpc, ppc, 0x038), 0xc0000000);
@@ -470,10 +470,13 @@ gk104_gr = {
 	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.init_400054 = gf100_gr_init_400054,
 	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gk104_gr_pack_mmio,
 	.fecs.ucode = &gk104_gr_fecs_ucode,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.gpccs.ucode = &gk104_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.ppc_nr = 1,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c
index 103e06a77e65..c7e1c5dbc6a9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110.c
@@ -366,10 +366,13 @@ gk110_gr = {
 	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.init_400054 = gf100_gr_init_400054,
 	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gk110_gr_pack_mmio,
 	.fecs.ucode = &gk110_gr_fecs_ucode,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.gpccs.ucode = &gk110_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.ppc_nr = 2,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c
index 034d0b11a17d..458abae571bf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk110b.c
@@ -118,10 +118,13 @@ gk110b_gr = {
 	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.init_400054 = gf100_gr_init_400054,
 	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gk110b_gr_pack_mmio,
 	.fecs.ucode = &gk110_gr_fecs_ucode,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.gpccs.ucode = &gk110_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.ppc_nr = 2,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c
index 116d682f9f96..d3f6b65c21d2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk208.c
@@ -176,10 +176,13 @@ gk208_gr = {
 	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_shader_exceptions = gf100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.init_400054 = gf100_gr_init_400054,
 	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gk208_gr_pack_mmio,
 	.fecs.ucode = &gk208_gr_fecs_ucode,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.gpccs.ucode = &gk208_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.ppc_nr = 1,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
index be0b2cefd8e8..035ea213f543 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gk20a.c
@@ -33,47 +33,40 @@ struct gk20a_fw_av
 	u32 data;
 };
 
-static int
-gk20a_gr_av_to_init(struct gf100_gr *gr, const char *path, const char *name,
-		    int ver, struct gf100_gr_pack **ppack)
+int
+gk20a_gr_av_to_init_(struct nvkm_blob *blob, u8 count, u32 pitch, struct gf100_gr_pack **ppack)
 {
-	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
-	struct nvkm_blob blob;
 	struct gf100_gr_init *init;
 	struct gf100_gr_pack *pack;
 	int nent;
-	int ret;
 	int i;
 
-	ret = nvkm_firmware_load_blob(subdev, path, name, ver, &blob);
-	if (ret)
-		return ret;
-
-	nent = (blob.size / sizeof(struct gk20a_fw_av));
+	nent = (blob->size / sizeof(struct gk20a_fw_av));
 	pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1)));
-	if (!pack) {
-		ret = -ENOMEM;
-		goto end;
-	}
+	if (!pack)
+		return -ENOMEM;
 
 	init = (void *)(pack + 2);
 	pack[0].init = init;
 
 	for (i = 0; i < nent; i++) {
 		struct gf100_gr_init *ent = &init[i];
-		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)blob.data)[i];
+		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)blob->data)[i];
 
 		ent->addr = av->addr;
 		ent->data = av->data;
-		ent->count = 1;
-		ent->pitch = 1;
+		ent->count = ((ent->addr & 0xffff) != 0xe100) ? count : 1;
+		ent->pitch = pitch;
 	}
 
 	*ppack = pack;
+	return 0;
+}
 
-end:
-	nvkm_blob_dtor(&blob);
-	return ret;
+int
+gk20a_gr_av_to_init(struct nvkm_blob *blob, struct gf100_gr_pack **ppack)
+{
+	return gk20a_gr_av_to_init_(blob, 1, 1, ppack);
 }
 
 struct gk20a_fw_aiv
@@ -83,35 +76,25 @@ struct gk20a_fw_aiv
 	u32 data;
 };
 
-static int
-gk20a_gr_aiv_to_init(struct gf100_gr *gr, const char *path, const char *name,
-		     int ver, struct gf100_gr_pack **ppack)
+int
+gk20a_gr_aiv_to_init(struct nvkm_blob *blob, struct gf100_gr_pack **ppack)
 {
-	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
-	struct nvkm_blob blob;
 	struct gf100_gr_init *init;
 	struct gf100_gr_pack *pack;
 	int nent;
-	int ret;
 	int i;
 
-	ret = nvkm_firmware_load_blob(subdev, path, name, ver, &blob);
-	if (ret)
-		return ret;
-
-	nent = (blob.size / sizeof(struct gk20a_fw_aiv));
+	nent = (blob->size / sizeof(struct gk20a_fw_aiv));
 	pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1)));
-	if (!pack) {
-		ret = -ENOMEM;
-		goto end;
-	}
+	if (!pack)
+		return -ENOMEM;
 
 	init = (void *)(pack + 2);
 	pack[0].init = init;
 
 	for (i = 0; i < nent; i++) {
 		struct gf100_gr_init *ent = &init[i];
-		struct gk20a_fw_aiv *av = &((struct gk20a_fw_aiv *)blob.data)[i];
+		struct gk20a_fw_aiv *av = &((struct gk20a_fw_aiv *)blob->data)[i];
 
 		ent->addr = av->addr;
 		ent->data = av->data;
@@ -120,44 +103,30 @@ gk20a_gr_aiv_to_init(struct gf100_gr *gr, const char *path, const char *name,
 	}
 
 	*ppack = pack;
-
-end:
-	nvkm_blob_dtor(&blob);
-	return ret;
+	return 0;
 }
 
-static int
-gk20a_gr_av_to_method(struct gf100_gr *gr, const char *path, const char *name,
-		      int ver, struct gf100_gr_pack **ppack)
+int
+gk20a_gr_av_to_method(struct nvkm_blob *blob, struct gf100_gr_pack **ppack)
 {
-	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
-	struct nvkm_blob blob;
 	struct gf100_gr_init *init;
 	struct gf100_gr_pack *pack;
 	/* We don't suppose we will initialize more than 16 classes here... */
 	static const unsigned int max_classes = 16;
 	u32 classidx = 0, prevclass = 0;
 	int nent;
-	int ret;
 	int i;
 
-	ret = nvkm_firmware_load_blob(subdev, path, name, ver, &blob);
-	if (ret)
-		return ret;
-
-	nent = (blob.size / sizeof(struct gk20a_fw_av));
-
+	nent = (blob->size / sizeof(struct gk20a_fw_av));
 	pack = vzalloc((sizeof(*pack) * (max_classes + 1)) +
 		       (sizeof(*init) * (nent + max_classes + 1)));
-	if (!pack) {
-		ret = -ENOMEM;
-		goto end;
-	}
+	if (!pack)
+		return -ENOMEM;
 
 	init = (void *)(pack + max_classes + 1);
 
 	for (i = 0; i < nent; i++, init++) {
-		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)blob.data)[i];
+		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)blob->data)[i];
 		u32 class = av->addr & 0xffff;
 		u32 addr = (av->addr & 0xffff0000) >> 14;
 
@@ -169,8 +138,7 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *path, const char *name,
 			prevclass = class;
 			if (++classidx >= max_classes) {
 				vfree(pack);
-				ret = -ENOSPC;
-				goto end;
+				return -ENOSPC;
 			}
 		}
 
@@ -181,10 +149,7 @@ gk20a_gr_av_to_method(struct gf100_gr *gr, const char *path, const char *name,
 	}
 
 	*ppack = pack;
-
-end:
-	nvkm_blob_dtor(&blob);
-	return ret;
+	return 0;
 }
 
 static int
@@ -294,6 +259,7 @@ gk20a_gr = {
 	.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
 	.trap_mp = gf100_gr_trap_mp,
 	.set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.rops = gf100_gr_rops,
 	.ppc_nr = 1,
 	.grctx = &gk20a_grctx,
@@ -308,12 +274,29 @@ gk20a_gr = {
 };
 
 int
+gk20a_gr_load_net(struct gf100_gr *gr, const char *path, const char *name, int ver,
+		  int (*load)(struct nvkm_blob *, struct gf100_gr_pack **),
+		  struct gf100_gr_pack **ppack)
+{
+	struct nvkm_blob blob;
+	int ret;
+
+	ret = nvkm_firmware_load_blob(&gr->base.engine.subdev, path, name, ver, &blob);
+	if (ret)
+		return ret;
+
+	ret = load(&blob, ppack);
+	nvkm_blob_dtor(&blob);
+	return 0;
+}
+
+int
 gk20a_gr_load_sw(struct gf100_gr *gr, const char *path, int ver)
 {
-	if (gk20a_gr_av_to_init(gr, path, "sw_nonctx", ver, &gr->sw_nonctx) ||
-	    gk20a_gr_aiv_to_init(gr, path, "sw_ctx", ver, &gr->sw_ctx) ||
-	    gk20a_gr_av_to_init(gr, path, "sw_bundle_init", ver, &gr->bundle) ||
-	    gk20a_gr_av_to_method(gr, path, "sw_method_init", ver, &gr->method))
+	if (gk20a_gr_load_net(gr, path, "sw_nonctx", ver, gk20a_gr_av_to_init, &gr->sw_nonctx) ||
+	    gk20a_gr_load_net(gr, path, "sw_ctx", ver, gk20a_gr_aiv_to_init, &gr->sw_ctx) ||
+	    gk20a_gr_load_net(gr, path, "sw_bundle_init", ver, gk20a_gr_av_to_init, &gr->bundle) ||
+	    gk20a_gr_load_net(gr, path, "sw_method_init", ver, gk20a_gr_av_to_method, &gr->method))
 		return -ENOENT;
 
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
index 310987174cb5..797b828a943b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
@@ -411,10 +411,13 @@ gm107_gr = {
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_504430 = gm107_gr_init_504430,
 	.init_shader_exceptions = gm107_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.init_400054 = gm107_gr_init_400054,
 	.trap_mp = gf100_gr_trap_mp,
 	.mmio = gm107_gr_pack_mmio,
 	.fecs.ucode = &gm107_gr_fecs_ucode,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.gpccs.ucode = &gm107_gr_gpccs_ucode,
 	.rops = gf100_gr_rops,
 	.ppc_nr = 2,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
index 385cfd91b266..b5210b31c1b2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm200.c
@@ -148,11 +148,11 @@ gm200_gr_tile_map_2_8[] = {
 	0, 1, 1, 0, 0, 1, 1, 0,
 };
 
-void
+int
 gm200_gr_oneinit_sm_id(struct gf100_gr *gr)
 {
 	/*XXX: There's a different algorithm here I've not yet figured out. */
-	gf100_gr_oneinit_sm_id(gr);
+	return gf100_gr_oneinit_sm_id(gr);
 }
 
 void
@@ -199,8 +199,11 @@ gm200_gr = {
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_504430 = gm107_gr_init_504430,
 	.init_shader_exceptions = gm107_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.init_400054 = gm107_gr_init_400054,
 	.trap_mp = gf100_gr_trap_mp,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.rops = gm200_gr_rops,
 	.tpc_nr = 4,
 	.ppc_nr = 2,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
index ec1c46e47e00..458cd1a00d3f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm20b.c
@@ -123,6 +123,7 @@ gm20b_gr = {
 	.init_rop_active_fbps = gk104_gr_init_rop_active_fbps,
 	.trap_mp = gf100_gr_trap_mp,
 	.set_hww_esr_report_mask = gm20b_gr_set_hww_esr_report_mask,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.rops = gm200_gr_rops,
 	.ppc_nr = 1,
 	.grctx = &gm20b_grctx,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
index 0550dd6f46f1..851e743d2cab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp100.c
@@ -87,7 +87,7 @@ gp100_gr_init_419c9c(struct gf100_gr *gr)
 void
 gp100_gr_init_fecs_exceptions(struct gf100_gr *gr)
 {
-	nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, 0x000f0002);
+	nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, 0x000e0002);
 }
 
 void
@@ -119,7 +119,10 @@ gp100_gr = {
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_504430 = gm107_gr_init_504430,
 	.init_shader_exceptions = gp100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.trap_mp = gf100_gr_trap_mp,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.rops = gm200_gr_rops,
 	.gpc_nr = 6,
 	.tpc_nr = 5,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c
index 5b001f374be0..0e223b7b5f0e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp102.c
@@ -26,7 +26,7 @@
 
 #include <nvif/class.h>
 
-static void
+void
 gp102_gr_zbc_clear_stencil(struct gf100_gr *gr, int zbc)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -40,14 +40,14 @@ gp102_gr_zbc_clear_stencil(struct gf100_gr *gr, int zbc)
 			  gr->zbc_stencil[zbc].format << ((znum % 4) * 7));
 }
 
-static int
+int
 gp102_gr_zbc_stencil_get(struct gf100_gr *gr, int format,
 			 const u32 ds, const u32 l2)
 {
 	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
 	int zbc = -ENOSPC, i;
 
-	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
+	for (i = ltc->zbc_depth_min; i <= ltc->zbc_depth_max; i++) {
 		if (gr->zbc_stencil[i].format) {
 			if (gr->zbc_stencil[i].format != format)
 				continue;
@@ -115,7 +115,10 @@ gp102_gr = {
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_504430 = gm107_gr_init_504430,
 	.init_shader_exceptions = gp100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.trap_mp = gf100_gr_trap_mp,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.rops = gm200_gr_rops,
 	.gpc_nr = 6,
 	.tpc_nr = 5,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c
index 2655574ec63b..6802cb9b199f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp104.c
@@ -43,7 +43,10 @@ gp104_gr = {
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_504430 = gm107_gr_init_504430,
 	.init_shader_exceptions = gp100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.trap_mp = gf100_gr_trap_mp,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.rops = gm200_gr_rops,
 	.gpc_nr = 6,
 	.tpc_nr = 5,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c
index adabc04d4f3a..cc2bb0d0a987 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp107.c
@@ -45,7 +45,10 @@ gp107_gr = {
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_504430 = gm107_gr_init_504430,
 	.init_shader_exceptions = gp100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.trap_mp = gf100_gr_trap_mp,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.rops = gm200_gr_rops,
 	.gpc_nr = 2,
 	.tpc_nr = 3,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp108.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp108.c
index 7310f0466bb7..311f703439e4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp108.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp108.c
@@ -25,7 +25,7 @@
 
 #include <nvfw/flcn.h>
 
-static void
+void
 gp108_gr_acr_bld_patch(struct nvkm_acr *acr, u32 bld, s64 adjust)
 {
 	struct flcn_bl_dmem_desc_v2 hdr;
@@ -36,7 +36,7 @@ gp108_gr_acr_bld_patch(struct nvkm_acr *acr, u32 bld, s64 adjust)
 	flcn_bl_dmem_desc_v2_dump(&acr->subdev, &hdr);
 }
 
-static void
+void
 gp108_gr_acr_bld_write(struct nvkm_acr *acr, u32 bld,
 		       struct nvkm_acr_lsfw *lsfw)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c
index e13683b6e7b1..5008881ca079 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gp10b.c
@@ -55,7 +55,10 @@ gp10b_gr = {
 	.init_tex_hww_esr = gf100_gr_init_tex_hww_esr,
 	.init_504430 = gm107_gr_init_504430,
 	.init_shader_exceptions = gp100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.trap_mp = gf100_gr_trap_mp,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.rops = gm200_gr_rops,
 	.gpc_nr = 1,
 	.tpc_nr = 2,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c
index 4d043c1173ea..7f7404a76140 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gv100.c
@@ -52,10 +52,11 @@ gv100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc)
 	gv100_gr_trap_sm(gr, gpc, tpc, 1);
 }
 
-static void
+void
 gv100_gr_init_4188a4(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
+
 	nvkm_mask(device, 0x4188a4, 0x03000000, 0x03000000);
 }
 
@@ -65,7 +66,6 @@ gv100_gr_init_shader_exceptions(struct gf100_gr *gr, int gpc, int tpc)
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	int sm;
 	for (sm = 0; sm < 0x100; sm += 0x80) {
-		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x728 + sm), 0x0085eb64);
 		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x610), 0x00000001);
 		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x72c + sm), 0x00000004);
 	}
@@ -85,10 +85,202 @@ gv100_gr_init_419bd8(struct gf100_gr *gr)
 	nvkm_mask(device, 0x419bd8, 0x00000700, 0x00000000);
 }
 
+u32
+gv100_gr_nonpes_aware_tpc(struct gf100_gr *gr, u32 gpc, u32 tpc)
+{
+	u32 pes, temp, tpc_new = 0;
+
+	for (pes = 0; pes < gr->ppc_nr[gpc]; pes++) {
+		if (gr->ppc_tpc_mask[gpc][pes] & BIT(tpc))
+			break;
+
+		tpc_new += gr->ppc_tpc_nr[gpc][pes];
+	}
+
+	temp = (BIT(tpc) - 1) & gr->ppc_tpc_mask[gpc][pes];
+	temp = hweight32(temp);
+	return tpc_new + temp;
+}
+
+static int
+gv100_gr_scg_estimate_perf(struct gf100_gr *gr, unsigned long *gpc_tpc_mask,
+			   u32 disable_gpc, u32 disable_tpc, int *perf)
+{
+	const u32 scale_factor = 512UL;		/* Use fx23.9 */
+	const u32 pix_scale = 1024*1024UL;	/* Pix perf in [29:20] */
+	const u32 world_scale = 1024UL;		/* World performance in [19:10] */
+	const u32 tpc_scale = 1;		/* TPC balancing in [9:0] */
+	u32 scg_num_pes = 0;
+	u32 min_scg_gpc_pix_perf = scale_factor; /* Init perf as maximum */
+	u32 average_tpcs = 0; /* Average of # of TPCs per GPC */
+	u32 deviation; /* absolute diff between TPC# and average_tpcs, averaged across GPCs */
+	u32 norm_tpc_deviation;	/* deviation/max_tpc_per_gpc */
+	u32 tpc_balance;
+	u32 scg_gpc_pix_perf;
+	u32 scg_world_perf;
+	u32 gpc;
+	u32 pes;
+	int diff;
+	bool tpc_removed_gpc = false;
+	bool tpc_removed_pes = false;
+	u32 max_tpc_gpc = 0;
+	u32 num_tpc_mask;
+	u32 *num_tpc_gpc;
+	int ret = -EINVAL;
+
+	if (!(num_tpc_gpc = kcalloc(gr->gpc_nr, sizeof(*num_tpc_gpc), GFP_KERNEL)))
+		return -ENOMEM;
+
+	/* Calculate pix-perf-reduction-rate per GPC and find bottleneck TPC */
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		num_tpc_mask = gpc_tpc_mask[gpc];
+
+		if ((gpc == disable_gpc) && num_tpc_mask & BIT(disable_tpc)) {
+			/* Safety check if a TPC is removed twice */
+			if (WARN_ON(tpc_removed_gpc))
+				goto done;
+
+			/* Remove logical TPC from set */
+			num_tpc_mask &= ~BIT(disable_tpc);
+			tpc_removed_gpc = true;
+		}
+
+		/* track balancing of tpcs across gpcs */
+		num_tpc_gpc[gpc] = hweight32(num_tpc_mask);
+		average_tpcs += num_tpc_gpc[gpc];
+
+		/* save the maximum numer of gpcs */
+		max_tpc_gpc = num_tpc_gpc[gpc] > max_tpc_gpc ? num_tpc_gpc[gpc] : max_tpc_gpc;
+
+		/*
+		 * Calculate ratio between TPC count and post-FS and post-SCG
+		 *
+		 * ratio represents relative throughput of the GPC
+		 */
+		scg_gpc_pix_perf = scale_factor * num_tpc_gpc[gpc] / gr->tpc_nr[gpc];
+		if (min_scg_gpc_pix_perf > scg_gpc_pix_perf)
+			min_scg_gpc_pix_perf = scg_gpc_pix_perf;
+
+		/* Calculate # of surviving PES */
+		for (pes = 0; pes < gr->ppc_nr[gpc]; pes++) {
+			/* Count the number of TPC on the set */
+			num_tpc_mask = gr->ppc_tpc_mask[gpc][pes] & gpc_tpc_mask[gpc];
+
+			if ((gpc == disable_gpc) && (num_tpc_mask & BIT(disable_tpc))) {
+				if (WARN_ON(tpc_removed_pes))
+					goto done;
+
+				num_tpc_mask &= ~BIT(disable_tpc);
+				tpc_removed_pes = true;
+			}
+
+			if (hweight32(num_tpc_mask))
+				scg_num_pes++;
+		}
+	}
+
+	if (WARN_ON(!tpc_removed_gpc || !tpc_removed_pes))
+		goto done;
+
+	if (max_tpc_gpc == 0) {
+		*perf = 0;
+		goto done_ok;
+	}
+
+	/* Now calculate perf */
+	scg_world_perf = (scale_factor * scg_num_pes) / gr->ppc_total;
+	deviation = 0;
+	average_tpcs = scale_factor * average_tpcs / gr->gpc_nr;
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		diff = average_tpcs - scale_factor * num_tpc_gpc[gpc];
+		if (diff < 0)
+			diff = -diff;
+
+		deviation += diff;
+	}
+
+	deviation /= gr->gpc_nr;
+
+	norm_tpc_deviation = deviation / max_tpc_gpc;
+
+	tpc_balance = scale_factor - norm_tpc_deviation;
+
+	if ((tpc_balance > scale_factor)          ||
+	    (scg_world_perf > scale_factor)       ||
+	    (min_scg_gpc_pix_perf > scale_factor) ||
+	    (norm_tpc_deviation > scale_factor)) {
+		WARN_ON(1);
+		goto done;
+	}
+
+	*perf = (pix_scale * min_scg_gpc_pix_perf) +
+		(world_scale * scg_world_perf) +
+		(tpc_scale * tpc_balance);
+done_ok:
+	ret = 0;
+done:
+	kfree(num_tpc_gpc);
+	return ret;
+}
+
+int
+gv100_gr_oneinit_sm_id(struct gf100_gr *gr)
+{
+	unsigned long *gpc_tpc_mask;
+	u32 *tpc_table, *gpc_table;
+	u32 gpc, tpc, pes, gtpc;
+	int perf, maxperf, ret = 0;
+
+	gpc_tpc_mask = kcalloc(gr->gpc_nr, sizeof(*gpc_tpc_mask), GFP_KERNEL);
+	gpc_table = kcalloc(gr->tpc_total, sizeof(*gpc_table), GFP_KERNEL);
+	tpc_table = kcalloc(gr->tpc_total, sizeof(*tpc_table), GFP_KERNEL);
+	if (!gpc_table || !tpc_table || !gpc_tpc_mask) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
+		for (pes = 0; pes < gr->ppc_nr[gpc]; pes++)
+			gpc_tpc_mask[gpc] |= gr->ppc_tpc_mask[gpc][pes];
+	}
+
+	for (gtpc = 0; gtpc < gr->tpc_total; gtpc++) {
+		for (maxperf = -1, gpc = 0; gpc < gr->gpc_nr; gpc++) {
+			for_each_set_bit(tpc, &gpc_tpc_mask[gpc], gr->tpc_nr[gpc]) {
+				ret = gv100_gr_scg_estimate_perf(gr, gpc_tpc_mask, gpc, tpc, &perf);
+				if (ret)
+					goto done;
+
+				/* nvgpu does ">=" here, but this gets us RM's numbers. */
+				if (perf > maxperf) {
+					maxperf = perf;
+					gpc_table[gtpc] = gpc;
+					tpc_table[gtpc] = tpc;
+				}
+			}
+		}
+
+		gpc_tpc_mask[gpc_table[gtpc]] &= ~BIT(tpc_table[gtpc]);
+	}
+
+	/*TODO: build table for sm_per_tpc != 1, don't use yet, but might need later? */
+	for (gtpc = 0; gtpc < gr->tpc_total; gtpc++) {
+		gr->sm[gtpc].gpc = gpc_table[gtpc];
+		gr->sm[gtpc].tpc = tpc_table[gtpc];
+		gr->sm_nr++;
+	}
+
+done:
+	kfree(gpc_table);
+	kfree(tpc_table);
+	kfree(gpc_tpc_mask);
+	return ret;
+}
+
 static const struct gf100_gr_func
 gv100_gr = {
 	.oneinit_tiles = gm200_gr_oneinit_tiles,
-	.oneinit_sm_id = gm200_gr_oneinit_sm_id,
+	.oneinit_sm_id = gv100_gr_oneinit_sm_id,
 	.init = gf100_gr_init,
 	.init_419bd8 = gv100_gr_init_419bd8,
 	.init_gpc_mmu = gm200_gr_init_gpc_mmu,
@@ -103,11 +295,14 @@ gv100_gr = {
 	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
 	.init_504430 = gv100_gr_init_504430,
 	.init_shader_exceptions = gv100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
 	.init_4188a4 = gv100_gr_init_4188a4,
 	.trap_mp = gv100_gr_trap_mp,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.rops = gm200_gr_rops,
 	.gpc_nr = 6,
-	.tpc_nr = 5,
+	.tpc_nr = 7,
 	.ppc_nr = 3,
 	.grctx = &gv100_grctx,
 	.zbc = &gp102_gr_zbc,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c
index 0bc1a238de43..81bd682c2102 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv04.c
@@ -1192,7 +1192,7 @@ nv04_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
 		return -ENOMEM;
 	nvkm_object_ctor(&nv04_gr_chan, oclass, &chan->object);
 	chan->gr = gr;
-	chan->chid = fifoch->chid;
+	chan->chid = fifoch->id;
 	*pobject = &chan->object;
 
 	*ctx_reg(chan, NV04_PGRAPH_DEBUG_3) = 0xfad4ff31;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c
index 942450b33bc6..7fe6e58f6bab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv10.c
@@ -1011,7 +1011,7 @@ nv10_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
 		return -ENOMEM;
 	nvkm_object_ctor(&nv10_gr_chan, oclass, &chan->object);
 	chan->gr = gr;
-	chan->chid = fifoch->chid;
+	chan->chid = fifoch->id;
 	*pobject = &chan->object;
 
 	NV_WRITE_CTX(0x00400e88, 0x08000000);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c
index 6bff10cee71b..75434f5de7ad 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv20.c
@@ -83,7 +83,7 @@ nv20_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
 		return -ENOMEM;
 	nvkm_object_ctor(&nv20_gr_chan, oclass, &chan->object);
 	chan->gr = gr;
-	chan->chid = fifoch->chid;
+	chan->chid = fifoch->id;
 	*pobject = &chan->object;
 
 	ret = nvkm_memory_new(gr->base.engine.subdev.device,
@@ -182,7 +182,7 @@ nv20_gr_intr(struct nvkm_gr *base)
 	struct nv20_gr *gr = nv20_gr(base);
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	struct nvkm_fifo_chan *chan;
+	struct nvkm_chan *chan;
 	u32 stat = nvkm_rd32(device, NV03_PGRAPH_INTR);
 	u32 nsource = nvkm_rd32(device, NV03_PGRAPH_NSOURCE);
 	u32 nstatus = nvkm_rd32(device, NV03_PGRAPH_NSTATUS);
@@ -196,7 +196,7 @@ nv20_gr_intr(struct nvkm_gr *base)
 	char msg[128], src[128], sta[128];
 	unsigned long flags;
 
-	chan = nvkm_fifo_chan_chid(device->fifo, chid, &flags);
+	chan = nvkm_chan_get_chid(&gr->base.engine, chid, &flags);
 
 	nvkm_wr32(device, NV03_PGRAPH_INTR, stat);
 	nvkm_wr32(device, NV04_PGRAPH_FIFO, 0x00000001);
@@ -209,11 +209,11 @@ nv20_gr_intr(struct nvkm_gr *base)
 				   "nstatus %08x [%s] ch %d [%s] subc %d "
 				   "class %04x mthd %04x data %08x\n",
 			   show, msg, nsource, src, nstatus, sta, chid,
-			   chan ? chan->object.client->name : "unknown",
+			   chan ? chan->name : "unknown",
 			   subc, class, mthd, data);
 	}
 
-	nvkm_fifo_chan_put(device->fifo, flags, &chan);
+	nvkm_chan_put(&chan, flags);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c
index f3a56f17d94a..94685e4d4f87 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv25.c
@@ -29,7 +29,7 @@ nv25_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
 		return -ENOMEM;
 	nvkm_object_ctor(&nv25_gr_chan, oclass, &chan->object);
 	chan->gr = gr;
-	chan->chid = fifoch->chid;
+	chan->chid = fifoch->id;
 	*pobject = &chan->object;
 
 	ret = nvkm_memory_new(gr->base.engine.subdev.device,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c
index f268d2642d29..2d6273675291 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv2a.c
@@ -29,7 +29,7 @@ nv2a_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
 		return -ENOMEM;
 	nvkm_object_ctor(&nv2a_gr_chan, oclass, &chan->object);
 	chan->gr = gr;
-	chan->chid = fifoch->chid;
+	chan->chid = fifoch->id;
 	*pobject = &chan->object;
 
 	ret = nvkm_memory_new(gr->base.engine.subdev.device,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c
index e5737cdf2fa1..647bd6fede04 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c
@@ -30,7 +30,7 @@ nv30_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
 		return -ENOMEM;
 	nvkm_object_ctor(&nv30_gr_chan, oclass, &chan->object);
 	chan->gr = gr;
-	chan->chid = fifoch->chid;
+	chan->chid = fifoch->id;
 	*pobject = &chan->object;
 
 	ret = nvkm_memory_new(gr->base.engine.subdev.device,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c
index 1ab2da8ebf4e..2eae3fe4ef4e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c
@@ -29,7 +29,7 @@ nv34_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
 		return -ENOMEM;
 	nvkm_object_ctor(&nv34_gr_chan, oclass, &chan->object);
 	chan->gr = gr;
-	chan->chid = fifoch->chid;
+	chan->chid = fifoch->id;
 	*pobject = &chan->object;
 
 	ret = nvkm_memory_new(gr->base.engine.subdev.device,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c
index 591260f5676b..657d7cdba369 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c
@@ -29,7 +29,7 @@ nv35_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
 		return -ENOMEM;
 	nvkm_object_ctor(&nv35_gr_chan, oclass, &chan->object);
 	chan->gr = gr;
-	chan->chid = fifoch->chid;
+	chan->chid = fifoch->id;
 	*pobject = &chan->object;
 
 	ret = nvkm_memory_new(gr->base.engine.subdev.device,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
index 67f3535ff97e..d2df097a6cf6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv40.c
@@ -275,8 +275,8 @@ nv40_gr_intr(struct nvkm_gr *base)
 				   "nstatus %08x [%s] ch %d [%08x %s] subc %d "
 				   "class %04x mthd %04x data %08x\n",
 			   show, msg, nsource, src, nstatus, sta,
-			   chan ? chan->fifo->chid : -1, inst << 4,
-			   chan ? chan->fifo->object.client->name : "unknown",
+			   chan ? chan->fifo->id : -1, inst << 4,
+			   chan ? chan->fifo->name : "unknown",
 			   subc, class, mthd, data);
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
index 563a10097e95..1ba18a8e380f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
@@ -622,7 +622,7 @@ nv50_gr_intr(struct nvkm_gr *base)
 	struct nv50_gr *gr = nv50_gr(base);
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	struct nvkm_fifo_chan *chan;
+	struct nvkm_chan *chan;
 	u32 stat = nvkm_rd32(device, 0x400100);
 	u32 inst = nvkm_rd32(device, 0x40032c) & 0x0fffffff;
 	u32 addr = nvkm_rd32(device, 0x400704);
@@ -637,10 +637,10 @@ nv50_gr_intr(struct nvkm_gr *base)
 	char msg[128];
 	int chid = -1;
 
-	chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags);
+	chan = nvkm_chan_get_inst(&gr->base.engine, (u64)inst << 12, &flags);
 	if (chan)  {
-		name = chan->object.client->name;
-		chid = chan->chid;
+		name = chan->name;
+		chid = chan->id;
 	}
 
 	if (show & 0x00100000) {
@@ -672,7 +672,7 @@ nv50_gr_intr(struct nvkm_gr *base)
 	if (nvkm_rd32(device, 0x400824) & (1 << 31))
 		nvkm_wr32(device, 0x400824, nvkm_rd32(device, 0x400824) & ~(1 << 31));
 
-	nvkm_fifo_chan_put(device->fifo, flags, &chan);
+	nvkm_chan_put(&chan, flags);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h
index 9b2c66e8be90..08d5c96e6458 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h
@@ -17,6 +17,7 @@ struct nvkm_gr_func {
 	int (*oneinit)(struct nvkm_gr *);
 	int (*init)(struct nvkm_gr *);
 	int (*fini)(struct nvkm_gr *, bool);
+	int (*reset)(struct nvkm_gr *);
 	void (*intr)(struct nvkm_gr *);
 	void (*tile)(struct nvkm_gr *, int region, struct nvkm_fb_tile *);
 	int (*tlb_flush)(struct nvkm_gr *);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c
index 1a8a21844e12..3b6c8100a242 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/tu102.c
@@ -24,13 +24,13 @@
 
 #include <nvif/class.h>
 
-static void
+void
 tu102_gr_init_fecs_exceptions(struct gf100_gr *gr)
 {
-	nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, 0x006f0002);
+	nvkm_wr32(gr->base.engine.subdev.device, 0x409c24, 0x006e0003);
 }
 
-static void
+void
 tu102_gr_init_fs(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
@@ -40,20 +40,21 @@ tu102_gr_init_fs(struct gf100_gr *gr)
 	gk104_grctx_generate_gpc_tpc_nr(gr);
 
 	for (sm = 0; sm < gr->sm_nr; sm++) {
-		nvkm_wr32(device, GPC_UNIT(gr->sm[sm].gpc, 0x0c10 +
-					   gr->sm[sm].tpc * 4), sm);
+		int tpc = gv100_gr_nonpes_aware_tpc(gr, gr->sm[sm].gpc, gr->sm[sm].tpc);
+
+		nvkm_wr32(device, GPC_UNIT(gr->sm[sm].gpc, 0x0c10 + tpc * 4), sm);
 	}
 
 	gm200_grctx_generate_dist_skip_table(gr);
 	gf100_gr_init_num_tpc_per_gpc(gr, true, true);
 }
 
-static void
+void
 tu102_gr_init_zcull(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
 	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
-	const u8 tile_nr = ALIGN(gr->tpc_total, 64);
+	const u8 tile_nr = gr->func->gpc_nr * gr->func->tpc_nr;
 	u8 bank[GPC_MAX] = {}, gpc, i, j;
 	u32 data;
 
@@ -93,7 +94,7 @@ tu102_gr_init_gpc_mmu(struct gf100_gr *gr)
 static const struct gf100_gr_func
 tu102_gr = {
 	.oneinit_tiles = gm200_gr_oneinit_tiles,
-	.oneinit_sm_id = gm200_gr_oneinit_sm_id,
+	.oneinit_sm_id = gv100_gr_oneinit_sm_id,
 	.init = gf100_gr_init,
 	.init_419bd8 = gv100_gr_init_419bd8,
 	.init_gpc_mmu = tu102_gr_init_gpc_mmu,
@@ -109,10 +110,14 @@ tu102_gr = {
 	.init_ppc_exceptions = gk104_gr_init_ppc_exceptions,
 	.init_504430 = gv100_gr_init_504430,
 	.init_shader_exceptions = gv100_gr_init_shader_exceptions,
+	.init_rop_exceptions = gf100_gr_init_rop_exceptions,
+	.init_exception2 = gf100_gr_init_exception2,
+	.init_4188a4 = gv100_gr_init_4188a4,
 	.trap_mp = gv100_gr_trap_mp,
+	.fecs.reset = gf100_gr_fecs_reset,
 	.rops = gm200_gr_rops,
 	.gpc_nr = 6,
-	.tpc_nr = 5,
+	.tpc_nr = 6,
 	.ppc_nr = 3,
 	.grctx = &tu102_grctx,
 	.zbc = &gp102_gr_zbc,
@@ -137,6 +142,7 @@ MODULE_FIRMWARE("nvidia/tu102/gr/sw_ctx.bin");
 MODULE_FIRMWARE("nvidia/tu102/gr/sw_nonctx.bin");
 MODULE_FIRMWARE("nvidia/tu102/gr/sw_bundle_init.bin");
 MODULE_FIRMWARE("nvidia/tu102/gr/sw_method_init.bin");
+MODULE_FIRMWARE("nvidia/tu102/gr/sw_veid_bundle_init.bin");
 
 MODULE_FIRMWARE("nvidia/tu104/gr/fecs_bl.bin");
 MODULE_FIRMWARE("nvidia/tu104/gr/fecs_inst.bin");
@@ -150,6 +156,7 @@ MODULE_FIRMWARE("nvidia/tu104/gr/sw_ctx.bin");
 MODULE_FIRMWARE("nvidia/tu104/gr/sw_nonctx.bin");
 MODULE_FIRMWARE("nvidia/tu104/gr/sw_bundle_init.bin");
 MODULE_FIRMWARE("nvidia/tu104/gr/sw_method_init.bin");
+MODULE_FIRMWARE("nvidia/tu104/gr/sw_veid_bundle_init.bin");
 
 MODULE_FIRMWARE("nvidia/tu106/gr/fecs_bl.bin");
 MODULE_FIRMWARE("nvidia/tu106/gr/fecs_inst.bin");
@@ -163,6 +170,7 @@ MODULE_FIRMWARE("nvidia/tu106/gr/sw_ctx.bin");
 MODULE_FIRMWARE("nvidia/tu106/gr/sw_nonctx.bin");
 MODULE_FIRMWARE("nvidia/tu106/gr/sw_bundle_init.bin");
 MODULE_FIRMWARE("nvidia/tu106/gr/sw_method_init.bin");
+MODULE_FIRMWARE("nvidia/tu106/gr/sw_veid_bundle_init.bin");
 
 MODULE_FIRMWARE("nvidia/tu117/gr/fecs_bl.bin");
 MODULE_FIRMWARE("nvidia/tu117/gr/fecs_inst.bin");
@@ -176,6 +184,7 @@ MODULE_FIRMWARE("nvidia/tu117/gr/sw_ctx.bin");
 MODULE_FIRMWARE("nvidia/tu117/gr/sw_nonctx.bin");
 MODULE_FIRMWARE("nvidia/tu117/gr/sw_bundle_init.bin");
 MODULE_FIRMWARE("nvidia/tu117/gr/sw_method_init.bin");
+MODULE_FIRMWARE("nvidia/tu117/gr/sw_veid_bundle_init.bin");
 
 MODULE_FIRMWARE("nvidia/tu116/gr/fecs_bl.bin");
 MODULE_FIRMWARE("nvidia/tu116/gr/fecs_inst.bin");
@@ -189,6 +198,26 @@ MODULE_FIRMWARE("nvidia/tu116/gr/sw_ctx.bin");
 MODULE_FIRMWARE("nvidia/tu116/gr/sw_nonctx.bin");
 MODULE_FIRMWARE("nvidia/tu116/gr/sw_bundle_init.bin");
 MODULE_FIRMWARE("nvidia/tu116/gr/sw_method_init.bin");
+MODULE_FIRMWARE("nvidia/tu116/gr/sw_veid_bundle_init.bin");
+
+int
+tu102_gr_av_to_init_veid(struct nvkm_blob *blob, struct gf100_gr_pack **ppack)
+{
+	return gk20a_gr_av_to_init_(blob, 64, 0x00100000, ppack);
+}
+
+int
+tu102_gr_load(struct gf100_gr *gr, int ver, const struct gf100_gr_fwif *fwif)
+{
+	int ret;
+
+	ret = gm200_gr_load(gr, ver, fwif);
+	if (ret)
+		return ret;
+
+	return gk20a_gr_load_net(gr, "gr/", "sw_veid_bundle_init", ver, tu102_gr_av_to_init_veid,
+				 &gr->bundle_veid);
+}
 
 static const struct gf100_gr_fwif
 tu102_gr_fwif[] = {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c
index b1054db4c1b8..cb0c3991b2ad 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c
@@ -213,8 +213,8 @@ nv31_mpeg_intr(struct nvkm_engine *engine)
 
 	if (show) {
 		nvkm_error(subdev, "ch %d [%s] %08x %08x %08x %08x\n",
-			   mpeg->chan ? mpeg->chan->fifo->chid : -1,
-			   mpeg->chan ? mpeg->chan->object.client->name :
+			   mpeg->chan ? mpeg->chan->fifo->id : -1,
+			   mpeg->chan ? mpeg->chan->fifo->name :
 			   "unknown", stat, type, mthd, data);
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c
index 521ce43a2871..0890a279458e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c
@@ -182,8 +182,8 @@ nv44_mpeg_intr(struct nvkm_engine *engine)
 
 	if (show) {
 		nvkm_error(subdev, "ch %d [%08x %s] %08x %08x %08x %08x\n",
-			   chan ? chan->fifo->chid : -1, inst << 4,
-			   chan ? chan->object.client->name : "unknown",
+			   chan ? chan->fifo->id : -1, inst << 4,
+			   chan ? chan->fifo->name : "unknown",
 			   stat, type, mthd, data);
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild
index 9a0fd9812750..f05e79670d22 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/Kbuild
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: MIT
 nvkm-y += nvkm/engine/nvdec/base.o
 nvkm-y += nvkm/engine/nvdec/gm107.o
+nvkm-y += nvkm/engine/nvdec/ga102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c
index b0181cc5953b..1f6e3b32ba16 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/base.c
@@ -37,7 +37,7 @@ nvkm_nvdec = {
 
 int
 nvkm_nvdec_new_(const struct nvkm_nvdec_fwif *fwif, struct nvkm_device *device,
-		enum nvkm_subdev_type type, int inst, struct nvkm_nvdec **pnvdec)
+		enum nvkm_subdev_type type, int inst, u32 addr, struct nvkm_nvdec **pnvdec)
 {
 	struct nvkm_nvdec *nvdec;
 	int ret;
@@ -57,5 +57,5 @@ nvkm_nvdec_new_(const struct nvkm_nvdec_fwif *fwif, struct nvkm_device *device,
 	nvdec->func = fwif->func;
 
 	return nvkm_falcon_ctor(nvdec->func->flcn, &nvdec->engine.subdev,
-				nvdec->engine.subdev.name, 0, &nvdec->falcon);
+				nvdec->engine.subdev.name, addr, &nvdec->falcon);
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/ga102.c
new file mode 100644
index 000000000000..37d8c3c0f3ab
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/ga102.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <subdev/mc.h>
+#include <subdev/timer.h>
+
+static const struct nvkm_falcon_func
+ga102_nvdec_flcn = {
+	.disable = gm200_flcn_disable,
+	.enable = gm200_flcn_enable,
+	.addr2 = 0x1c00,
+	.reset_pmc = true,
+	.reset_prep = ga102_flcn_reset_prep,
+	.reset_wait_mem_scrubbing = ga102_flcn_reset_wait_mem_scrubbing,
+	.imem_dma = &ga102_flcn_dma,
+	.dmem_dma = &ga102_flcn_dma,
+};
+
+static const struct nvkm_nvdec_func
+ga102_nvdec = {
+	.flcn = &ga102_nvdec_flcn,
+};
+
+static int
+ga102_nvdec_nofw(struct nvkm_nvdec *nvdec, int ver, const struct nvkm_nvdec_fwif *fwif)
+{
+	return 0;
+}
+
+static const struct nvkm_nvdec_fwif
+ga102_nvdec_fwif[] = {
+	{ -1, ga102_nvdec_nofw, &ga102_nvdec },
+	{}
+};
+
+int
+ga102_nvdec_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
+		struct nvkm_nvdec **pnvdec)
+{
+	return nvkm_nvdec_new_(ga102_nvdec_fwif, device, type, inst, 0x848000, pnvdec);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/gm107.c
index 8c44ce44a6d7..564f7e8960a2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/gm107.c
@@ -23,18 +23,13 @@
 
 static const struct nvkm_falcon_func
 gm107_nvdec_flcn = {
+	.disable = gm200_flcn_disable,
+	.enable = gm200_flcn_enable,
+	.reset_pmc = true,
+	.reset_wait_mem_scrubbing = gm200_flcn_reset_wait_mem_scrubbing,
 	.debug = 0xd00,
-	.fbif = 0x600,
-	.load_imem = nvkm_falcon_v1_load_imem,
-	.load_dmem = nvkm_falcon_v1_load_dmem,
-	.read_dmem = nvkm_falcon_v1_read_dmem,
-	.bind_context = nvkm_falcon_v1_bind_context,
-	.wait_for_halt = nvkm_falcon_v1_wait_for_halt,
-	.clear_interrupt = nvkm_falcon_v1_clear_interrupt,
-	.set_start_addr = nvkm_falcon_v1_set_start_addr,
-	.start = nvkm_falcon_v1_start,
-	.enable = nvkm_falcon_v1_enable,
-	.disable = nvkm_falcon_v1_disable,
+	.imem_pio = &gm200_flcn_imem_pio,
+	.dmem_pio = &gm200_flcn_dmem_pio,
 };
 
 static const struct nvkm_nvdec_func
@@ -59,5 +54,5 @@ int
 gm107_nvdec_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 		struct nvkm_nvdec **pnvdec)
 {
-	return nvkm_nvdec_new_(gm107_nvdec_fwif, device, type, inst, pnvdec);
+	return nvkm_nvdec_new_(gm107_nvdec_fwif, device, type, inst, 0, pnvdec);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h
index 0920f6a887e2..61e1f7aaa509 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvdec/priv.h
@@ -15,5 +15,5 @@ struct nvkm_nvdec_fwif {
 };
 
 int nvkm_nvdec_new_(const struct nvkm_nvdec_fwif *fwif, struct nvkm_device *,
-		    enum nvkm_subdev_type, int, struct nvkm_nvdec **);
+		    enum nvkm_subdev_type, int, u32 addr, struct nvkm_nvdec **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/nvenc/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/nvenc/gm107.c
index f44d41bf2034..ad27d8b97569 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/nvenc/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/nvenc/gm107.c
@@ -24,17 +24,6 @@
 
 static const struct nvkm_falcon_func
 gm107_nvenc_flcn = {
-	.fbif = 0x800,
-	.load_imem = nvkm_falcon_v1_load_imem,
-	.load_dmem = nvkm_falcon_v1_load_dmem,
-	.read_dmem = nvkm_falcon_v1_read_dmem,
-	.bind_context = nvkm_falcon_v1_bind_context,
-	.wait_for_halt = nvkm_falcon_v1_wait_for_halt,
-	.clear_interrupt = nvkm_falcon_v1_clear_interrupt,
-	.set_start_addr = nvkm_falcon_v1_set_start_addr,
-	.start = nvkm_falcon_v1_start,
-	.enable = nvkm_falcon_v1_enable,
-	.disable = nvkm_falcon_v1_disable,
 };
 
 static const struct nvkm_nvenc_func
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec/g98.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec/g98.c
index 1b87df03c823..c15b2cbf506b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sec/g98.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec/g98.c
@@ -40,7 +40,7 @@ static const struct nvkm_enum g98_sec_isr_error_name[] = {
 };
 
 static void
-g98_sec_intr(struct nvkm_falcon *sec, struct nvkm_fifo_chan *chan)
+g98_sec_intr(struct nvkm_falcon *sec, struct nvkm_chan *chan)
 {
 	struct nvkm_subdev *subdev = &sec->engine.subdev;
 	struct nvkm_device *device = subdev->device;
@@ -54,9 +54,9 @@ g98_sec_intr(struct nvkm_falcon *sec, struct nvkm_fifo_chan *chan)
 
 	nvkm_error(subdev, "DISPATCH_ERROR %04x [%s] ch %d [%010llx %s] "
 			   "subc %d mthd %04x data %08x\n", ssta,
-		   en ? en->name : "UNKNOWN", chan ? chan->chid : -1,
+		   en ? en->name : "UNKNOWN", chan ? chan->id : -1,
 		   chan ? chan->inst->addr : 0,
-		   chan ? chan->object.client->name : "unknown",
+		   chan ? chan->name : "unknown",
 		   subc, mthd, data);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild
index 63cd2be3de08..19feadb1f67b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/Kbuild
@@ -3,3 +3,4 @@ nvkm-y += nvkm/engine/sec2/base.o
 nvkm-y += nvkm/engine/sec2/gp102.o
 nvkm-y += nvkm/engine/sec2/gp108.o
 nvkm-y += nvkm/engine/sec2/tu102.o
+nvkm-y += nvkm/engine/sec2/ga102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c
index 092c6d0b8e01..f2c60da5d1e8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/base.c
@@ -22,53 +22,99 @@
 #include "priv.h"
 
 #include <core/firmware.h>
-#include <subdev/top.h>
+#include <subdev/mc.h>
+#include <subdev/timer.h>
 
-static void
-nvkm_sec2_recv(struct work_struct *work)
+#include <nvfw/sec2.h>
+
+static int
+nvkm_sec2_finimsg(void *priv, struct nvfw_falcon_msg *hdr)
+{
+	struct nvkm_sec2 *sec2 = priv;
+
+	atomic_set(&sec2->running, 0);
+	return 0;
+}
+
+static int
+nvkm_sec2_fini(struct nvkm_engine *engine, bool suspend)
 {
-	struct nvkm_sec2 *sec2 = container_of(work, typeof(*sec2), work);
+	struct nvkm_sec2 *sec2 = nvkm_sec2(engine);
+	struct nvkm_subdev *subdev = &sec2->engine.subdev;
+	struct nvkm_falcon *falcon = &sec2->falcon;
+	struct nvkm_falcon_cmdq *cmdq = sec2->cmdq;
+	struct nvfw_falcon_cmd cmd = {
+		.unit_id = sec2->func->unit_unload,
+		.size = sizeof(cmd),
+	};
+	int ret;
 
-	if (!sec2->initmsg_received) {
-		int ret = sec2->func->initmsg(sec2);
-		if (ret) {
-			nvkm_error(&sec2->engine.subdev,
-				   "error parsing init message: %d\n", ret);
-			return;
-		}
+	if (!subdev->use.enabled)
+		return 0;
 
-		sec2->initmsg_received = true;
+	if (atomic_read(&sec2->initmsg) == 1) {
+		ret = nvkm_falcon_cmdq_send(cmdq, &cmd, nvkm_sec2_finimsg, sec2,
+					    msecs_to_jiffies(1000));
+		WARN_ON(ret);
+
+		nvkm_msec(subdev->device, 2000,
+			if (nvkm_falcon_rd32(falcon, 0x100) & 0x00000010)
+				break;
+		);
 	}
 
-	nvkm_falcon_msgq_recv(sec2->msgq);
+	nvkm_inth_block(&subdev->inth);
+
+	nvkm_falcon_cmdq_fini(cmdq);
+	falcon->func->disable(falcon);
+	nvkm_falcon_put(falcon, subdev);
+	return 0;
 }
 
-static void
-nvkm_sec2_intr(struct nvkm_engine *engine)
+static int
+nvkm_sec2_init(struct nvkm_engine *engine)
 {
 	struct nvkm_sec2 *sec2 = nvkm_sec2(engine);
-	sec2->func->intr(sec2);
+	struct nvkm_subdev *subdev = &sec2->engine.subdev;
+	struct nvkm_falcon *falcon = &sec2->falcon;
+	int ret;
+
+	ret = nvkm_falcon_get(falcon, subdev);
+	if (ret)
+		return ret;
+
+	nvkm_falcon_wr32(falcon, 0x014, 0xffffffff);
+	atomic_set(&sec2->initmsg, 0);
+	atomic_set(&sec2->running, 1);
+	nvkm_inth_allow(&subdev->inth);
+
+	nvkm_falcon_start(falcon);
+	return 0;
 }
 
 static int
-nvkm_sec2_fini(struct nvkm_engine *engine, bool suspend)
+nvkm_sec2_oneinit(struct nvkm_engine *engine)
 {
 	struct nvkm_sec2 *sec2 = nvkm_sec2(engine);
-
-	flush_work(&sec2->work);
-
-	if (suspend) {
-		nvkm_falcon_cmdq_fini(sec2->cmdq);
-		sec2->initmsg_received = false;
+	struct nvkm_subdev *subdev = &sec2->engine.subdev;
+	struct nvkm_intr *intr = &sec2->engine.subdev.device->mc->intr;
+	enum nvkm_intr_type type = NVKM_INTR_SUBDEV;
+
+	if (sec2->func->intr_vector) {
+		intr = sec2->func->intr_vector(sec2, &type);
+		if (IS_ERR(intr))
+			return PTR_ERR(intr);
 	}
 
-	return 0;
+	return nvkm_inth_add(intr, type, NVKM_INTR_PRIO_NORMAL, subdev, sec2->func->intr,
+			     &subdev->inth);
 }
 
 static void *
 nvkm_sec2_dtor(struct nvkm_engine *engine)
 {
 	struct nvkm_sec2 *sec2 = nvkm_sec2(engine);
+
 	nvkm_falcon_msgq_del(&sec2->msgq);
 	nvkm_falcon_cmdq_del(&sec2->cmdq);
 	nvkm_falcon_qmgr_del(&sec2->qmgr);
@@ -79,8 +125,9 @@ nvkm_sec2_dtor(struct nvkm_engine *engine)
 static const struct nvkm_engine_func
 nvkm_sec2 = {
 	.dtor = nvkm_sec2_dtor,
+	.oneinit = nvkm_sec2_oneinit,
+	.init = nvkm_sec2_init,
 	.fini = nvkm_sec2_fini,
-	.intr = nvkm_sec2_intr,
 };
 
 int
@@ -113,6 +160,5 @@ nvkm_sec2_new_(const struct nvkm_sec2_fwif *fwif, struct nvkm_device *device,
 	    (ret = nvkm_falcon_msgq_new(sec2->qmgr, "msgq", &sec2->msgq)))
 		return ret;
 
-	INIT_WORK(&sec2->work, nvkm_sec2_recv);
 	return 0;
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/ga102.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/ga102.c
new file mode 100644
index 000000000000..945abb8156d7
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/ga102.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+#include <subdev/acr.h>
+#include <subdev/vfn.h>
+
+#include <nvfw/flcn.h>
+#include <nvfw/sec2.h>
+
+static int
+ga102_sec2_initmsg(struct nvkm_sec2 *sec2)
+{
+	struct nv_sec2_init_msg_v1 msg;
+	int ret, i;
+
+	ret = nvkm_falcon_msgq_recv_initmsg(sec2->msgq, &msg, sizeof(msg));
+	if (ret)
+		return ret;
+
+	if (msg.hdr.unit_id != NV_SEC2_UNIT_INIT ||
+	    msg.msg_type != NV_SEC2_INIT_MSG_INIT)
+		return -EINVAL;
+
+	for (i = 0; i < ARRAY_SIZE(msg.queue_info); i++) {
+		if (msg.queue_info[i].id == NV_SEC2_INIT_MSG_QUEUE_ID_MSGQ) {
+			nvkm_falcon_msgq_init(sec2->msgq, msg.queue_info[i].index,
+							  msg.queue_info[i].offset,
+							  msg.queue_info[i].size);
+		} else {
+			nvkm_falcon_cmdq_init(sec2->cmdq, msg.queue_info[i].index,
+							  msg.queue_info[i].offset,
+							  msg.queue_info[i].size);
+		}
+	}
+
+	return 0;
+}
+
+static struct nvkm_intr *
+ga102_sec2_intr_vector(struct nvkm_sec2 *sec2, enum nvkm_intr_type *pvector)
+{
+	struct nvkm_device *device = sec2->engine.subdev.device;
+	struct nvkm_falcon *falcon = &sec2->falcon;
+	int ret;
+
+	ret = ga102_flcn_select(falcon);
+	if (ret)
+		return ERR_PTR(ret);
+
+	*pvector = nvkm_rd32(device, 0x8403e0) & 0x000000ff;
+	return &device->vfn->intr;
+}
+
+static int
+ga102_sec2_acr_bootstrap_falcon_callback(void *priv, struct nvfw_falcon_msg *hdr)
+{
+	struct nv_sec2_acr_bootstrap_falcon_msg_v1 *msg =
+		container_of(hdr, typeof(*msg), msg.hdr);
+	struct nvkm_subdev *subdev = priv;
+	const char *name = nvkm_acr_lsf_id(msg->falcon_id);
+
+	if (msg->error_code) {
+		nvkm_error(subdev, "ACR_BOOTSTRAP_FALCON failed for falcon %d [%s]: %08x %08x\n",
+			   msg->falcon_id, name, msg->error_code, msg->unkn08);
+		return -EINVAL;
+	}
+
+	nvkm_debug(subdev, "%s booted\n", name);
+	return 0;
+}
+
+static int
+ga102_sec2_acr_bootstrap_falcon(struct nvkm_falcon *falcon, enum nvkm_acr_lsf_id id)
+{
+	struct nvkm_sec2 *sec2 = container_of(falcon, typeof(*sec2), falcon);
+	struct nv_sec2_acr_bootstrap_falcon_cmd_v1 cmd = {
+		.cmd.hdr.unit_id = sec2->func->unit_acr,
+		.cmd.hdr.size = sizeof(cmd),
+		.cmd.cmd_type = NV_SEC2_ACR_CMD_BOOTSTRAP_FALCON,
+		.flags = NV_SEC2_ACR_BOOTSTRAP_FALCON_FLAGS_RESET_YES,
+		.falcon_id = id,
+	};
+
+	return nvkm_falcon_cmdq_send(sec2->cmdq, &cmd.cmd.hdr,
+				     ga102_sec2_acr_bootstrap_falcon_callback,
+				     &sec2->engine.subdev,
+				     msecs_to_jiffies(1000));
+}
+
+static const struct nvkm_acr_lsf_func
+ga102_sec2_acr_0 = {
+	.bld_size = sizeof(struct flcn_bl_dmem_desc_v2),
+	.bld_write = gp102_sec2_acr_bld_write_1,
+	.bld_patch = gp102_sec2_acr_bld_patch_1,
+	.bootstrap_falcons = BIT_ULL(NVKM_ACR_LSF_FECS) |
+			     BIT_ULL(NVKM_ACR_LSF_GPCCS) |
+			     BIT_ULL(NVKM_ACR_LSF_SEC2),
+	.bootstrap_falcon = ga102_sec2_acr_bootstrap_falcon,
+};
+
+static const struct nvkm_falcon_func
+ga102_sec2_flcn = {
+	.disable = gm200_flcn_disable,
+	.enable = gm200_flcn_enable,
+	.select = ga102_flcn_select,
+	.addr2 = 0x1000,
+	.reset_pmc = true,
+	.reset_eng = gp102_flcn_reset_eng,
+	.reset_prep = ga102_flcn_reset_prep,
+	.reset_wait_mem_scrubbing = ga102_flcn_reset_wait_mem_scrubbing,
+	.imem_dma = &ga102_flcn_dma,
+	.dmem_pio = &gm200_flcn_dmem_pio,
+	.dmem_dma = &ga102_flcn_dma,
+	.emem_addr = 0x01000000,
+	.emem_pio = &gp102_flcn_emem_pio,
+	.start = nvkm_falcon_v1_start,
+	.cmdq = { 0xc00, 0xc04, 8 },
+	.msgq = { 0xc80, 0xc84, 8 },
+};
+
+static const struct nvkm_sec2_func
+ga102_sec2 = {
+	.flcn = &ga102_sec2_flcn,
+	.intr_vector = ga102_sec2_intr_vector,
+	.intr = gp102_sec2_intr,
+	.initmsg = ga102_sec2_initmsg,
+	.unit_acr = NV_SEC2_UNIT_V2_ACR,
+	.unit_unload = NV_SEC2_UNIT_V2_UNLOAD,
+};
+
+MODULE_FIRMWARE("nvidia/ga102/sec2/desc.bin");
+MODULE_FIRMWARE("nvidia/ga102/sec2/image.bin");
+MODULE_FIRMWARE("nvidia/ga102/sec2/sig.bin");
+MODULE_FIRMWARE("nvidia/ga102/sec2/hs_bl_sig.bin");
+
+MODULE_FIRMWARE("nvidia/ga103/sec2/desc.bin");
+MODULE_FIRMWARE("nvidia/ga103/sec2/image.bin");
+MODULE_FIRMWARE("nvidia/ga103/sec2/sig.bin");
+MODULE_FIRMWARE("nvidia/ga103/sec2/hs_bl_sig.bin");
+
+MODULE_FIRMWARE("nvidia/ga104/sec2/desc.bin");
+MODULE_FIRMWARE("nvidia/ga104/sec2/image.bin");
+MODULE_FIRMWARE("nvidia/ga104/sec2/sig.bin");
+MODULE_FIRMWARE("nvidia/ga104/sec2/hs_bl_sig.bin");
+
+MODULE_FIRMWARE("nvidia/ga106/sec2/desc.bin");
+MODULE_FIRMWARE("nvidia/ga106/sec2/image.bin");
+MODULE_FIRMWARE("nvidia/ga106/sec2/sig.bin");
+MODULE_FIRMWARE("nvidia/ga106/sec2/hs_bl_sig.bin");
+
+MODULE_FIRMWARE("nvidia/ga107/sec2/desc.bin");
+MODULE_FIRMWARE("nvidia/ga107/sec2/image.bin");
+MODULE_FIRMWARE("nvidia/ga107/sec2/sig.bin");
+MODULE_FIRMWARE("nvidia/ga107/sec2/hs_bl_sig.bin");
+
+static int
+ga102_sec2_load(struct nvkm_sec2 *sec2, int ver,
+		const struct nvkm_sec2_fwif *fwif)
+{
+	return nvkm_acr_lsfw_load_sig_image_desc_v2(&sec2->engine.subdev, &sec2->falcon,
+						    NVKM_ACR_LSF_SEC2, "sec2/", ver, fwif->acr);
+}
+
+static const struct nvkm_sec2_fwif
+ga102_sec2_fwif[] = {
+	{  0, ga102_sec2_load, &ga102_sec2, &ga102_sec2_acr_0 },
+	{ -1, gp102_sec2_nofw, &ga102_sec2 }
+};
+
+int
+ga102_sec2_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
+	       struct nvkm_sec2 **psec2)
+{
+	/* TOP info wasn't updated on Turing to reflect the PRI
+	 * address change for some reason.  We override it here.
+	 */
+	return nvkm_sec2_new_(ga102_sec2_fwif, device, type, inst, 0x840000, psec2);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c
index 44e39f5743d5..c64013d10500 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/gp102.c
@@ -74,16 +74,6 @@ gp102_sec2_acr_bootstrap_falcon(struct nvkm_falcon *falcon,
 				     msecs_to_jiffies(1000));
 }
 
-static int
-gp102_sec2_acr_boot(struct nvkm_falcon *falcon)
-{
-	struct nv_sec2_args args = {};
-	nvkm_falcon_load_dmem(falcon, &args,
-			      falcon->func->emem_addr, sizeof(args), 0);
-	nvkm_falcon_start(falcon);
-	return 0;
-}
-
 static void
 gp102_sec2_acr_bld_patch(struct nvkm_acr *acr, u32 bld, s64 adjust)
 {
@@ -122,7 +112,6 @@ gp102_sec2_acr_0 = {
 	.bld_size = sizeof(struct loader_config_v1),
 	.bld_write = gp102_sec2_acr_bld_write,
 	.bld_patch = gp102_sec2_acr_bld_patch,
-	.boot = gp102_sec2_acr_boot,
 	.bootstrap_falcons = BIT_ULL(NVKM_ACR_LSF_FECS) |
 			     BIT_ULL(NVKM_ACR_LSF_GPCCS) |
 			     BIT_ULL(NVKM_ACR_LSF_SEC2),
@@ -160,89 +149,68 @@ gp102_sec2_initmsg(struct nvkm_sec2 *sec2)
 	return 0;
 }
 
-void
-gp102_sec2_intr(struct nvkm_sec2 *sec2)
+irqreturn_t
+gp102_sec2_intr(struct nvkm_inth *inth)
 {
+	struct nvkm_sec2 *sec2 = container_of(inth, typeof(*sec2), engine.subdev.inth);
 	struct nvkm_subdev *subdev = &sec2->engine.subdev;
 	struct nvkm_falcon *falcon = &sec2->falcon;
 	u32 disp = nvkm_falcon_rd32(falcon, 0x01c);
 	u32 intr = nvkm_falcon_rd32(falcon, 0x008) & disp & ~(disp >> 16);
 
 	if (intr & 0x00000040) {
-		schedule_work(&sec2->work);
+		if (unlikely(atomic_read(&sec2->initmsg) == 0)) {
+			int ret = sec2->func->initmsg(sec2);
+
+			if (ret)
+				nvkm_error(subdev, "error parsing init message: %d\n", ret);
+
+			atomic_set(&sec2->initmsg, ret ?: 1);
+		}
+
+		if (atomic_read(&sec2->initmsg) > 0) {
+			if (!nvkm_falcon_msgq_empty(sec2->msgq))
+				nvkm_falcon_msgq_recv(sec2->msgq);
+		}
+
 		nvkm_falcon_wr32(falcon, 0x004, 0x00000040);
 		intr &= ~0x00000040;
 	}
 
+	if (intr & 0x00000010) {
+		if (atomic_read(&sec2->running)) {
+			FLCN_ERR(falcon, "halted");
+			gm200_flcn_tracepc(falcon);
+		}
+
+		nvkm_falcon_wr32(falcon, 0x004, 0x00000010);
+		intr &= ~0x00000010;
+	}
+
 	if (intr) {
 		nvkm_error(subdev, "unhandled intr %08x\n", intr);
 		nvkm_falcon_wr32(falcon, 0x004, intr);
 	}
-}
 
-int
-gp102_sec2_flcn_enable(struct nvkm_falcon *falcon)
-{
-	nvkm_falcon_mask(falcon, 0x3c0, 0x00000001, 0x00000001);
-	udelay(10);
-	nvkm_falcon_mask(falcon, 0x3c0, 0x00000001, 0x00000000);
-	return nvkm_falcon_v1_enable(falcon);
-}
-
-void
-gp102_sec2_flcn_bind_context(struct nvkm_falcon *falcon,
-			     struct nvkm_memory *ctx)
-{
-	struct nvkm_device *device = falcon->owner->device;
-
-	nvkm_falcon_v1_bind_context(falcon, ctx);
-	if (!ctx)
-		return;
-
-	/* Not sure if this is a WAR for a HW issue, or some additional
-	 * programming sequence that's needed to properly complete the
-	 * context switch we trigger above.
-	 *
-	 * Fixes unreliability of booting the SEC2 RTOS on Quadro P620,
-	 * particularly when resuming from suspend.
-	 *
-	 * Also removes the need for an odd workaround where we needed
-	 * to program SEC2's FALCON_CPUCTL_ALIAS_STARTCPU twice before
-	 * the SEC2 RTOS would begin executing.
-	 */
-	nvkm_msec(device, 10,
-		u32 irqstat = nvkm_falcon_rd32(falcon, 0x008);
-		u32 flcn0dc = nvkm_falcon_rd32(falcon, 0x0dc);
-		if ((irqstat & 0x00000008) &&
-		    (flcn0dc & 0x00007000) == 0x00005000)
-			break;
-	);
-
-	nvkm_falcon_mask(falcon, 0x004, 0x00000008, 0x00000008);
-	nvkm_falcon_mask(falcon, 0x058, 0x00000002, 0x00000002);
-
-	nvkm_msec(device, 10,
-		u32 flcn0dc = nvkm_falcon_rd32(falcon, 0x0dc);
-		if ((flcn0dc & 0x00007000) == 0x00000000)
-			break;
-	);
+	return IRQ_HANDLED;
 }
 
 static const struct nvkm_falcon_func
 gp102_sec2_flcn = {
+	.disable = gm200_flcn_disable,
+	.enable = gm200_flcn_enable,
+	.reset_pmc = true,
+	.reset_eng = gp102_flcn_reset_eng,
+	.reset_wait_mem_scrubbing = gm200_flcn_reset_wait_mem_scrubbing,
 	.debug = 0x408,
-	.fbif = 0x600,
-	.load_imem = nvkm_falcon_v1_load_imem,
-	.load_dmem = nvkm_falcon_v1_load_dmem,
-	.read_dmem = nvkm_falcon_v1_read_dmem,
+	.bind_inst = gm200_flcn_bind_inst,
+	.bind_stat = gm200_flcn_bind_stat,
+	.bind_intr = true,
+	.imem_pio = &gm200_flcn_imem_pio,
+	.dmem_pio = &gm200_flcn_dmem_pio,
 	.emem_addr = 0x01000000,
-	.bind_context = gp102_sec2_flcn_bind_context,
-	.wait_for_halt = nvkm_falcon_v1_wait_for_halt,
-	.clear_interrupt = nvkm_falcon_v1_clear_interrupt,
-	.set_start_addr = nvkm_falcon_v1_set_start_addr,
+	.emem_pio = &gp102_flcn_emem_pio,
 	.start = nvkm_falcon_v1_start,
-	.enable = gp102_sec2_flcn_enable,
-	.disable = nvkm_falcon_v1_disable,
 	.cmdq = { 0xa00, 0xa04, 8 },
 	.msgq = { 0xa30, 0xa34, 8 },
 };
@@ -250,6 +218,7 @@ gp102_sec2_flcn = {
 const struct nvkm_sec2_func
 gp102_sec2 = {
 	.flcn = &gp102_sec2_flcn,
+	.unit_unload = NV_SEC2_UNIT_UNLOAD,
 	.unit_acr = NV_SEC2_UNIT_ACR,
 	.intr = gp102_sec2_intr,
 	.initmsg = gp102_sec2_initmsg,
@@ -268,7 +237,7 @@ MODULE_FIRMWARE("nvidia/gp107/sec2/desc.bin");
 MODULE_FIRMWARE("nvidia/gp107/sec2/image.bin");
 MODULE_FIRMWARE("nvidia/gp107/sec2/sig.bin");
 
-static void
+void
 gp102_sec2_acr_bld_patch_1(struct nvkm_acr *acr, u32 bld, s64 adjust)
 {
 	struct flcn_bl_dmem_desc_v2 hdr;
@@ -279,7 +248,7 @@ gp102_sec2_acr_bld_patch_1(struct nvkm_acr *acr, u32 bld, s64 adjust)
 	flcn_bl_dmem_desc_v2_dump(&acr->subdev, &hdr);
 }
 
-static void
+void
 gp102_sec2_acr_bld_write_1(struct nvkm_acr *acr, u32 bld,
 			   struct nvkm_acr_lsfw *lsfw)
 {
@@ -304,7 +273,6 @@ gp102_sec2_acr_1 = {
 	.bld_size = sizeof(struct flcn_bl_dmem_desc_v2),
 	.bld_write = gp102_sec2_acr_bld_write_1,
 	.bld_patch = gp102_sec2_acr_bld_patch_1,
-	.boot = gp102_sec2_acr_boot,
 	.bootstrap_falcons = BIT_ULL(NVKM_ACR_LSF_FECS) |
 			     BIT_ULL(NVKM_ACR_LSF_GPCCS) |
 			     BIT_ULL(NVKM_ACR_LSF_SEC2),
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h
index af19229e885d..172d2705c199 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/priv.h
@@ -2,15 +2,18 @@
 #ifndef __NVKM_SEC2_PRIV_H__
 #define __NVKM_SEC2_PRIV_H__
 #include <engine/sec2.h>
+struct nvkm_acr_lsfw;
 
 struct nvkm_sec2_func {
 	const struct nvkm_falcon_func *flcn;
+	u8 unit_unload;
 	u8 unit_acr;
-	void (*intr)(struct nvkm_sec2 *);
+	struct nvkm_intr *(*intr_vector)(struct nvkm_sec2 *, enum nvkm_intr_type *);
+	irqreturn_t (*intr)(struct nvkm_inth *);
 	int (*initmsg)(struct nvkm_sec2 *);
 };
 
-void gp102_sec2_intr(struct nvkm_sec2 *);
+irqreturn_t gp102_sec2_intr(struct nvkm_inth *);
 int gp102_sec2_initmsg(struct nvkm_sec2 *);
 
 struct nvkm_sec2_fwif {
@@ -24,6 +27,8 @@ int gp102_sec2_nofw(struct nvkm_sec2 *, int, const struct nvkm_sec2_fwif *);
 int gp102_sec2_load(struct nvkm_sec2 *, int, const struct nvkm_sec2_fwif *);
 extern const struct nvkm_sec2_func gp102_sec2;
 extern const struct nvkm_acr_lsf_func gp102_sec2_acr_1;
+void gp102_sec2_acr_bld_write_1(struct nvkm_acr *, u32, struct nvkm_acr_lsfw *);
+void gp102_sec2_acr_bld_patch_1(struct nvkm_acr *, u32, s64);
 
 int nvkm_sec2_new_(const struct nvkm_sec2_fwif *, struct nvkm_device *, enum nvkm_subdev_type,
 		   int, u32 addr, struct nvkm_sec2 **);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c
index f3faeb705575..0afc4b2fa529 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec2/tu102.c
@@ -22,21 +22,24 @@
 #include "priv.h"
 #include <subdev/acr.h>
 
+#include <nvfw/sec2.h>
+
 static const struct nvkm_falcon_func
 tu102_sec2_flcn = {
+	.disable = gm200_flcn_disable,
+	.enable = gm200_flcn_enable,
+	.reset_pmc = true,
+	.reset_eng = gp102_flcn_reset_eng,
+	.reset_wait_mem_scrubbing = gm200_flcn_reset_wait_mem_scrubbing,
 	.debug = 0x408,
-	.fbif = 0x600,
-	.load_imem = nvkm_falcon_v1_load_imem,
-	.load_dmem = nvkm_falcon_v1_load_dmem,
-	.read_dmem = nvkm_falcon_v1_read_dmem,
+	.bind_inst = gm200_flcn_bind_inst,
+	.bind_stat = gm200_flcn_bind_stat,
+	.bind_intr = true,
+	.imem_pio = &gm200_flcn_imem_pio,
+	.dmem_pio = &gm200_flcn_dmem_pio,
 	.emem_addr = 0x01000000,
-	.bind_context = gp102_sec2_flcn_bind_context,
-	.wait_for_halt = nvkm_falcon_v1_wait_for_halt,
-	.clear_interrupt = nvkm_falcon_v1_clear_interrupt,
-	.set_start_addr = nvkm_falcon_v1_set_start_addr,
+	.emem_pio = &gp102_flcn_emem_pio,
 	.start = nvkm_falcon_v1_start,
-	.enable = nvkm_falcon_v1_enable,
-	.disable = nvkm_falcon_v1_disable,
 	.cmdq = { 0xc00, 0xc04, 8 },
 	.msgq = { 0xc80, 0xc84, 8 },
 };
@@ -44,7 +47,8 @@ tu102_sec2_flcn = {
 static const struct nvkm_sec2_func
 tu102_sec2 = {
 	.flcn = &tu102_sec2_flcn,
-	.unit_acr = 0x07,
+	.unit_unload = NV_SEC2_UNIT_V2_UNLOAD,
+	.unit_acr = NV_SEC2_UNIT_V2_ACR,
 	.intr = gp102_sec2_intr,
 	.initmsg = gp102_sec2_initmsg,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/sw/base.c
index 14871d0bd746..a9d464db6974 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/base.c
@@ -35,7 +35,7 @@ nvkm_sw_mthd(struct nvkm_sw *sw, int chid, int subc, u32 mthd, u32 data)
 
 	spin_lock_irqsave(&sw->engine.lock, flags);
 	list_for_each_entry(chan, &sw->chan, head) {
-		if (chan->fifo->chid == chid) {
+		if (chan->fifo->id == chid) {
 			handled = nvkm_sw_chan_mthd(chan, subc, mthd, data);
 			list_del(&chan->head);
 			list_add(&chan->head, &sw->chan);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.c
index f28967065639..834b8cbed51d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.c
@@ -23,7 +23,6 @@
  */
 #include "chan.h"
 
-#include <core/notify.h>
 #include <engine/fifo.h>
 
 #include <nvif/event.h>
@@ -36,7 +35,7 @@ nvkm_sw_chan_mthd(struct nvkm_sw_chan *chan, int subc, u32 mthd, u32 data)
 	case 0x0000:
 		return true;
 	case 0x0500:
-		nvkm_event_send(&chan->event, 1, 0, NULL, 0);
+		nvkm_event_ntfy(&chan->event, 0, NVKM_SW_CHAN_EVENT_PAGE_FLIP);
 		return true;
 	default:
 		if (chan->func->mthd)
@@ -46,27 +45,8 @@ nvkm_sw_chan_mthd(struct nvkm_sw_chan *chan, int subc, u32 mthd, u32 data)
 	return false;
 }
 
-static int
-nvkm_sw_chan_event_ctor(struct nvkm_object *object, void *data, u32 size,
-			struct nvkm_notify *notify)
-{
-	union {
-		struct nvif_notify_uevent_req none;
-	} *req = data;
-	int ret = -ENOSYS;
-
-	if (!(ret = nvif_unvers(ret, &data, &size, req->none))) {
-		notify->size  = sizeof(struct nvif_notify_uevent_rep);
-		notify->types = 1;
-		notify->index = 0;
-	}
-
-	return ret;
-}
-
 static const struct nvkm_event_func
 nvkm_sw_chan_event = {
-	.ctor = nvkm_sw_chan_event_ctor,
 };
 
 static void *
@@ -107,5 +87,5 @@ nvkm_sw_chan_ctor(const struct nvkm_sw_chan_func *func, struct nvkm_sw *sw,
 	list_add(&chan->head, &sw->chan);
 	spin_unlock_irqrestore(&sw->engine.lock, flags);
 
-	return nvkm_event_init(&nvkm_sw_chan_event, 1, 1, &chan->event);
+	return nvkm_event_init(&nvkm_sw_chan_event, &sw->engine.subdev, 1, 1, &chan->event);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h b/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h
index 32de53427aa4..67b2e5ea93d9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/chan.h
@@ -14,6 +14,7 @@ struct nvkm_sw_chan {
 	struct nvkm_fifo_chan *fifo;
 	struct list_head head;
 
+#define NVKM_SW_CHAN_EVENT_PAGE_FLIP BIT(0)
 	struct nvkm_event event;
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/sw/gf100.c
index 55abf839f29d..c3cf6f2ff86c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/gf100.c
@@ -36,10 +36,10 @@
  ******************************************************************************/
 
 static int
-gf100_sw_chan_vblsem_release(struct nvkm_notify *notify)
+gf100_sw_chan_vblsem_release(struct nvkm_event_ntfy *notify, u32 bits)
 {
 	struct nv50_sw_chan *chan =
-		container_of(notify, typeof(*chan), vblank.notify[notify->index]);
+		container_of(notify, typeof(*chan), vblank.notify[notify->id]);
 	struct nvkm_sw *sw = chan->base.sw;
 	struct nvkm_device *device = sw->engine.subdev.device;
 	u32 inst = chan->base.fifo->inst->addr >> 12;
@@ -50,7 +50,7 @@ gf100_sw_chan_vblsem_release(struct nvkm_notify *notify)
 	nvkm_wr32(device, 0x060010, lower_32_bits(chan->vblank.offset));
 	nvkm_wr32(device, 0x060014, chan->vblank.value);
 
-	return NVKM_NOTIFY_DROP;
+	return NVKM_EVENT_DROP;
 }
 
 static bool
@@ -73,7 +73,7 @@ gf100_sw_chan_mthd(struct nvkm_sw_chan *base, int subc, u32 mthd, u32 data)
 		return true;
 	case 0x040c:
 		if (data < device->disp->vblank.index_nr) {
-			nvkm_notify_get(&chan->vblank.notify[data]);
+			nvkm_event_ntfy_allow(&chan->vblank.notify[data]);
 			return true;
 		}
 		break;
@@ -120,16 +120,8 @@ gf100_sw_chan_new(struct nvkm_sw *sw, struct nvkm_fifo_chan *fifoch,
 		return ret;
 
 	for (i = 0; disp && i < disp->vblank.index_nr; i++) {
-		ret = nvkm_notify_init(NULL, &disp->vblank,
-				       gf100_sw_chan_vblsem_release, false,
-				       &(struct nvif_notify_head_req_v0) {
-					.head = i,
-				       },
-				       sizeof(struct nvif_notify_head_req_v0),
-				       sizeof(struct nvif_notify_head_rep_v0),
-				       &chan->vblank.notify[i]);
-		if (ret)
-			return ret;
+		nvkm_event_ntfy_add(&disp->vblank, i, NVKM_DISP_HEAD_EVENT_VBLANK, true,
+				    gf100_sw_chan_vblsem_release, &chan->vblank.notify[i]);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.c
index 1fdd094c8b7e..9d7a9b7d5be3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.c
@@ -36,10 +36,10 @@
  ******************************************************************************/
 
 static int
-nv50_sw_chan_vblsem_release(struct nvkm_notify *notify)
+nv50_sw_chan_vblsem_release(struct nvkm_event_ntfy *notify, u32 bits)
 {
 	struct nv50_sw_chan *chan =
-		container_of(notify, typeof(*chan), vblank.notify[notify->index]);
+		container_of(notify, typeof(*chan), vblank.notify[notify->id]);
 	struct nvkm_sw *sw = chan->base.sw;
 	struct nvkm_device *device = sw->engine.subdev.device;
 
@@ -55,7 +55,7 @@ nv50_sw_chan_vblsem_release(struct nvkm_notify *notify)
 		nvkm_wr32(device, 0x060014, chan->vblank.value);
 	}
 
-	return NVKM_NOTIFY_DROP;
+	return NVKM_EVENT_DROP;
 }
 
 static bool
@@ -70,7 +70,7 @@ nv50_sw_chan_mthd(struct nvkm_sw_chan *base, int subc, u32 mthd, u32 data)
 	case 0x0404: chan->vblank.value  = data; return true;
 	case 0x0408:
 		if (data < device->disp->vblank.index_nr) {
-			nvkm_notify_get(&chan->vblank.notify[data]);
+			nvkm_event_ntfy_allow(&chan->vblank.notify[data]);
 			return true;
 		}
 		break;
@@ -85,8 +85,10 @@ nv50_sw_chan_dtor(struct nvkm_sw_chan *base)
 {
 	struct nv50_sw_chan *chan = nv50_sw_chan(base);
 	int i;
+
 	for (i = 0; i < ARRAY_SIZE(chan->vblank.notify); i++)
-		nvkm_notify_fini(&chan->vblank.notify[i]);
+		nvkm_event_ntfy_del(&chan->vblank.notify[i]);
+
 	return chan;
 }
 
@@ -113,16 +115,8 @@ nv50_sw_chan_new(struct nvkm_sw *sw, struct nvkm_fifo_chan *fifoch,
 		return ret;
 
 	for (i = 0; disp && i < disp->vblank.index_nr; i++) {
-		ret = nvkm_notify_init(NULL, &disp->vblank,
-				       nv50_sw_chan_vblsem_release, false,
-				       &(struct nvif_notify_head_req_v0) {
-					.head = i,
-				       },
-				       sizeof(struct nvif_notify_head_req_v0),
-				       sizeof(struct nvif_notify_head_rep_v0),
-				       &chan->vblank.notify[i]);
-		if (ret)
-			return ret;
+		nvkm_event_ntfy_add(&disp->vblank, i, NVKM_DISP_HEAD_EVENT_VBLANK, true,
+				    nv50_sw_chan_vblsem_release, &chan->vblank.notify[i]);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.h
index 6d364d7b406a..b42289ce8826 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nv50.h
@@ -5,12 +5,12 @@
 #include "priv.h"
 #include "chan.h"
 #include "nvsw.h"
-#include <core/notify.h>
+#include <core/event.h>
 
 struct nv50_sw_chan {
 	struct nvkm_sw_chan base;
 	struct {
-		struct nvkm_notify notify[4];
+		struct nvkm_event_ntfy notify[4];
 		u32 ctxdma;
 		u64 offset;
 		u32 value;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.c b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.c
index 33dd03fff3c4..f5affa1c8f34 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sw/nvsw.c
@@ -27,33 +27,34 @@
 #include <nvif/if0004.h>
 
 static int
-nvkm_nvsw_mthd_(struct nvkm_object *object, u32 mthd, void *data, u32 size)
+nvkm_nvsw_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_uevent *uevent)
 {
-	struct nvkm_nvsw *nvsw = nvkm_nvsw(object);
-	if (nvsw->func->mthd)
-		return nvsw->func->mthd(nvsw, mthd, data, size);
-	return -ENODEV;
+	union nv04_nvsw_event_args *args = argv;
+
+	if (!uevent)
+		return 0;
+	if (argc != sizeof(args->vn))
+		return -ENOSYS;
+
+	return nvkm_uevent_add(uevent, &nvkm_nvsw(object)->chan->event, 0,
+			       NVKM_SW_CHAN_EVENT_PAGE_FLIP, NULL);
 }
 
 static int
-nvkm_nvsw_ntfy_(struct nvkm_object *object, u32 mthd,
-		struct nvkm_event **pevent)
+nvkm_nvsw_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
 {
 	struct nvkm_nvsw *nvsw = nvkm_nvsw(object);
-	switch (mthd) {
-	case NV04_NVSW_NTFY_UEVENT:
-		*pevent = &nvsw->chan->event;
-		return 0;
-	default:
-		break;
-	}
-	return -EINVAL;
+
+	if (nvsw->func->mthd)
+		return nvsw->func->mthd(nvsw, mthd, data, size);
+
+	return -ENODEV;
 }
 
 static const struct nvkm_object_func
 nvkm_nvsw_ = {
-	.mthd = nvkm_nvsw_mthd_,
-	.ntfy = nvkm_nvsw_ntfy_,
+	.mthd = nvkm_nvsw_mthd,
+	.uevent = nvkm_nvsw_uevent,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild b/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild
index d79d783904ee..9ffe7b921ccb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild
@@ -1,6 +1,12 @@
 # SPDX-License-Identifier: MIT
 nvkm-y += nvkm/falcon/base.o
 nvkm-y += nvkm/falcon/cmdq.o
+nvkm-y += nvkm/falcon/fw.o
 nvkm-y += nvkm/falcon/msgq.o
 nvkm-y += nvkm/falcon/qmgr.o
 nvkm-y += nvkm/falcon/v1.o
+
+nvkm-y += nvkm/falcon/gm200.o
+nvkm-y += nvkm/falcon/gp102.o
+nvkm-y += nvkm/falcon/ga100.o
+nvkm-y += nvkm/falcon/ga102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c
index f3f90c1063dd..235149f73a69 100644
--- a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c
@@ -22,119 +22,217 @@
 #include "priv.h"
 
 #include <subdev/mc.h>
+#include <subdev/timer.h>
 #include <subdev/top.h>
 
-void
-nvkm_falcon_load_imem(struct nvkm_falcon *falcon, void *data, u32 start,
-		      u32 size, u16 tag, u8 port, bool secure)
+static const struct nvkm_falcon_func_dma *
+nvkm_falcon_dma(struct nvkm_falcon *falcon, enum nvkm_falcon_mem *mem_type, u32 *mem_base)
 {
-	if (secure && !falcon->secret) {
-		nvkm_warn(falcon->user,
-			  "writing with secure tag on a non-secure falcon!\n");
-		return;
+	switch (*mem_type) {
+	case IMEM: return falcon->func->imem_dma;
+	case DMEM: return falcon->func->dmem_dma;
+	default:
+		return NULL;
 	}
-
-	falcon->func->load_imem(falcon, data, start, size, tag, port,
-				secure);
 }
 
-void
-nvkm_falcon_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start,
-		      u32 size, u8 port)
+int
+nvkm_falcon_dma_wr(struct nvkm_falcon *falcon, const u8 *img, u64 dma_addr, u32 dma_base,
+		   enum nvkm_falcon_mem mem_type, u32 mem_base, int len, bool sec)
 {
-	mutex_lock(&falcon->dmem_mutex);
-
-	falcon->func->load_dmem(falcon, data, start, size, port);
+	const struct nvkm_falcon_func_dma *dma = nvkm_falcon_dma(falcon, &mem_type, &mem_base);
+	const char *type = nvkm_falcon_mem(mem_type);
+	const int dmalen = 256;
+	u32 dma_start = 0;
+	u32 dst, src, cmd;
+	int ret, i;
+
+	if (WARN_ON(!dma->xfer))
+		return -EINVAL;
+
+	if (mem_type == DMEM) {
+		dma_start = dma_base;
+		dma_addr += dma_base;
+	}
 
-	mutex_unlock(&falcon->dmem_mutex);
-}
+	FLCN_DBG(falcon, "%s %08x <- %08x bytes at %08x (%010llx %08x)",
+		 type, mem_base, len, dma_base, dma_addr - dma_base, dma_start);
+	if (WARN_ON(!len || (len & (dmalen - 1))))
+		return -EINVAL;
 
-void
-nvkm_falcon_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size, u8 port,
-		      void *data)
-{
-	mutex_lock(&falcon->dmem_mutex);
+	ret = dma->init(falcon, dma_addr, dmalen, mem_type, sec, &cmd);
+	if (ret)
+		return ret;
 
-	falcon->func->read_dmem(falcon, start, size, port, data);
+	dst = mem_base;
+	src = dma_base;
+	if (len) {
+		while (len >= dmalen) {
+			dma->xfer(falcon, dst, src - dma_start, cmd);
+
+			if (img && nvkm_printk_ok(falcon->owner, falcon->user, NV_DBG_TRACE)) {
+				for (i = 0; i < dmalen; i += 4, mem_base += 4) {
+					const int w = 8, x = (i / 4) % w;
+
+					if (x == 0)
+						printk(KERN_INFO "%s %08x <-", type, mem_base);
+					printk(KERN_CONT " %08x", *(u32 *)(img + src + i));
+					if (x == (w - 1) || ((i + 4) == dmalen))
+						printk(KERN_CONT " <- %08x+%08x", dma_base,
+						       src + i - dma_base - (x * 4));
+					if (i == (7 * 4))
+						printk(KERN_CONT " *");
+				}
+			}
+
+			if (nvkm_msec(falcon->owner->device, 2000,
+				if (dma->done(falcon))
+					break;
+			) < 0)
+				return -ETIMEDOUT;
+
+			src += dmalen;
+			dst += dmalen;
+			len -= dmalen;
+		}
+		WARN_ON(len);
+	}
 
-	mutex_unlock(&falcon->dmem_mutex);
+	return 0;
 }
 
-void
-nvkm_falcon_bind_context(struct nvkm_falcon *falcon, struct nvkm_memory *inst)
+static const struct nvkm_falcon_func_pio *
+nvkm_falcon_pio(struct nvkm_falcon *falcon, enum nvkm_falcon_mem *mem_type, u32 *mem_base)
 {
-	if (!falcon->func->bind_context) {
-		nvkm_error(falcon->user,
-			   "Context binding not supported on this falcon!\n");
-		return;
+	switch (*mem_type) {
+	case IMEM:
+		return falcon->func->imem_pio;
+	case DMEM:
+		if (!falcon->func->emem_addr || *mem_base < falcon->func->emem_addr)
+			return falcon->func->dmem_pio;
+
+		*mem_base -= falcon->func->emem_addr;
+		fallthrough;
+	case EMEM:
+		return falcon->func->emem_pio;
+	default:
+		return NULL;
 	}
-
-	falcon->func->bind_context(falcon, inst);
 }
 
-void
-nvkm_falcon_set_start_addr(struct nvkm_falcon *falcon, u32 start_addr)
+int
+nvkm_falcon_pio_rd(struct nvkm_falcon *falcon, u8 port, enum nvkm_falcon_mem mem_type, u32 mem_base,
+		   const u8 *img, u32 img_base, int len)
 {
-	falcon->func->set_start_addr(falcon, start_addr);
-}
+	const struct nvkm_falcon_func_pio *pio = nvkm_falcon_pio(falcon, &mem_type, &mem_base);
+	const char *type = nvkm_falcon_mem(mem_type);
+	int xfer_len;
+
+	if (WARN_ON(!pio || !pio->rd))
+		return -EINVAL;
+
+	FLCN_DBG(falcon, "%s %08x -> %08x bytes at %08x", type, mem_base, len, img_base);
+	if (WARN_ON(!len || (len & (pio->min - 1))))
+		return -EINVAL;
+
+	pio->rd_init(falcon, port, mem_base);
+	do {
+		xfer_len = min(len, pio->max);
+		pio->rd(falcon, port, img, xfer_len);
+
+		if (nvkm_printk_ok(falcon->owner, falcon->user, NV_DBG_TRACE)) {
+			for (img_base = 0; img_base < xfer_len; img_base += 4, mem_base += 4) {
+				if (((img_base / 4) % 8) == 0)
+					printk(KERN_INFO "%s %08x ->", type, mem_base);
+				printk(KERN_CONT " %08x", *(u32 *)(img + img_base));
+			}
+		}
+
+		img += xfer_len;
+		len -= xfer_len;
+	} while (len);
 
-void
-nvkm_falcon_start(struct nvkm_falcon *falcon)
-{
-	falcon->func->start(falcon);
+	return 0;
 }
 
 int
-nvkm_falcon_enable(struct nvkm_falcon *falcon)
+nvkm_falcon_pio_wr(struct nvkm_falcon *falcon, const u8 *img, u32 img_base, u8 port,
+		   enum nvkm_falcon_mem mem_type, u32 mem_base, int len, u16 tag, bool sec)
 {
-	struct nvkm_device *device = falcon->owner->device;
-	int ret;
-
-	nvkm_mc_enable(device, falcon->owner->type, falcon->owner->inst);
-	ret = falcon->func->enable(falcon);
-	if (ret) {
-		nvkm_mc_disable(device, falcon->owner->type, falcon->owner->inst);
-		return ret;
-	}
+	const struct nvkm_falcon_func_pio *pio = nvkm_falcon_pio(falcon, &mem_type, &mem_base);
+	const char *type = nvkm_falcon_mem(mem_type);
+	int xfer_len;
+
+	if (WARN_ON(!pio || !pio->wr))
+		return -EINVAL;
+
+	FLCN_DBG(falcon, "%s %08x <- %08x bytes at %08x", type, mem_base, len, img_base);
+	if (WARN_ON(!len || (len & (pio->min - 1))))
+		return -EINVAL;
+
+	pio->wr_init(falcon, port, sec, mem_base);
+	do {
+		xfer_len = min(len, pio->max);
+		pio->wr(falcon, port, img, xfer_len, tag++);
+
+		if (nvkm_printk_ok(falcon->owner, falcon->user, NV_DBG_TRACE)) {
+			for (img_base = 0; img_base < xfer_len; img_base += 4, mem_base += 4) {
+				if (((img_base / 4) % 8) == 0)
+					printk(KERN_INFO "%s %08x <-", type, mem_base);
+				printk(KERN_CONT " %08x", *(u32 *)(img + img_base));
+				if ((img_base / 4) == 7 && mem_type == IMEM)
+					printk(KERN_CONT " %04x", tag - 1);
+			}
+		}
+
+		img += xfer_len;
+		len -= xfer_len;
+	} while (len);
 
 	return 0;
 }
 
 void
-nvkm_falcon_disable(struct nvkm_falcon *falcon)
+nvkm_falcon_load_imem(struct nvkm_falcon *falcon, void *data, u32 start,
+		      u32 size, u16 tag, u8 port, bool secure)
 {
-	struct nvkm_device *device = falcon->owner->device;
-
-	/* already disabled, return or wait_idle will timeout */
-	if (!nvkm_mc_enabled(device, falcon->owner->type, falcon->owner->inst))
+	if (secure && !falcon->secret) {
+		nvkm_warn(falcon->user,
+			  "writing with secure tag on a non-secure falcon!\n");
 		return;
+	}
 
-	falcon->func->disable(falcon);
-
-	nvkm_mc_disable(device, falcon->owner->type, falcon->owner->inst);
+	falcon->func->load_imem(falcon, data, start, size, tag, port,
+				secure);
 }
 
-int
-nvkm_falcon_reset(struct nvkm_falcon *falcon)
+void
+nvkm_falcon_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start,
+		      u32 size, u8 port)
 {
-	if (!falcon->func->reset) {
-		nvkm_falcon_disable(falcon);
-		return nvkm_falcon_enable(falcon);
-	}
+	mutex_lock(&falcon->dmem_mutex);
 
-	return falcon->func->reset(falcon);
+	falcon->func->load_dmem(falcon, data, start, size, port);
+
+	mutex_unlock(&falcon->dmem_mutex);
 }
 
-int
-nvkm_falcon_wait_for_halt(struct nvkm_falcon *falcon, u32 ms)
+void
+nvkm_falcon_start(struct nvkm_falcon *falcon)
 {
-	return falcon->func->wait_for_halt(falcon, ms);
+	falcon->func->start(falcon);
 }
 
 int
-nvkm_falcon_clear_interrupt(struct nvkm_falcon *falcon, u32 mask)
+nvkm_falcon_reset(struct nvkm_falcon *falcon)
 {
-	return falcon->func->clear_interrupt(falcon, mask);
+	int ret;
+
+	ret = falcon->func->disable(falcon);
+	if (WARN_ON(ret))
+		return ret;
+
+	return nvkm_falcon_enable(falcon);
 }
 
 static int
@@ -169,7 +267,7 @@ nvkm_falcon_oneinit(struct nvkm_falcon *falcon)
 }
 
 void
-nvkm_falcon_put(struct nvkm_falcon *falcon, const struct nvkm_subdev *user)
+nvkm_falcon_put(struct nvkm_falcon *falcon, struct nvkm_subdev *user)
 {
 	if (unlikely(!falcon))
 		return;
@@ -183,7 +281,7 @@ nvkm_falcon_put(struct nvkm_falcon *falcon, const struct nvkm_subdev *user)
 }
 
 int
-nvkm_falcon_get(struct nvkm_falcon *falcon, const struct nvkm_subdev *user)
+nvkm_falcon_get(struct nvkm_falcon *falcon, struct nvkm_subdev *user)
 {
 	int ret = 0;
 
@@ -217,6 +315,7 @@ nvkm_falcon_ctor(const struct nvkm_falcon_func *func,
 	falcon->owner = subdev;
 	falcon->name = name;
 	falcon->addr = addr;
+	falcon->addr2 = func->addr2;
 	mutex_init(&falcon->mutex);
 	mutex_init(&falcon->dmem_mutex);
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/cmdq.c b/drivers/gpu/drm/nouveau/nvkm/falcon/cmdq.c
index 44cf6a8862e1..211ebe7afac6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/falcon/cmdq.c
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/cmdq.c
@@ -51,7 +51,7 @@ static void
 nvkm_falcon_cmdq_push(struct nvkm_falcon_cmdq *cmdq, void *data, u32 size)
 {
 	struct nvkm_falcon *falcon = cmdq->qmgr->falcon;
-	nvkm_falcon_load_dmem(falcon, data, cmdq->position, size, 0);
+	nvkm_falcon_pio_wr(falcon, data, 0, 0, DMEM, cmdq->position, size, 0, false);
 	cmdq->position += ALIGN(size, QUEUE_ALIGNMENT);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c b/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c
new file mode 100644
index 000000000000..80a480b12174
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/fw.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright 2022 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <core/memory.h>
+#include <subdev/mmu.h>
+
+#include <nvfw/fw.h>
+#include <nvfw/hs.h>
+
+int
+nvkm_falcon_fw_patch(struct nvkm_falcon_fw *fw)
+{
+	struct nvkm_falcon *falcon = fw->falcon;
+	u32 sig_base_src = fw->sig_base_prd;
+	u32 src, dst, len, i;
+	int idx = 0;
+
+	FLCNFW_DBG(fw, "patching sigs:%d size:%d", fw->sig_nr, fw->sig_size);
+	if (fw->func->signature) {
+		idx = fw->func->signature(fw, &sig_base_src);
+		if (idx < 0)
+			return idx;
+	}
+
+	src = idx * fw->sig_size;
+	dst = fw->sig_base_img;
+	len = fw->sig_size / 4;
+	FLCNFW_DBG(fw, "patch idx:%d src:%08x dst:%08x", idx, sig_base_src + src, dst);
+	for (i = 0; i < len; i++) {
+		u32 sig = *(u32 *)(fw->sigs + src);
+
+		if (nvkm_printk_ok(falcon->owner, falcon->user, NV_DBG_TRACE)) {
+			if (i % 8 == 0)
+				printk(KERN_INFO "sig -> %08x:", dst);
+			printk(KERN_CONT " %08x", sig);
+		}
+
+		*(u32 *)(fw->fw.img + dst) = sig;
+		src += 4;
+		dst += 4;
+	}
+
+	return 0;
+}
+
+static void
+nvkm_falcon_fw_dtor_sigs(struct nvkm_falcon_fw *fw)
+{
+	kfree(fw->sigs);
+	fw->sigs = NULL;
+}
+
+int
+nvkm_falcon_fw_boot(struct nvkm_falcon_fw *fw, struct nvkm_subdev *user,
+		    bool release, u32 *pmbox0, u32 *pmbox1, u32 mbox0_ok, u32 irqsclr)
+{
+	struct nvkm_falcon *falcon = fw->falcon;
+	int ret;
+
+	ret = nvkm_falcon_get(falcon, user);
+	if (ret)
+		return ret;
+
+	if (fw->sigs) {
+		ret = nvkm_falcon_fw_patch(fw);
+		if (ret)
+			goto done;
+
+		nvkm_falcon_fw_dtor_sigs(fw);
+	}
+
+	FLCNFW_DBG(fw, "resetting");
+	fw->func->reset(fw);
+
+	FLCNFW_DBG(fw, "loading");
+	if (fw->func->setup) {
+		ret = fw->func->setup(fw);
+		if (ret)
+			goto done;
+	}
+
+	ret = fw->func->load(fw);
+	if (ret)
+		goto done;
+
+	FLCNFW_DBG(fw, "booting");
+	ret = fw->func->boot(fw, pmbox0, pmbox1, mbox0_ok, irqsclr);
+	if (ret)
+		FLCNFW_ERR(fw, "boot failed: %d", ret);
+	else
+		FLCNFW_DBG(fw, "booted");
+
+done:
+	if (ret || release)
+		nvkm_falcon_put(falcon, user);
+	return ret;
+}
+
+int
+nvkm_falcon_fw_oneinit(struct nvkm_falcon_fw *fw, struct nvkm_falcon *falcon,
+		       struct nvkm_vmm *vmm, struct nvkm_memory *inst)
+{
+	int ret;
+
+	fw->falcon = falcon;
+	fw->vmm = nvkm_vmm_ref(vmm);
+	fw->inst = nvkm_memory_ref(inst);
+
+	if (fw->boot) {
+		FLCN_DBG(falcon, "mapping %s fw", fw->fw.name);
+		ret = nvkm_vmm_get(fw->vmm, 12, nvkm_memory_size(&fw->fw.mem.memory), &fw->vma);
+		if (ret) {
+			FLCN_ERR(falcon, "get %d", ret);
+			return ret;
+		}
+
+		ret = nvkm_memory_map(&fw->fw.mem.memory, 0, fw->vmm, fw->vma, NULL, 0);
+		if (ret) {
+			FLCN_ERR(falcon, "map %d", ret);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+void
+nvkm_falcon_fw_dtor(struct nvkm_falcon_fw *fw)
+{
+	nvkm_vmm_put(fw->vmm, &fw->vma);
+	nvkm_vmm_unref(&fw->vmm);
+	nvkm_memory_unref(&fw->inst);
+	nvkm_falcon_fw_dtor_sigs(fw);
+	nvkm_firmware_dtor(&fw->fw);
+}
+
+static const struct nvkm_firmware_func
+nvkm_falcon_fw_dma = {
+	.type = NVKM_FIRMWARE_IMG_DMA,
+};
+
+static const struct nvkm_firmware_func
+nvkm_falcon_fw = {
+	.type = NVKM_FIRMWARE_IMG_RAM,
+};
+
+int
+nvkm_falcon_fw_sign(struct nvkm_falcon_fw *fw, u32 sig_base_img, u32 sig_size, const u8 *sigs,
+		    int sig_nr_prd, u32 sig_base_prd, int sig_nr_dbg, u32 sig_base_dbg)
+{
+	fw->sig_base_prd = sig_base_prd;
+	fw->sig_base_dbg = sig_base_dbg;
+	fw->sig_base_img = sig_base_img;
+	fw->sig_size = sig_size;
+	fw->sig_nr = sig_nr_prd + sig_nr_dbg;
+
+	fw->sigs = kmalloc_array(fw->sig_nr, fw->sig_size, GFP_KERNEL);
+	if (!fw->sigs)
+		return -ENOMEM;
+
+	memcpy(fw->sigs, sigs + sig_base_prd, sig_nr_prd * fw->sig_size);
+	if (sig_nr_dbg)
+		memcpy(fw->sigs + sig_size, sigs + sig_base_dbg, sig_nr_dbg * fw->sig_size);
+
+	return 0;
+}
+
+int
+nvkm_falcon_fw_ctor(const struct nvkm_falcon_fw_func *func, const char *name,
+		    struct nvkm_device *device, bool dma, const void *src, u32 len,
+		    struct nvkm_falcon *falcon, struct nvkm_falcon_fw *fw)
+{
+	const struct nvkm_firmware_func *type = dma ? &nvkm_falcon_fw_dma : &nvkm_falcon_fw;
+	int ret;
+
+	fw->func = func;
+
+	ret = nvkm_firmware_ctor(type, name, device, src, len, &fw->fw);
+	if (ret)
+		return ret;
+
+	return falcon ? nvkm_falcon_fw_oneinit(fw, falcon, NULL, NULL) : 0;
+}
+
+int
+nvkm_falcon_fw_ctor_hs(const struct nvkm_falcon_fw_func *func, const char *name,
+		       struct nvkm_subdev *subdev, const char *bl, const char *img, int ver,
+		       struct nvkm_falcon *falcon, struct nvkm_falcon_fw *fw)
+{
+	const struct firmware *blob;
+	const struct nvfw_bin_hdr *hdr;
+	const struct nvfw_hs_header *hshdr;
+	const struct nvfw_hs_load_header *lhdr;
+	const struct nvfw_bl_desc *desc;
+	u32 loc, sig;
+	int ret;
+
+	ret = nvkm_firmware_load_name(subdev, img, "", ver, &blob);
+	if (ret)
+		return ret;
+
+	hdr = nvfw_bin_hdr(subdev, blob->data);
+	hshdr = nvfw_hs_header(subdev, blob->data + hdr->header_offset);
+
+	ret = nvkm_falcon_fw_ctor(func, name, subdev->device, bl != NULL,
+				  blob->data + hdr->data_offset, hdr->data_size, falcon, fw);
+	if (ret)
+		goto done;
+
+	/* Earlier FW releases by NVIDIA for Nouveau's use aren't in NVIDIA's
+	 * standard format, and don't have the indirection seen in the 0x10de
+	 * case.
+	 */
+	switch (hdr->bin_magic) {
+	case 0x000010de:
+		loc = *(u32 *)(blob->data + hshdr->patch_loc);
+		sig = *(u32 *)(blob->data + hshdr->patch_sig);
+		break;
+	case 0x3b1d14f0:
+		loc = hshdr->patch_loc;
+		sig = hshdr->patch_sig;
+		break;
+	default:
+		WARN_ON(1);
+		ret = -EINVAL;
+		goto done;
+	}
+
+	ret = nvkm_falcon_fw_sign(fw, loc, hshdr->sig_prod_size, blob->data,
+				  1, hshdr->sig_prod_offset + sig,
+				  1, hshdr->sig_dbg_offset + sig);
+	if (ret)
+		goto done;
+
+	lhdr = nvfw_hs_load_header(subdev, blob->data + hshdr->hdr_offset);
+
+	fw->nmem_base_img = 0;
+	fw->nmem_base = lhdr->non_sec_code_off;
+	fw->nmem_size = lhdr->non_sec_code_size;
+
+	fw->imem_base_img = lhdr->apps[0];
+	fw->imem_base = ALIGN(lhdr->apps[0], 0x100);
+	fw->imem_size = lhdr->apps[lhdr->num_apps + 0];
+
+	fw->dmem_base_img = lhdr->data_dma_base;
+	fw->dmem_base = 0;
+	fw->dmem_size = lhdr->data_size;
+	fw->dmem_sign = loc - lhdr->data_dma_base;
+
+	if (bl) {
+		nvkm_firmware_put(blob);
+
+		ret = nvkm_firmware_load_name(subdev, bl, "", ver, &blob);
+		if (ret)
+			return ret;
+
+		hdr = nvfw_bin_hdr(subdev, blob->data);
+		desc = nvfw_bl_desc(subdev, blob->data + hdr->header_offset);
+
+		fw->boot_addr = desc->start_tag << 8;
+		fw->boot_size = desc->code_size;
+		fw->boot = kmemdup(blob->data + hdr->data_offset + desc->code_off,
+				   fw->boot_size, GFP_KERNEL);
+		if (!fw->boot)
+			ret = -ENOMEM;
+	} else {
+		fw->boot_addr = fw->nmem_base;
+	}
+
+done:
+	if (ret)
+		nvkm_falcon_fw_dtor(fw);
+
+	nvkm_firmware_put(blob);
+	return ret;
+}
+
+int
+nvkm_falcon_fw_ctor_hs_v2(const struct nvkm_falcon_fw_func *func, const char *name,
+			  struct nvkm_subdev *subdev, const char *img, int ver,
+			  struct nvkm_falcon *falcon, struct nvkm_falcon_fw *fw)
+{
+	const struct nvfw_bin_hdr *hdr;
+	const struct nvfw_hs_header_v2 *hshdr;
+	const struct nvfw_hs_load_header_v2 *lhdr;
+	const struct firmware *blob;
+	u32 loc, sig, cnt, *meta;
+	int ret;
+
+	ret = nvkm_firmware_load_name(subdev, img, "", ver, &blob);
+	if (ret)
+		return ret;
+
+	hdr = nvfw_bin_hdr(subdev, blob->data);
+	hshdr = nvfw_hs_header_v2(subdev, blob->data + hdr->header_offset);
+	meta = (u32 *)(blob->data + hshdr->meta_data_offset);
+	loc = *(u32 *)(blob->data + hshdr->patch_loc);
+	sig = *(u32 *)(blob->data + hshdr->patch_sig);
+	cnt = *(u32 *)(blob->data + hshdr->num_sig);
+
+	ret = nvkm_falcon_fw_ctor(func, name, subdev->device, true,
+				  blob->data + hdr->data_offset, hdr->data_size, falcon, fw);
+	if (ret)
+		goto done;
+
+	ret = nvkm_falcon_fw_sign(fw, loc, hshdr->sig_prod_size / cnt, blob->data,
+				  cnt, hshdr->sig_prod_offset + sig, 0, 0);
+	if (ret)
+		goto done;
+
+	lhdr = nvfw_hs_load_header_v2(subdev, blob->data + hshdr->header_offset);
+
+	fw->imem_base_img = lhdr->app[0].offset;
+	fw->imem_base = 0;
+	fw->imem_size = lhdr->app[0].size;
+
+	fw->dmem_base_img = lhdr->os_data_offset;
+	fw->dmem_base = 0;
+	fw->dmem_size = lhdr->os_data_size;
+	fw->dmem_sign = loc - lhdr->os_data_offset;
+
+	fw->boot_addr = lhdr->app[0].offset;
+
+	fw->fuse_ver = meta[0];
+	fw->engine_id = meta[1];
+	fw->ucode_id = meta[2];
+
+done:
+	if (ret)
+		nvkm_falcon_fw_dtor(fw);
+
+	nvkm_firmware_put(blob);
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/ga100.c b/drivers/gpu/drm/nouveau/nvkm/falcon/ga100.c
new file mode 100644
index 000000000000..49fd32943916
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/ga100.c
@@ -0,0 +1,62 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+int
+ga100_flcn_fw_signature(struct nvkm_falcon_fw *fw, u32 *src_base_src)
+{
+	struct nvkm_falcon *falcon = fw->falcon;
+	struct nvkm_device *device = falcon->owner->device;
+	u32 reg_fuse_version;
+	int idx;
+
+	FLCN_DBG(falcon, "brom: %08x %08x", fw->engine_id, fw->ucode_id);
+	FLCN_DBG(falcon, "fuse_version: %d", fw->fuse_ver);
+
+	if (fw->engine_id & 0x00000001) {
+		reg_fuse_version = nvkm_rd32(device, 0x824140 + (fw->ucode_id - 1) * 4);
+	} else
+	if (fw->engine_id & 0x00000004) {
+		reg_fuse_version = nvkm_rd32(device, 0x824100 + (fw->ucode_id - 1) * 4);
+	} else
+	if (fw->engine_id & 0x00000400) {
+		reg_fuse_version = nvkm_rd32(device, 0x8241c0 + (fw->ucode_id - 1) * 4);
+	} else {
+		WARN_ON(1);
+		return -ENOSYS;
+	}
+
+	FLCN_DBG(falcon, "reg_fuse_version: %08x", reg_fuse_version);
+	if (reg_fuse_version) {
+		reg_fuse_version = fls(reg_fuse_version);
+		FLCN_DBG(falcon, "reg_fuse_version: %d", reg_fuse_version);
+
+		if (WARN_ON(fw->fuse_ver < reg_fuse_version))
+			return -EINVAL;
+
+		idx = fw->fuse_ver - reg_fuse_version;
+	} else {
+		idx = fw->sig_nr - 1;
+	}
+
+	return idx;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/ga102.c b/drivers/gpu/drm/nouveau/nvkm/falcon/ga102.c
new file mode 100644
index 000000000000..0ff450fe3590
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/ga102.c
@@ -0,0 +1,148 @@
+/*
+ * Copyright 2022 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <subdev/mc.h>
+#include <subdev/timer.h>
+
+static bool
+ga102_flcn_dma_done(struct nvkm_falcon *falcon)
+{
+	return !!(nvkm_falcon_rd32(falcon, 0x118) & 0x00000002);
+}
+
+static void
+ga102_flcn_dma_xfer(struct nvkm_falcon *falcon, u32 mem_base, u32 dma_base, u32 cmd)
+{
+	nvkm_falcon_wr32(falcon, 0x114, mem_base);
+	nvkm_falcon_wr32(falcon, 0x11c, dma_base);
+	nvkm_falcon_wr32(falcon, 0x118, cmd);
+}
+
+static int
+ga102_flcn_dma_init(struct nvkm_falcon *falcon, u64 dma_addr, int xfer_len,
+		    enum nvkm_falcon_mem mem_type, bool sec, u32 *cmd)
+{
+	*cmd = (ilog2(xfer_len) - 2) << 8;
+	if (mem_type == IMEM)
+		*cmd |= 0x00000010;
+	if (sec)
+		*cmd |= 0x00000004;
+
+	nvkm_falcon_wr32(falcon, 0x110, dma_addr >> 8);
+	nvkm_falcon_wr32(falcon, 0x128, 0x00000000);
+	return 0;
+}
+
+const struct nvkm_falcon_func_dma
+ga102_flcn_dma = {
+	.init = ga102_flcn_dma_init,
+	.xfer = ga102_flcn_dma_xfer,
+	.done = ga102_flcn_dma_done,
+};
+
+int
+ga102_flcn_reset_wait_mem_scrubbing(struct nvkm_falcon *falcon)
+{
+	nvkm_falcon_mask(falcon, 0x040, 0x00000000, 0x00000000);
+
+	if (nvkm_msec(falcon->owner->device, 20,
+		if (!(nvkm_falcon_rd32(falcon, 0x0f4) & 0x00001000))
+			break;
+	) < 0)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+int
+ga102_flcn_reset_prep(struct nvkm_falcon *falcon)
+{
+	nvkm_falcon_rd32(falcon, 0x0f4);
+
+	nvkm_usec(falcon->owner->device, 150,
+		if (nvkm_falcon_rd32(falcon, 0x0f4) & 0x80000000)
+			break;
+		_warn = false;
+	);
+
+	return 0;
+}
+
+int
+ga102_flcn_select(struct nvkm_falcon *falcon)
+{
+	if ((nvkm_falcon_rd32(falcon, falcon->addr2 + 0x668) & 0x00000010) != 0x00000000) {
+		nvkm_falcon_wr32(falcon, falcon->addr2 + 0x668, 0x00000000);
+		if (nvkm_msec(falcon->owner->device, 10,
+			if (nvkm_falcon_rd32(falcon, falcon->addr2 + 0x668) & 0x00000001)
+				break;
+		) < 0)
+			return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+int
+ga102_flcn_fw_boot(struct nvkm_falcon_fw *fw, u32 *mbox0, u32 *mbox1, u32 mbox0_ok, u32 irqsclr)
+{
+	struct nvkm_falcon *falcon = fw->falcon;
+
+	nvkm_falcon_wr32(falcon, falcon->addr2 + 0x210, fw->dmem_sign);
+	nvkm_falcon_wr32(falcon, falcon->addr2 + 0x19c, fw->engine_id);
+	nvkm_falcon_wr32(falcon, falcon->addr2 + 0x198, fw->ucode_id);
+	nvkm_falcon_wr32(falcon, falcon->addr2 + 0x180, 0x00000001);
+
+	return gm200_flcn_fw_boot(fw, mbox0, mbox1, mbox0_ok, irqsclr);
+}
+
+int
+ga102_flcn_fw_load(struct nvkm_falcon_fw *fw)
+{
+	struct nvkm_falcon *falcon = fw->falcon;
+	int ret = 0;
+
+	nvkm_falcon_mask(falcon, 0x624, 0x00000080, 0x00000080);
+	nvkm_falcon_wr32(falcon, 0x10c, 0x00000000);
+	nvkm_falcon_mask(falcon, 0x600, 0x00010007, (0 << 16) | (1 << 2) | 1);
+
+	ret = nvkm_falcon_dma_wr(falcon, fw->fw.img, fw->fw.phys, fw->imem_base_img,
+				 IMEM, fw->imem_base, fw->imem_size, true);
+	if (ret)
+		return ret;
+
+	ret = nvkm_falcon_dma_wr(falcon, fw->fw.img, fw->fw.phys, fw->dmem_base_img,
+				 DMEM, fw->dmem_base, fw->dmem_size, false);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+const struct nvkm_falcon_fw_func
+ga102_flcn_fw = {
+	.signature = ga100_flcn_fw_signature,
+	.reset = gm200_flcn_fw_reset,
+	.load = ga102_flcn_fw_load,
+	.boot = ga102_flcn_fw_boot,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c b/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c
new file mode 100644
index 000000000000..393ade9f7e6c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/gm200.c
@@ -0,0 +1,345 @@
+/*
+ * Copyright 2022 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <core/memory.h>
+#include <subdev/mc.h>
+#include <subdev/timer.h>
+
+void
+gm200_flcn_tracepc(struct nvkm_falcon *falcon)
+{
+	u32 sctl = nvkm_falcon_rd32(falcon, 0x240);
+	u32 tidx = nvkm_falcon_rd32(falcon, 0x148);
+	int nr = (tidx & 0x00ff0000) >> 16, sp, ip;
+
+	FLCN_ERR(falcon, "TRACEPC SCTL %08x TIDX %08x", sctl, tidx);
+	for (sp = 0; sp < nr; sp++) {
+		nvkm_falcon_wr32(falcon, 0x148, sp);
+		ip = nvkm_falcon_rd32(falcon, 0x14c);
+		FLCN_ERR(falcon, "TRACEPC: %08x", ip);
+	}
+}
+
+static void
+gm200_flcn_pio_dmem_rd(struct nvkm_falcon *falcon, u8 port, const u8 *img, int len)
+{
+	while (len >= 4) {
+		*(u32 *)img = nvkm_falcon_rd32(falcon, 0x1c4 + (port * 8));
+		img += 4;
+		len -= 4;
+	}
+}
+
+static void
+gm200_flcn_pio_dmem_rd_init(struct nvkm_falcon *falcon, u8 port, u32 dmem_base)
+{
+	nvkm_falcon_wr32(falcon, 0x1c0 + (port * 8), BIT(25) | dmem_base);
+}
+
+static void
+gm200_flcn_pio_dmem_wr(struct nvkm_falcon *falcon, u8 port, const u8 *img, int len, u16 tag)
+{
+	while (len >= 4) {
+		nvkm_falcon_wr32(falcon, 0x1c4 + (port * 8), *(u32 *)img);
+		img += 4;
+		len -= 4;
+	}
+}
+
+static void
+gm200_flcn_pio_dmem_wr_init(struct nvkm_falcon *falcon, u8 port, bool sec, u32 dmem_base)
+{
+	nvkm_falcon_wr32(falcon, 0x1c0 + (port * 8), BIT(24) | dmem_base);
+}
+
+const struct nvkm_falcon_func_pio
+gm200_flcn_dmem_pio = {
+	.min = 4,
+	.max = 0x100,
+	.wr_init = gm200_flcn_pio_dmem_wr_init,
+	.wr = gm200_flcn_pio_dmem_wr,
+	.rd_init = gm200_flcn_pio_dmem_rd_init,
+	.rd = gm200_flcn_pio_dmem_rd,
+};
+
+static void
+gm200_flcn_pio_imem_wr_init(struct nvkm_falcon *falcon, u8 port, bool sec, u32 imem_base)
+{
+	nvkm_falcon_wr32(falcon, 0x180 + (port * 0x10), (sec ? BIT(28) : 0) | BIT(24) | imem_base);
+}
+
+static void
+gm200_flcn_pio_imem_wr(struct nvkm_falcon *falcon, u8 port, const u8 *img, int len, u16 tag)
+{
+	nvkm_falcon_wr32(falcon, 0x188 + (port * 0x10), tag++);
+	while (len >= 4) {
+		nvkm_falcon_wr32(falcon, 0x184 + (port * 0x10), *(u32 *)img);
+		img += 4;
+		len -= 4;
+	}
+}
+
+const struct nvkm_falcon_func_pio
+gm200_flcn_imem_pio = {
+	.min = 0x100,
+	.max = 0x100,
+	.wr_init = gm200_flcn_pio_imem_wr_init,
+	.wr = gm200_flcn_pio_imem_wr,
+};
+
+int
+gm200_flcn_bind_stat(struct nvkm_falcon *falcon, bool intr)
+{
+	if (intr && !(nvkm_falcon_rd32(falcon, 0x008) & 0x00000008))
+		return -1;
+
+	return (nvkm_falcon_rd32(falcon, 0x0dc) & 0x00007000) >> 12;
+}
+
+void
+gm200_flcn_bind_inst(struct nvkm_falcon *falcon, int target, u64 addr)
+{
+	nvkm_falcon_mask(falcon, 0x604, 0x00000007, 0x00000000); /* DMAIDX_VIRT */
+	nvkm_falcon_wr32(falcon, 0x054, (1 << 30) | (target << 28) | (addr >> 12));
+	nvkm_falcon_mask(falcon, 0x090, 0x00010000, 0x00010000);
+	nvkm_falcon_mask(falcon, 0x0a4, 0x00000008, 0x00000008);
+}
+
+int
+gm200_flcn_reset_wait_mem_scrubbing(struct nvkm_falcon *falcon)
+{
+	nvkm_falcon_mask(falcon, 0x040, 0x00000000, 0x00000000);
+
+	if (nvkm_msec(falcon->owner->device, 10,
+		if (!(nvkm_falcon_rd32(falcon, 0x10c) & 0x00000006))
+			break;
+	) < 0)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+int
+gm200_flcn_enable(struct nvkm_falcon *falcon)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	int ret;
+
+	if (falcon->func->reset_eng) {
+		ret = falcon->func->reset_eng(falcon);
+		if (ret)
+			return ret;
+	}
+
+	if (falcon->func->select) {
+		ret = falcon->func->select(falcon);
+		if (ret)
+			return ret;
+	}
+
+	if (falcon->func->reset_pmc)
+		nvkm_mc_enable(device, falcon->owner->type, falcon->owner->inst);
+
+	ret = falcon->func->reset_wait_mem_scrubbing(falcon);
+	if (ret)
+		return ret;
+
+	nvkm_falcon_wr32(falcon, 0x084, nvkm_rd32(device, 0x000000));
+	return 0;
+}
+
+int
+gm200_flcn_disable(struct nvkm_falcon *falcon)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	int ret;
+
+	if (falcon->func->select) {
+		ret = falcon->func->select(falcon);
+		if (ret)
+			return ret;
+	}
+
+	nvkm_falcon_mask(falcon, 0x048, 0x00000003, 0x00000000);
+	nvkm_falcon_wr32(falcon, 0x014, 0xffffffff);
+
+	if (falcon->func->reset_pmc) {
+		if (falcon->func->reset_prep) {
+			ret = falcon->func->reset_prep(falcon);
+			if (ret)
+				return ret;
+		}
+
+		nvkm_mc_disable(device, falcon->owner->type, falcon->owner->inst);
+	}
+
+	if (falcon->func->reset_eng) {
+		ret = falcon->func->reset_eng(falcon);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+int
+gm200_flcn_fw_boot(struct nvkm_falcon_fw *fw, u32 *pmbox0, u32 *pmbox1, u32 mbox0_ok, u32 irqsclr)
+{
+	struct nvkm_falcon *falcon = fw->falcon;
+	u32 mbox0, mbox1;
+	int ret = 0;
+
+	nvkm_falcon_wr32(falcon, 0x040, pmbox0 ? *pmbox0 : 0xcafebeef);
+	if (pmbox1)
+		nvkm_falcon_wr32(falcon, 0x044, *pmbox1);
+
+	nvkm_falcon_wr32(falcon, 0x104, fw->boot_addr);
+	nvkm_falcon_wr32(falcon, 0x100, 0x00000002);
+
+	if (nvkm_msec(falcon->owner->device, 2000,
+		if (nvkm_falcon_rd32(falcon, 0x100) & 0x00000010)
+			break;
+	) < 0)
+		ret = -ETIMEDOUT;
+
+	mbox0 = nvkm_falcon_rd32(falcon, 0x040);
+	mbox1 = nvkm_falcon_rd32(falcon, 0x044);
+	if (FLCN_ERRON(falcon, ret || mbox0 != mbox0_ok, "mbox %08x %08x", mbox0, mbox1))
+		ret = ret ?: -EIO;
+
+	if (irqsclr)
+		nvkm_falcon_mask(falcon, 0x004, 0xffffffff, irqsclr);
+
+	return ret;
+}
+
+int
+gm200_flcn_fw_load(struct nvkm_falcon_fw *fw)
+{
+	struct nvkm_falcon *falcon = fw->falcon;
+	int target, ret;
+
+	if (fw->inst) {
+		nvkm_falcon_mask(falcon, 0x048, 0x00000001, 0x00000001);
+
+		switch (nvkm_memory_target(fw->inst)) {
+		case NVKM_MEM_TARGET_VRAM: target = 0; break;
+		case NVKM_MEM_TARGET_HOST: target = 2; break;
+		case NVKM_MEM_TARGET_NCOH: target = 3; break;
+		default:
+			WARN_ON(1);
+			return -EINVAL;
+		}
+
+		falcon->func->bind_inst(falcon, target, nvkm_memory_addr(fw->inst));
+
+		if (nvkm_msec(falcon->owner->device, 10,
+			if (falcon->func->bind_stat(falcon, falcon->func->bind_intr) == 5)
+				break;
+		) < 0)
+			return -ETIMEDOUT;
+
+		nvkm_falcon_mask(falcon, 0x004, 0x00000008, 0x00000008);
+		nvkm_falcon_mask(falcon, 0x058, 0x00000002, 0x00000002);
+
+		if (nvkm_msec(falcon->owner->device, 10,
+			if (falcon->func->bind_stat(falcon, false) == 0)
+				break;
+		) < 0)
+			return -ETIMEDOUT;
+	} else {
+		nvkm_falcon_mask(falcon, 0x624, 0x00000080, 0x00000080);
+		nvkm_falcon_wr32(falcon, 0x10c, 0x00000000);
+	}
+
+	if (fw->boot) {
+		switch (nvkm_memory_target(&fw->fw.mem.memory)) {
+		case NVKM_MEM_TARGET_VRAM: target = 4; break;
+		case NVKM_MEM_TARGET_HOST: target = 5; break;
+		case NVKM_MEM_TARGET_NCOH: target = 6; break;
+		default:
+			WARN_ON(1);
+			return -EINVAL;
+		}
+
+		ret = nvkm_falcon_pio_wr(falcon, fw->boot, 0, 0,
+					 IMEM, falcon->code.limit - fw->boot_size, fw->boot_size,
+					 fw->boot_addr >> 8, false);
+		if (ret)
+			return ret;
+
+		return fw->func->load_bld(fw);
+	}
+
+	ret = nvkm_falcon_pio_wr(falcon, fw->fw.img + fw->nmem_base_img, fw->nmem_base_img, 0,
+				 IMEM, fw->nmem_base, fw->nmem_size, fw->nmem_base >> 8, false);
+	if (ret)
+		return ret;
+
+	ret = nvkm_falcon_pio_wr(falcon, fw->fw.img + fw->imem_base_img, fw->imem_base_img, 0,
+				 IMEM, fw->imem_base, fw->imem_size, fw->imem_base >> 8, true);
+	if (ret)
+		return ret;
+
+	ret = nvkm_falcon_pio_wr(falcon, fw->fw.img + fw->dmem_base_img, fw->dmem_base_img, 0,
+				 DMEM, fw->dmem_base, fw->dmem_size, 0, false);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+int
+gm200_flcn_fw_reset(struct nvkm_falcon_fw *fw)
+{
+	return nvkm_falcon_reset(fw->falcon);
+}
+
+int
+gm200_flcn_fw_signature(struct nvkm_falcon_fw *fw, u32 *sig_base_src)
+{
+	struct nvkm_falcon *falcon = fw->falcon;
+	u32 addr = falcon->func->debug;
+	int ret = 0;
+
+	if (addr) {
+		ret = nvkm_falcon_enable(falcon);
+		if (ret)
+			return ret;
+
+		if (nvkm_falcon_rd32(falcon, addr) & 0x00100000) {
+			*sig_base_src = fw->sig_base_dbg;
+			return 1;
+		}
+	}
+
+	return ret;
+}
+
+const struct nvkm_falcon_fw_func
+gm200_flcn_fw = {
+	.signature = gm200_flcn_fw_signature,
+	.reset = gm200_flcn_fw_reset,
+	.load = gm200_flcn_fw_load,
+	.boot = gm200_flcn_fw_boot,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/gp102.c b/drivers/gpu/drm/nouveau/nvkm/falcon/gp102.c
new file mode 100644
index 000000000000..c774935f3077
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/gp102.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2022 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+static void
+gp102_flcn_pio_emem_rd(struct nvkm_falcon *falcon, u8 port, const u8 *img, int len)
+{
+	while (len >= 4) {
+		*(u32 *)img = nvkm_falcon_rd32(falcon, 0xac4 + (port * 8));
+		img += 4;
+		len -= 4;
+	}
+}
+
+static void
+gp102_flcn_pio_emem_rd_init(struct nvkm_falcon *falcon, u8 port, u32 dmem_base)
+{
+	nvkm_falcon_wr32(falcon, 0xac0 + (port * 8), BIT(25) | dmem_base);
+}
+
+static void
+gp102_flcn_pio_emem_wr(struct nvkm_falcon *falcon, u8 port, const u8 *img, int len, u16 tag)
+{
+	while (len >= 4) {
+		nvkm_falcon_wr32(falcon, 0xac4 + (port * 8), *(u32 *)img);
+		img += 4;
+		len -= 4;
+	}
+}
+
+static void
+gp102_flcn_pio_emem_wr_init(struct nvkm_falcon *falcon, u8 port, bool sec, u32 emem_base)
+{
+	nvkm_falcon_wr32(falcon, 0xac0 + (port * 8), BIT(24) | emem_base);
+}
+
+const struct nvkm_falcon_func_pio
+gp102_flcn_emem_pio = {
+	.min = 4,
+	.max = 0x100,
+	.wr_init = gp102_flcn_pio_emem_wr_init,
+	.wr = gp102_flcn_pio_emem_wr,
+	.rd_init = gp102_flcn_pio_emem_rd_init,
+	.rd = gp102_flcn_pio_emem_rd,
+};
+
+int
+gp102_flcn_reset_eng(struct nvkm_falcon *falcon)
+{
+	int ret;
+
+	if (falcon->func->reset_prep) {
+		ret = falcon->func->reset_prep(falcon);
+		if (ret)
+			return ret;
+	}
+
+	nvkm_falcon_mask(falcon, 0x3c0, 0x00000001, 0x00000001);
+	udelay(10);
+	nvkm_falcon_mask(falcon, 0x3c0, 0x00000001, 0x00000000);
+
+	return falcon->func->reset_wait_mem_scrubbing(falcon);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/msgq.c b/drivers/gpu/drm/nouveau/nvkm/falcon/msgq.c
index e74371dffc76..16b246fda666 100644
--- a/drivers/gpu/drm/nouveau/nvkm/falcon/msgq.c
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/msgq.c
@@ -25,7 +25,7 @@
 static void
 nvkm_falcon_msgq_open(struct nvkm_falcon_msgq *msgq)
 {
-	mutex_lock(&msgq->mutex);
+	spin_lock(&msgq->lock);
 	msgq->position = nvkm_falcon_rd32(msgq->qmgr->falcon, msgq->tail_reg);
 }
 
@@ -37,10 +37,10 @@ nvkm_falcon_msgq_close(struct nvkm_falcon_msgq *msgq, bool commit)
 	if (commit)
 		nvkm_falcon_wr32(falcon, msgq->tail_reg, msgq->position);
 
-	mutex_unlock(&msgq->mutex);
+	spin_unlock(&msgq->lock);
 }
 
-static bool
+bool
 nvkm_falcon_msgq_empty(struct nvkm_falcon_msgq *msgq)
 {
 	u32 head = nvkm_falcon_rd32(msgq->qmgr->falcon, msgq->head_reg);
@@ -68,7 +68,7 @@ nvkm_falcon_msgq_pop(struct nvkm_falcon_msgq *msgq, void *data, u32 size)
 		return -EINVAL;
 	}
 
-	nvkm_falcon_read_dmem(falcon, tail, size, 0, data);
+	nvkm_falcon_pio_rd(falcon, 0, DMEM, tail, data, 0, size);
 	msgq->position += ALIGN(size, QUEUE_ALIGNMENT);
 	return 0;
 }
@@ -208,6 +208,6 @@ nvkm_falcon_msgq_new(struct nvkm_falcon_qmgr *qmgr, const char *name,
 
 	msgq->qmgr = qmgr;
 	msgq->name = name;
-	mutex_init(&msgq->mutex);
+	spin_lock_init(&msgq->lock);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h b/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h
index 466188752eb0..11a24b9c8569 100644
--- a/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h
@@ -2,4 +2,12 @@
 #ifndef __NVKM_FALCON_PRIV_H__
 #define __NVKM_FALCON_PRIV_H__
 #include <core/falcon.h>
+
+static inline int
+nvkm_falcon_enable(struct nvkm_falcon *falcon)
+{
+	if (falcon->func->enable)
+		return falcon->func->enable(falcon);
+	return 0;
+}
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/qmgr.h b/drivers/gpu/drm/nouveau/nvkm/falcon/qmgr.h
index 976cb7b7aa99..79f0da9e749f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/falcon/qmgr.h
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/qmgr.h
@@ -73,7 +73,7 @@ struct nvkm_falcon_cmdq {
 struct nvkm_falcon_msgq {
 	struct nvkm_falcon_qmgr *qmgr;
 	const char *name;
-	struct mutex mutex;
+	spinlock_t lock;
 
 	u32 head_reg;
 	u32 tail_reg;
@@ -82,8 +82,7 @@ struct nvkm_falcon_msgq {
 	u32 position;
 };
 
-#define FLCNQ_PRINTK(t,q,f,a...)                                               \
-       FLCN_PRINTK(t, (q)->qmgr->falcon, "%s: "f, (q)->name, ##a)
-#define FLCNQ_DBG(q,f,a...) FLCNQ_PRINTK(debug, (q), f, ##a)
-#define FLCNQ_ERR(q,f,a...) FLCNQ_PRINTK(error, (q), f, ##a)
+#define FLCNQ_PRINTK(q,l,p,f,a...) FLCN_PRINTK((q)->qmgr->falcon, l, p, "%s: "f, (q)->name, ##a)
+#define FLCNQ_DBG(q,f,a...) FLCNQ_PRINTK((q), DEBUG, info, f, ##a)
+#define FLCNQ_ERR(q,f,a...) FLCNQ_PRINTK((q), ERROR, err, f, ##a)
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c
index b0ee4c31414c..dd2ddc54ac60 100644
--- a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c
@@ -64,44 +64,13 @@ nvkm_falcon_v1_load_imem(struct nvkm_falcon *falcon, void *data, u32 start,
 		nvkm_falcon_wr32(falcon, 0x184 + (port * 16), 0);
 }
 
-static void
-nvkm_falcon_v1_load_emem(struct nvkm_falcon *falcon, void *data, u32 start,
-			 u32 size, u8 port)
-{
-	u8 rem = size % 4;
-	int i;
-
-	size -= rem;
-
-	nvkm_falcon_wr32(falcon, 0xac0 + (port * 8), start | (0x1 << 24));
-	for (i = 0; i < size / 4; i++)
-		nvkm_falcon_wr32(falcon, 0xac4 + (port * 8), ((u32 *)data)[i]);
-
-	/*
-	 * If size is not a multiple of 4, mask the last word to ensure garbage
-	 * does not get written
-	 */
-	if (rem) {
-		u32 extra = ((u32 *)data)[i];
-
-		nvkm_falcon_wr32(falcon, 0xac4 + (port * 8),
-				 extra & (BIT(rem * 8) - 1));
-	}
-}
-
 void
 nvkm_falcon_v1_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start,
 			 u32 size, u8 port)
 {
-	const struct nvkm_falcon_func *func = falcon->func;
 	u8 rem = size % 4;
 	int i;
 
-	if (func->emem_addr && start >= func->emem_addr)
-		return nvkm_falcon_v1_load_emem(falcon, data,
-						start - func->emem_addr, size,
-						port);
-
 	size -= rem;
 
 	nvkm_falcon_wr32(falcon, 0x1c0 + (port * 8), start | (0x1 << 24));
@@ -120,113 +89,6 @@ nvkm_falcon_v1_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start,
 	}
 }
 
-static void
-nvkm_falcon_v1_read_emem(struct nvkm_falcon *falcon, u32 start, u32 size,
-			 u8 port, void *data)
-{
-	u8 rem = size % 4;
-	int i;
-
-	size -= rem;
-
-	nvkm_falcon_wr32(falcon, 0xac0 + (port * 8), start | (0x1 << 25));
-	for (i = 0; i < size / 4; i++)
-		((u32 *)data)[i] = nvkm_falcon_rd32(falcon, 0xac4 + (port * 8));
-
-	/*
-	 * If size is not a multiple of 4, mask the last word to ensure garbage
-	 * does not get read
-	 */
-	if (rem) {
-		u32 extra = nvkm_falcon_rd32(falcon, 0xac4 + (port * 8));
-
-		for (i = size; i < size + rem; i++) {
-			((u8 *)data)[i] = (u8)(extra & 0xff);
-			extra >>= 8;
-		}
-	}
-}
-
-void
-nvkm_falcon_v1_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size,
-			 u8 port, void *data)
-{
-	const struct nvkm_falcon_func *func = falcon->func;
-	u8 rem = size % 4;
-	int i;
-
-	if (func->emem_addr && start >= func->emem_addr)
-		return nvkm_falcon_v1_read_emem(falcon, start - func->emem_addr,
-						size, port, data);
-
-	size -= rem;
-
-	nvkm_falcon_wr32(falcon, 0x1c0 + (port * 8), start | (0x1 << 25));
-	for (i = 0; i < size / 4; i++)
-		((u32 *)data)[i] = nvkm_falcon_rd32(falcon, 0x1c4 + (port * 8));
-
-	/*
-	 * If size is not a multiple of 4, mask the last word to ensure garbage
-	 * does not get read
-	 */
-	if (rem) {
-		u32 extra = nvkm_falcon_rd32(falcon, 0x1c4 + (port * 8));
-
-		for (i = size; i < size + rem; i++) {
-			((u8 *)data)[i] = (u8)(extra & 0xff);
-			extra >>= 8;
-		}
-	}
-}
-
-void
-nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_memory *ctx)
-{
-	const u32 fbif = falcon->func->fbif;
-	u32 inst_loc;
-
-	/* disable instance block binding */
-	if (ctx == NULL) {
-		nvkm_falcon_wr32(falcon, 0x10c, 0x0);
-		return;
-	}
-
-	nvkm_falcon_wr32(falcon, 0x10c, 0x1);
-
-	/* setup apertures - virtual */
-	nvkm_falcon_wr32(falcon, fbif + 4 * FALCON_DMAIDX_UCODE, 0x4);
-	nvkm_falcon_wr32(falcon, fbif + 4 * FALCON_DMAIDX_VIRT, 0x0);
-	/* setup apertures - physical */
-	nvkm_falcon_wr32(falcon, fbif + 4 * FALCON_DMAIDX_PHYS_VID, 0x4);
-	nvkm_falcon_wr32(falcon, fbif + 4 * FALCON_DMAIDX_PHYS_SYS_COH, 0x5);
-	nvkm_falcon_wr32(falcon, fbif + 4 * FALCON_DMAIDX_PHYS_SYS_NCOH, 0x6);
-
-	/* Set context */
-	switch (nvkm_memory_target(ctx)) {
-	case NVKM_MEM_TARGET_VRAM: inst_loc = 0; break;
-	case NVKM_MEM_TARGET_HOST: inst_loc = 2; break;
-	case NVKM_MEM_TARGET_NCOH: inst_loc = 3; break;
-	default:
-		WARN_ON(1);
-		return;
-	}
-
-	/* Enable context */
-	nvkm_falcon_mask(falcon, 0x048, 0x1, 0x1);
-	nvkm_falcon_wr32(falcon, 0x054,
-			 ((nvkm_memory_addr(ctx) >> 12) & 0xfffffff) |
-			 (inst_loc << 28) | (1 << 30));
-
-	nvkm_falcon_mask(falcon, 0x090, 0x10000, 0x10000);
-	nvkm_falcon_mask(falcon, 0x0a4, 0x8, 0x8);
-}
-
-void
-nvkm_falcon_v1_set_start_addr(struct nvkm_falcon *falcon, u32 start_addr)
-{
-	nvkm_falcon_wr32(falcon, 0x104, start_addr);
-}
-
 void
 nvkm_falcon_v1_start(struct nvkm_falcon *falcon)
 {
@@ -237,75 +99,3 @@ nvkm_falcon_v1_start(struct nvkm_falcon *falcon)
 	else
 		nvkm_falcon_wr32(falcon, 0x100, 0x2);
 }
-
-int
-nvkm_falcon_v1_wait_for_halt(struct nvkm_falcon *falcon, u32 ms)
-{
-	struct nvkm_device *device = falcon->owner->device;
-	int ret;
-
-	ret = nvkm_wait_msec(device, ms, falcon->addr + 0x100, 0x10, 0x10);
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-
-int
-nvkm_falcon_v1_clear_interrupt(struct nvkm_falcon *falcon, u32 mask)
-{
-	struct nvkm_device *device = falcon->owner->device;
-	int ret;
-
-	/* clear interrupt(s) */
-	nvkm_falcon_mask(falcon, 0x004, mask, mask);
-	/* wait until interrupts are cleared */
-	ret = nvkm_wait_msec(device, 10, falcon->addr + 0x008, mask, 0x0);
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-
-static int
-falcon_v1_wait_idle(struct nvkm_falcon *falcon)
-{
-	struct nvkm_device *device = falcon->owner->device;
-	int ret;
-
-	ret = nvkm_wait_msec(device, 10, falcon->addr + 0x04c, 0xffff, 0x0);
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-
-int
-nvkm_falcon_v1_enable(struct nvkm_falcon *falcon)
-{
-	struct nvkm_device *device = falcon->owner->device;
-	int ret;
-
-	ret = nvkm_wait_msec(device, 10, falcon->addr + 0x10c, 0x6, 0x0);
-	if (ret < 0) {
-		nvkm_error(falcon->user, "Falcon mem scrubbing timeout\n");
-		return ret;
-	}
-
-	ret = falcon_v1_wait_idle(falcon);
-	if (ret)
-		return ret;
-
-	/* enable IRQs */
-	nvkm_falcon_wr32(falcon, 0x010, 0xff);
-
-	return 0;
-}
-
-void
-nvkm_falcon_v1_disable(struct nvkm_falcon *falcon)
-{
-	/* disable IRQs and wait for any previous code to complete */
-	nvkm_falcon_wr32(falcon, 0x014, 0xff);
-	falcon_v1_wait_idle(falcon);
-}
diff --git a/drivers/gpu/drm/nouveau/nvkm/nvfw/acr.c b/drivers/gpu/drm/nouveau/nvkm/nvfw/acr.c
index bef790ad8f2f..83a9c48bc58c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/nvfw/acr.c
+++ b/drivers/gpu/drm/nouveau/nvkm/nvfw/acr.c
@@ -45,6 +45,47 @@ wpr_header_v1_dump(struct nvkm_subdev *subdev, const struct wpr_header_v1 *hdr)
 	nvkm_debug(subdev, "\tstatus        : %d\n", hdr->status);
 }
 
+void
+wpr_generic_header_dump(struct nvkm_subdev *subdev, const struct wpr_generic_header *hdr)
+{
+	nvkm_debug(subdev, "wprGenericHeader\n");
+	nvkm_debug(subdev, "\tidentifier : %04x\n", hdr->identifier);
+	nvkm_debug(subdev, "\tversion    : %04x\n", hdr->version);
+	nvkm_debug(subdev, "\tsize       : %08x\n", hdr->size);
+}
+
+void
+wpr_header_v2_dump(struct nvkm_subdev *subdev, const struct wpr_header_v2 *hdr)
+{
+	wpr_generic_header_dump(subdev, &hdr->hdr);
+	wpr_header_v1_dump(subdev, &hdr->wpr);
+}
+
+void
+lsb_header_v2_dump(struct nvkm_subdev *subdev, struct lsb_header_v2 *hdr)
+{
+	wpr_generic_header_dump(subdev, &hdr->hdr);
+	nvkm_debug(subdev, "lsbHeader\n");
+	nvkm_debug(subdev, "\tucodeOff      : 0x%x\n", hdr->ucode_off);
+	nvkm_debug(subdev, "\tucodeSize     : 0x%x\n", hdr->ucode_size);
+	nvkm_debug(subdev, "\tdataSize      : 0x%x\n", hdr->data_size);
+	nvkm_debug(subdev, "\tblCodeSize    : 0x%x\n", hdr->bl_code_size);
+	nvkm_debug(subdev, "\tblImemOff     : 0x%x\n", hdr->bl_imem_off);
+	nvkm_debug(subdev, "\tblDataOff     : 0x%x\n", hdr->bl_data_off);
+	nvkm_debug(subdev, "\tblDataSize    : 0x%x\n", hdr->bl_data_size);
+	nvkm_debug(subdev, "\treserved0     : %08x\n", hdr->rsvd0);
+	nvkm_debug(subdev, "\tappCodeOff    : 0x%x\n", hdr->app_code_off);
+	nvkm_debug(subdev, "\tappCodeSize   : 0x%x\n", hdr->app_code_size);
+	nvkm_debug(subdev, "\tappDataOff    : 0x%x\n", hdr->app_data_off);
+	nvkm_debug(subdev, "\tappDataSize   : 0x%x\n", hdr->app_data_size);
+	nvkm_debug(subdev, "\tappImemOffset : 0x%x\n", hdr->app_imem_offset);
+	nvkm_debug(subdev, "\tappDmemOffset : 0x%x\n", hdr->app_dmem_offset);
+	nvkm_debug(subdev, "\tflags         : 0x%x\n", hdr->flags);
+	nvkm_debug(subdev, "\tmonitorCodeOff: 0x%x\n", hdr->monitor_code_offset);
+	nvkm_debug(subdev, "\tmonitorDataOff: 0x%x\n", hdr->monitor_data_offset);
+	nvkm_debug(subdev, "\tmanifestOffset: 0x%x\n", hdr->manifest_offset);
+}
+
 static void
 lsb_header_tail_dump(struct nvkm_subdev *subdev, struct lsb_header_tail *hdr)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/nvfw/hs.c b/drivers/gpu/drm/nouveau/nvkm/nvfw/hs.c
index 04ed77cb2eba..a7e0583401d0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/nvfw/hs.c
+++ b/drivers/gpu/drm/nouveau/nvkm/nvfw/hs.c
@@ -38,6 +38,24 @@ nvfw_hs_header(struct nvkm_subdev *subdev, const void *data)
 	return hdr;
 }
 
+const struct nvfw_hs_header_v2 *
+nvfw_hs_header_v2(struct nvkm_subdev *subdev, const void *data)
+{
+	const struct nvfw_hs_header_v2 *hdr = data;
+
+	nvkm_debug(subdev, "hsHeader:\n");
+	nvkm_debug(subdev, "\tsigProdOffset    : 0x%x\n", hdr->sig_prod_offset);
+	nvkm_debug(subdev, "\tsigProdSize      : 0x%x\n", hdr->sig_prod_size);
+	nvkm_debug(subdev, "\tpatchLoc         : 0x%x\n", hdr->patch_loc);
+	nvkm_debug(subdev, "\tpatchSig         : 0x%x\n", hdr->patch_sig);
+	nvkm_debug(subdev, "\tmetadataOffset   : 0x%x\n", hdr->meta_data_offset);
+	nvkm_debug(subdev, "\tmetadataSize     : 0x%x\n", hdr->meta_data_size);
+	nvkm_debug(subdev, "\tnumSig           : 0x%x\n", hdr->num_sig);
+	nvkm_debug(subdev, "\theaderOffset     : 0x%x\n", hdr->header_offset);
+	nvkm_debug(subdev, "\theaderSize       : 0x%x\n", hdr->header_size);
+	return hdr;
+}
+
 const struct nvfw_hs_load_header *
 nvfw_hs_load_header(struct nvkm_subdev *subdev, const void *data)
 {
@@ -60,3 +78,24 @@ nvfw_hs_load_header(struct nvkm_subdev *subdev, const void *data)
 
 	return hdr;
 }
+
+const struct nvfw_hs_load_header_v2 *
+nvfw_hs_load_header_v2(struct nvkm_subdev *subdev, const void *data)
+{
+	const struct nvfw_hs_load_header_v2 *hdr = data;
+	int i;
+
+	nvkm_debug(subdev, "hsLoadHeader:\n");
+	nvkm_debug(subdev, "\tosCodeOffset     : 0x%x\n", hdr->os_code_offset);
+	nvkm_debug(subdev, "\tosCodeSize       : 0x%x\n", hdr->os_code_size);
+	nvkm_debug(subdev, "\tosDataOffset     : 0x%x\n", hdr->os_data_offset);
+	nvkm_debug(subdev, "\tosDataSize       : 0x%x\n", hdr->os_data_size);
+	nvkm_debug(subdev, "\tnumApps          : 0x%x\n", hdr->num_apps);
+	for (i = 0; i < hdr->num_apps; i++) {
+		nvkm_debug(subdev,
+			   "\tApp[%d]           : offset 0x%x size 0x%x\n", i,
+			   hdr->app[i].offset, hdr->app[i].size);
+	}
+
+	return hdr;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/nvfw/ls.c b/drivers/gpu/drm/nouveau/nvkm/nvfw/ls.c
index b847f281ce97..45c3a6c5e088 100644
--- a/drivers/gpu/drm/nouveau/nvkm/nvfw/ls.c
+++ b/drivers/gpu/drm/nouveau/nvkm/nvfw/ls.c
@@ -106,3 +106,75 @@ nvfw_ls_desc_v1(struct nvkm_subdev *subdev, const void *data)
 
 	return hdr;
 }
+
+const struct nvfw_ls_desc_v2 *
+nvfw_ls_desc_v2(struct nvkm_subdev *subdev, const void *data)
+{
+	const struct nvfw_ls_desc_v2 *hdr = data;
+	char *date;
+	int i;
+
+	nvkm_debug(subdev, "lsUcodeImgDesc:\n");
+	nvkm_debug(subdev, "\tdescriptorSize       : %d\n", hdr->descriptor_size);
+	nvkm_debug(subdev, "\timageSize            : %d\n", hdr->image_size);
+	nvkm_debug(subdev, "\ttoolsVersion         : 0x%x\n", hdr->tools_version);
+	nvkm_debug(subdev, "\tappVersion           : 0x%x\n", hdr->app_version);
+
+	date = kstrndup(hdr->date, sizeof(hdr->date), GFP_KERNEL);
+	nvkm_debug(subdev, "\tdate                 : %s\n", date);
+	kfree(date);
+
+	nvkm_debug(subdev, "\tsecureBootloader     : 0x%x\n", hdr->secure_bootloader);
+	nvkm_debug(subdev, "\tbootloaderStartOffset: 0x%x\n", hdr->bootloader_start_offset);
+	nvkm_debug(subdev, "\tbootloaderSize       : 0x%x\n", hdr->bootloader_size);
+	nvkm_debug(subdev, "\tbootloaderImemOffset : 0x%x\n", hdr->bootloader_imem_offset);
+	nvkm_debug(subdev, "\tbootloaderEntryPoint : 0x%x\n", hdr->bootloader_entry_point);
+
+	nvkm_debug(subdev, "\tappStartOffset       : 0x%x\n", hdr->app_start_offset);
+	nvkm_debug(subdev, "\tappSize              : 0x%x\n", hdr->app_size);
+	nvkm_debug(subdev, "\tappImemOffset        : 0x%x\n", hdr->app_imem_offset);
+	nvkm_debug(subdev, "\tappImemEntry         : 0x%x\n", hdr->app_imem_entry);
+	nvkm_debug(subdev, "\tappDmemOffset        : 0x%x\n", hdr->app_dmem_offset);
+	nvkm_debug(subdev, "\tappResidentCodeOffset: 0x%x\n", hdr->app_resident_code_offset);
+	nvkm_debug(subdev, "\tappResidentCodeSize  : 0x%x\n", hdr->app_resident_code_size);
+	nvkm_debug(subdev, "\tappResidentDataOffset: 0x%x\n", hdr->app_resident_data_offset);
+	nvkm_debug(subdev, "\tappResidentDataSize  : 0x%x\n", hdr->app_resident_data_size);
+
+	nvkm_debug(subdev, "\tnbImemOverlays       : %d\n", hdr->nb_imem_overlays);
+	nvkm_debug(subdev, "\tnbDmemOverlays       : %d\n", hdr->nb_dmem_overlays);
+	for (i = 0; i < ARRAY_SIZE(hdr->load_ovl); i++) {
+		nvkm_debug(subdev, "\tloadOvl[%d]          : 0x%x %d\n", i,
+			   hdr->load_ovl[i].start, hdr->load_ovl[i].size);
+	}
+
+	return hdr;
+}
+
+const struct nvfw_ls_hsbl_bin_hdr *
+nvfw_ls_hsbl_bin_hdr(struct nvkm_subdev *subdev, const void *data)
+{
+	const struct nvfw_ls_hsbl_bin_hdr *hdr = data;
+
+	nvkm_debug(subdev, "lsHsblBinHdr:\n");
+	nvkm_debug(subdev, "\tbinMagic         : 0x%08x\n", hdr->bin_magic);
+	nvkm_debug(subdev, "\tbinVer           : %d\n", hdr->bin_ver);
+	nvkm_debug(subdev, "\tbinSize          : %d\n", hdr->bin_size);
+	nvkm_debug(subdev, "\theaderOffset     : 0x%x\n", hdr->header_offset);
+	return hdr;
+}
+
+const struct nvfw_ls_hsbl_hdr *
+nvfw_ls_hsbl_hdr(struct nvkm_subdev *subdev, const void *data)
+{
+	const struct nvfw_ls_hsbl_hdr *hdr = data;
+
+	nvkm_debug(subdev, "lsHsblHdr:\n");
+	nvkm_debug(subdev, "\tsigProdOffset    : 0x%x\n", hdr->sig_prod_offset);
+	nvkm_debug(subdev, "\tsigProdSize      : 0x%x\n", hdr->sig_prod_size);
+	nvkm_debug(subdev, "\tpatchLoc         : 0x%x\n", hdr->patch_loc);
+	nvkm_debug(subdev, "\tpatchSig         : 0x%x\n", hdr->patch_sig);
+	nvkm_debug(subdev, "\tmetadataOffset   : 0x%x\n", hdr->meta_data_offset);
+	nvkm_debug(subdev, "\tmetadataSize     : 0x%x\n", hdr->meta_data_size);
+	nvkm_debug(subdev, "\tnumSig           : 0x%x\n", hdr->num_sig);
+	return hdr;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild
index 2cb24fff7e32..4c2f6fc4ef58 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/Kbuild
@@ -23,4 +23,5 @@ include $(src)/nvkm/subdev/privring/Kbuild
 include $(src)/nvkm/subdev/therm/Kbuild
 include $(src)/nvkm/subdev/timer/Kbuild
 include $(src)/nvkm/subdev/top/Kbuild
+include $(src)/nvkm/subdev/vfn/Kbuild
 include $(src)/nvkm/subdev/volt/Kbuild
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/Kbuild
index 5b9f64a8957f..5731f35b11e1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/Kbuild
@@ -1,10 +1,12 @@
 # SPDX-License-Identifier: MIT
 nvkm-y += nvkm/subdev/acr/base.o
-nvkm-y += nvkm/subdev/acr/hsfw.o
 nvkm-y += nvkm/subdev/acr/lsfw.o
 nvkm-y += nvkm/subdev/acr/gm200.o
 nvkm-y += nvkm/subdev/acr/gm20b.o
 nvkm-y += nvkm/subdev/acr/gp102.o
 nvkm-y += nvkm/subdev/acr/gp108.o
+nvkm-y += nvkm/subdev/acr/gv100.o
 nvkm-y += nvkm/subdev/acr/gp10b.o
 nvkm-y += nvkm/subdev/acr/tu102.o
+nvkm-y += nvkm/subdev/acr/ga100.o
+nvkm-y += nvkm/subdev/acr/ga102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c
index af6cac696d43..795f3a649b12 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/base.c
@@ -24,50 +24,63 @@
 #include <core/firmware.h>
 #include <core/memory.h>
 #include <subdev/mmu.h>
+#include <subdev/gsp.h>
+#include <subdev/pmu.h>
+#include <engine/sec2.h>
+#include <engine/nvdec.h>
 
-static struct nvkm_acr_hsf *
-nvkm_acr_hsf_find(struct nvkm_acr *acr, const char *name)
+static struct nvkm_acr_hsfw *
+nvkm_acr_hsfw_find(struct nvkm_acr *acr, const char *name)
 {
-	struct nvkm_acr_hsf *hsf;
-	list_for_each_entry(hsf, &acr->hsf, head) {
-		if (!strcmp(hsf->name, name))
-			return hsf;
+	struct nvkm_acr_hsfw *hsfw;
+
+	list_for_each_entry(hsfw, &acr->hsfw, head) {
+		if (!strcmp(hsfw->fw.fw.name, name))
+			return hsfw;
 	}
+
 	return NULL;
 }
 
 int
-nvkm_acr_hsf_boot(struct nvkm_acr *acr, const char *name)
+nvkm_acr_hsfw_boot(struct nvkm_acr *acr, const char *name)
 {
 	struct nvkm_subdev *subdev = &acr->subdev;
-	struct nvkm_acr_hsf *hsf;
-	int ret;
+	struct nvkm_acr_hsfw *hsfw;
 
-	hsf = nvkm_acr_hsf_find(acr, name);
-	if (!hsf)
+	hsfw = nvkm_acr_hsfw_find(acr, name);
+	if (!hsfw)
 		return -EINVAL;
 
-	nvkm_debug(subdev, "executing %s binary\n", hsf->name);
-	ret = nvkm_falcon_get(hsf->falcon, subdev);
-	if (ret)
-		return ret;
+	return nvkm_falcon_fw_boot(&hsfw->fw, subdev, true, NULL, NULL,
+				   hsfw->boot_mbox0, hsfw->intr_clear);
+}
 
-	ret = hsf->func->boot(acr, hsf);
-	nvkm_falcon_put(hsf->falcon, subdev);
-	if (ret) {
-		nvkm_error(subdev, "%s binary failed\n", hsf->name);
-		return ret;
+static struct nvkm_acr_lsf *
+nvkm_acr_rtos(struct nvkm_acr *acr)
+{
+	struct nvkm_acr_lsf *lsf;
+
+	if (acr) {
+		list_for_each_entry(lsf, &acr->lsf, head) {
+			if (lsf->func->bootstrap_falcon)
+				return lsf;
+		}
 	}
 
-	nvkm_debug(subdev, "%s binary completed successfully\n", hsf->name);
-	return 0;
+	return NULL;
 }
 
 static void
 nvkm_acr_unload(struct nvkm_acr *acr)
 {
 	if (acr->done) {
-		nvkm_acr_hsf_boot(acr, "unload");
+		if (acr->rtos) {
+			nvkm_subdev_unref(acr->rtos->falcon->owner);
+			acr->rtos = NULL;
+		}
+
+		nvkm_acr_hsfw_boot(acr, "unload");
 		acr->done = false;
 	}
 }
@@ -76,7 +89,7 @@ static int
 nvkm_acr_load(struct nvkm_acr *acr)
 {
 	struct nvkm_subdev *subdev = &acr->subdev;
-	struct nvkm_acr_lsf *lsf;
+	struct nvkm_acr_lsf *rtos = nvkm_acr_rtos(acr);
 	u64 start, limit;
 	int ret;
 
@@ -100,12 +113,12 @@ nvkm_acr_load(struct nvkm_acr *acr)
 
 	acr->done = true;
 
-	list_for_each_entry(lsf, &acr->lsf, head) {
-		if (lsf->func->boot) {
-			ret = lsf->func->boot(lsf->falcon);
-			if (ret)
-				break;
-		}
+	if (rtos) {
+		ret = nvkm_subdev_ref(rtos->falcon->owner);
+		if (ret)
+			return ret;
+
+		acr->rtos = rtos;
 	}
 
 	return ret;
@@ -118,33 +131,17 @@ nvkm_acr_reload(struct nvkm_acr *acr)
 	return nvkm_acr_load(acr);
 }
 
-static struct nvkm_acr_lsf *
-nvkm_acr_falcon(struct nvkm_device *device)
-{
-	struct nvkm_acr *acr = device->acr;
-	struct nvkm_acr_lsf *lsf;
-
-	if (acr) {
-		list_for_each_entry(lsf, &acr->lsf, head) {
-			if (lsf->func->bootstrap_falcon)
-				return lsf;
-		}
-	}
-
-	return NULL;
-}
-
 int
 nvkm_acr_bootstrap_falcons(struct nvkm_device *device, unsigned long mask)
 {
-	struct nvkm_acr_lsf *acrflcn = nvkm_acr_falcon(device);
 	struct nvkm_acr *acr = device->acr;
+	struct nvkm_acr_lsf *rtos = nvkm_acr_rtos(acr);
 	unsigned long id;
 
 	/* If there's no LS FW managing bootstrapping of other LS falcons,
 	 * we depend on the HS firmware being able to do it instead.
 	 */
-	if (!acrflcn) {
+	if (!rtos) {
 		/* Which isn't possible everywhere... */
 		if ((mask & acr->func->bootstrap_falcons) == mask) {
 			int ret = nvkm_acr_reload(acr);
@@ -156,16 +153,14 @@ nvkm_acr_bootstrap_falcons(struct nvkm_device *device, unsigned long mask)
 		return -ENOSYS;
 	}
 
-	if ((mask & acrflcn->func->bootstrap_falcons) != mask)
+	if ((mask & rtos->func->bootstrap_falcons) != mask)
 		return -ENOSYS;
 
-	if (acrflcn->func->bootstrap_multiple_falcons) {
-		return acrflcn->func->
-			bootstrap_multiple_falcons(acrflcn->falcon, mask);
-	}
+	if (rtos->func->bootstrap_multiple_falcons)
+		return rtos->func->bootstrap_multiple_falcons(rtos->falcon, mask);
 
 	for_each_set_bit(id, &mask, NVKM_ACR_LSF_NUM) {
-		int ret = acrflcn->func->bootstrap_falcon(acrflcn->falcon, id);
+		int ret = rtos->func->bootstrap_falcon(rtos->falcon, id);
 		if (ret)
 			return ret;
 	}
@@ -189,6 +184,9 @@ nvkm_acr_managed_falcon(struct nvkm_device *device, enum nvkm_acr_lsf_id id)
 static int
 nvkm_acr_fini(struct nvkm_subdev *subdev, bool suspend)
 {
+	if (!subdev->use.enabled)
+		return 0;
+
 	nvkm_acr_unload(nvkm_acr(subdev));
 	return 0;
 }
@@ -196,17 +194,19 @@ nvkm_acr_fini(struct nvkm_subdev *subdev, bool suspend)
 static int
 nvkm_acr_init(struct nvkm_subdev *subdev)
 {
-	if (!nvkm_acr_falcon(subdev->device))
+	struct nvkm_acr *acr = nvkm_acr(subdev);
+
+	if (!nvkm_acr_rtos(acr))
 		return 0;
 
-	return nvkm_acr_load(nvkm_acr(subdev));
+	return nvkm_acr_load(acr);
 }
 
 static void
 nvkm_acr_cleanup(struct nvkm_acr *acr)
 {
 	nvkm_acr_lsfw_del_all(acr);
-	nvkm_acr_hsfw_del_all(acr);
+
 	nvkm_firmware_put(acr->wpr_fw);
 	acr->wpr_fw = NULL;
 }
@@ -218,7 +218,8 @@ nvkm_acr_oneinit(struct nvkm_subdev *subdev)
 	struct nvkm_acr *acr = nvkm_acr(subdev);
 	struct nvkm_acr_hsfw *hsfw;
 	struct nvkm_acr_lsfw *lsfw, *lsft;
-	struct nvkm_acr_lsf *lsf;
+	struct nvkm_acr_lsf *lsf, *rtos;
+	struct nvkm_falcon *falcon;
 	u32 wpr_size = 0;
 	u64 falcons;
 	int ret, i;
@@ -260,10 +261,10 @@ nvkm_acr_oneinit(struct nvkm_subdev *subdev)
 	}
 
 	/* Ensure the falcon that'll provide ACR functions is booted first. */
-	lsf = nvkm_acr_falcon(device);
-	if (lsf) {
-		falcons = lsf->func->bootstrap_falcons;
-		list_move(&lsf->head, &acr->lsf);
+	rtos = nvkm_acr_rtos(acr);
+	if (rtos) {
+		falcons = rtos->func->bootstrap_falcons;
+		list_move(&rtos->head, &acr->lsf);
 	} else {
 		falcons = acr->func->bootstrap_falcons;
 	}
@@ -301,7 +302,7 @@ nvkm_acr_oneinit(struct nvkm_subdev *subdev)
 		nvkm_wobj(acr->wpr, 0, acr->wpr_fw->data, acr->wpr_fw->size);
 
 	if (!acr->wpr_fw || acr->wpr_comp)
-		acr->func->wpr_build(acr, nvkm_acr_falcon(device));
+		acr->func->wpr_build(acr, rtos);
 	acr->func->wpr_patch(acr, (s64)acr->wpr_start - acr->wpr_prev);
 
 	if (acr->wpr_fw && acr->wpr_comp) {
@@ -336,8 +337,16 @@ nvkm_acr_oneinit(struct nvkm_subdev *subdev)
 
 	/* Load HS firmware blobs into ACR VMM. */
 	list_for_each_entry(hsfw, &acr->hsfw, head) {
-		nvkm_debug(subdev, "loading %s fw\n", hsfw->name);
-		ret = hsfw->func->load(acr, hsfw);
+		switch (hsfw->falcon_id) {
+		case NVKM_ACR_HSF_PMU : falcon = &device->pmu->falcon; break;
+		case NVKM_ACR_HSF_SEC2: falcon = &device->sec2->falcon; break;
+		case NVKM_ACR_HSF_GSP : falcon = &device->gsp->falcon; break;
+		default:
+			WARN_ON(1);
+			return -EINVAL;
+		}
+
+		ret = nvkm_falcon_fw_oneinit(&hsfw->fw, falcon, acr->vmm, acr->inst);
 		if (ret)
 			return ret;
 	}
@@ -351,15 +360,13 @@ static void *
 nvkm_acr_dtor(struct nvkm_subdev *subdev)
 {
 	struct nvkm_acr *acr = nvkm_acr(subdev);
-	struct nvkm_acr_hsf *hsf, *hst;
+	struct nvkm_acr_hsfw *hsfw, *hsft;
 	struct nvkm_acr_lsf *lsf, *lst;
 
-	list_for_each_entry_safe(hsf, hst, &acr->hsf, head) {
-		nvkm_vmm_put(acr->vmm, &hsf->vma);
-		nvkm_memory_unref(&hsf->ucode);
-		kfree(hsf->imem);
-		list_del(&hsf->head);
-		kfree(hsf);
+	list_for_each_entry_safe(hsfw, hsft, &acr->hsfw, head) {
+		nvkm_falcon_fw_dtor(&hsfw->fw);
+		list_del(&hsfw->head);
+		kfree(hsfw);
 	}
 
 	nvkm_vmm_part(acr->vmm, acr->inst);
@@ -420,7 +427,6 @@ nvkm_acr_new_(const struct nvkm_acr_fwif *fwif, struct nvkm_device *device,
 	nvkm_subdev_ctor(&nvkm_acr, device, type, inst, &acr->subdev);
 	INIT_LIST_HEAD(&acr->hsfw);
 	INIT_LIST_HEAD(&acr->lsfw);
-	INIT_LIST_HEAD(&acr->hsf);
 	INIT_LIST_HEAD(&acr->lsf);
 
 	fwif = nvkm_firmware_load(&acr->subdev, fwif, "Acr", acr);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/ga100.c
new file mode 100644
index 000000000000..e3370c1551c0
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/ga100.c
@@ -0,0 +1,49 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+void
+ga100_acr_wpr_check(struct nvkm_acr *acr, u64 *start, u64 *limit)
+{
+	struct nvkm_device *device = acr->subdev.device;
+
+	*start = (u64)(nvkm_rd32(device, 0x1fa81c) & 0xffffff00) << 8;
+	*limit = (u64)(nvkm_rd32(device, 0x1fa820) & 0xffffff00) << 8;
+	*limit = *limit + 0x20000;
+}
+
+int
+ga100_acr_hsfw_ctor(struct nvkm_acr *acr, const char *bl, const char *fw,
+		    const char *name, int ver, const struct nvkm_acr_hsf_fwif *fwif)
+{
+	struct nvkm_acr_hsfw *hsfw;
+
+	if (!(hsfw = kzalloc(sizeof(*hsfw), GFP_KERNEL)))
+		return -ENOMEM;
+
+	hsfw->falcon_id = fwif->falcon_id;
+	hsfw->boot_mbox0 = fwif->boot_mbox0;
+	hsfw->intr_clear = fwif->intr_clear;
+	list_add_tail(&hsfw->head, &acr->hsfw);
+
+	return nvkm_falcon_fw_ctor_hs_v2(fwif->func, name, &acr->subdev, fw, ver, NULL, &hsfw->fw);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/ga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/ga102.c
new file mode 100644
index 000000000000..45dcf493e972
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/ga102.c
@@ -0,0 +1,326 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <nvfw/acr.h>
+
+static int
+ga102_acr_wpr_patch(struct nvkm_acr *acr, s64 adjust)
+{
+	struct wpr_header_v2 hdr;
+	struct lsb_header_v2 *lsb;
+	struct nvkm_acr_lsfw *lsfw;
+	u32 offset = 0;
+
+	lsb = kvmalloc(sizeof(*lsb), GFP_KERNEL);
+	if (!lsb)
+		return -ENOMEM;
+
+	do {
+		nvkm_robj(acr->wpr, offset, &hdr, sizeof(hdr));
+		wpr_header_v2_dump(&acr->subdev, &hdr);
+
+		list_for_each_entry(lsfw, &acr->lsfw, head) {
+			if (lsfw->id != hdr.wpr.falcon_id)
+				continue;
+
+			nvkm_robj(acr->wpr, hdr.wpr.lsb_offset, lsb, sizeof(*lsb));
+			lsb_header_v2_dump(&acr->subdev, lsb);
+
+			lsfw->func->bld_patch(acr, lsb->bl_data_off, adjust);
+			break;
+		}
+
+		offset += sizeof(hdr);
+	} while (hdr.wpr.falcon_id != WPR_HEADER_V1_FALCON_ID_INVALID);
+
+	kvfree(lsb);
+	return 0;
+}
+
+static int
+ga102_acr_wpr_build_lsb(struct nvkm_acr *acr, struct nvkm_acr_lsfw *lsfw)
+{
+	struct lsb_header_v2 *hdr;
+	int ret = 0;
+
+	if (WARN_ON(lsfw->sig->size != sizeof(hdr->signature)))
+		return -EINVAL;
+
+	hdr = kvzalloc(sizeof(*hdr), GFP_KERNEL);
+	if (!hdr)
+		return -ENOMEM;
+
+	hdr->hdr.identifier = WPR_GENERIC_HEADER_ID_LSF_LSB_HEADER;
+	hdr->hdr.version = 2;
+	hdr->hdr.size = sizeof(*hdr);
+
+	memcpy(&hdr->signature, lsfw->sig->data, lsfw->sig->size);
+	hdr->ucode_off = lsfw->offset.img;
+	hdr->ucode_size = lsfw->ucode_size;
+	hdr->data_size = lsfw->data_size;
+	hdr->bl_code_size = lsfw->bootloader_size;
+	hdr->bl_imem_off = lsfw->bootloader_imem_offset;
+	hdr->bl_data_off = lsfw->offset.bld;
+	hdr->bl_data_size = lsfw->bl_data_size;
+	hdr->app_code_off = lsfw->app_start_offset + lsfw->app_resident_code_offset;
+	hdr->app_code_size = ALIGN(lsfw->app_resident_code_size, 0x100);
+	hdr->app_data_off = lsfw->app_start_offset + lsfw->app_resident_data_offset;
+	hdr->app_data_size = ALIGN(lsfw->app_resident_data_size, 0x100);
+	hdr->app_imem_offset = lsfw->app_imem_offset;
+	hdr->app_dmem_offset = lsfw->app_dmem_offset;
+	hdr->flags = lsfw->func->flags;
+	hdr->monitor_code_offset = 0;
+	hdr->monitor_data_offset = 0;
+	hdr->manifest_offset = 0;
+
+	if (lsfw->secure_bootloader) {
+		struct nvkm_falcon_fw fw = {
+			.fw.img = hdr->hs_fmc_params.pkc_signature,
+			.fw.name = "LSFW",
+			.func = &(const struct nvkm_falcon_fw_func) {
+				.signature = ga100_flcn_fw_signature,
+			},
+			.sig_size = lsfw->sig_size,
+			.sig_nr = lsfw->sig_nr,
+			.sigs = lsfw->sigs,
+			.fuse_ver = lsfw->fuse_ver,
+			.engine_id = lsfw->engine_id,
+			.ucode_id = lsfw->ucode_id,
+			.falcon = lsfw->falcon,
+
+		};
+
+		ret = nvkm_falcon_get(fw.falcon, &acr->subdev);
+		if (ret == 0) {
+			hdr->hs_fmc_params.hs_fmc = 1;
+			hdr->hs_fmc_params.pkc_algo = 0;
+			hdr->hs_fmc_params.pkc_algo_version = 1;
+			hdr->hs_fmc_params.engid_mask = lsfw->engine_id;
+			hdr->hs_fmc_params.ucode_id = lsfw->ucode_id;
+			hdr->hs_fmc_params.fuse_ver = lsfw->fuse_ver;
+			ret = nvkm_falcon_fw_patch(&fw);
+			nvkm_falcon_put(fw.falcon, &acr->subdev);
+		}
+	}
+
+	nvkm_wobj(acr->wpr, lsfw->offset.lsb, hdr, sizeof(*hdr));
+	kvfree(hdr);
+	return ret;
+}
+
+static int
+ga102_acr_wpr_build(struct nvkm_acr *acr, struct nvkm_acr_lsf *rtos)
+{
+	struct nvkm_acr_lsfw *lsfw;
+	struct wpr_header_v2 hdr;
+	u32 offset = 0;
+	int ret;
+
+	/*XXX: shared sub-WPR headers, fill terminator for now. */
+	nvkm_wo32(acr->wpr, 0x300, (2 << 16) | WPR_GENERIC_HEADER_ID_LSF_SHARED_SUB_WPR);
+	nvkm_wo32(acr->wpr, 0x304, 0x14);
+	nvkm_wo32(acr->wpr, 0x308, 0xffffffff);
+	nvkm_wo32(acr->wpr, 0x30c, 0);
+	nvkm_wo32(acr->wpr, 0x310, 0);
+
+	/* Fill per-LSF structures. */
+	list_for_each_entry(lsfw, &acr->lsfw, head) {
+		struct lsf_signature_v2 *sig = (void *)lsfw->sig->data;
+
+		hdr.hdr.identifier = WPR_GENERIC_HEADER_ID_LSF_WPR_HEADER;
+		hdr.hdr.version = 2;
+		hdr.hdr.size = sizeof(hdr);
+		hdr.wpr.falcon_id = lsfw->id;
+		hdr.wpr.lsb_offset = lsfw->offset.lsb;
+		hdr.wpr.bootstrap_owner = NVKM_ACR_LSF_GSPLITE;
+		hdr.wpr.lazy_bootstrap = 1;
+		hdr.wpr.bin_version = sig->ls_ucode_version;
+		hdr.wpr.status = WPR_HEADER_V1_STATUS_COPY;
+
+		/* Write WPR header. */
+		nvkm_wobj(acr->wpr, offset, &hdr, sizeof(hdr));
+		offset += sizeof(hdr);
+
+		/* Write LSB header. */
+		ret = ga102_acr_wpr_build_lsb(acr, lsfw);
+		if (ret)
+			return ret;
+
+		/* Write ucode image. */
+		nvkm_wobj(acr->wpr, lsfw->offset.img,
+				    lsfw->img.data,
+				    lsfw->img.size);
+
+		/* Write bootloader data. */
+		lsfw->func->bld_write(acr, lsfw->offset.bld, lsfw);
+	}
+
+	/* Finalise WPR. */
+	hdr.hdr.identifier = WPR_GENERIC_HEADER_ID_LSF_WPR_HEADER;
+	hdr.hdr.version = 2;
+	hdr.hdr.size = sizeof(hdr);
+	hdr.wpr.falcon_id = WPR_HEADER_V1_FALCON_ID_INVALID;
+	nvkm_wobj(acr->wpr, offset, &hdr, sizeof(hdr));
+	return 0;
+}
+
+static u32
+ga102_acr_wpr_layout(struct nvkm_acr *acr)
+{
+	struct nvkm_acr_lsfw *lsfw;
+	u32 wpr = 0;
+
+	wpr += 21 /* MAX_LSF */ * sizeof(struct wpr_header_v2);
+	wpr  = ALIGN(wpr, 256);
+
+	wpr += 0x100; /* Shared sub-WPR headers. */
+
+	list_for_each_entry(lsfw, &acr->lsfw, head) {
+		wpr  = ALIGN(wpr, 256);
+		lsfw->offset.lsb = wpr;
+		wpr += sizeof(struct lsb_header_v2);
+
+		wpr  = ALIGN(wpr, 4096);
+		lsfw->offset.img = wpr;
+		wpr += lsfw->img.size;
+
+		wpr  = ALIGN(wpr, 256);
+		lsfw->offset.bld = wpr;
+		lsfw->bl_data_size = ALIGN(lsfw->func->bld_size, 256);
+		wpr += lsfw->bl_data_size;
+	}
+
+	return wpr;
+}
+
+static int
+ga102_acr_wpr_parse(struct nvkm_acr *acr)
+{
+	const struct wpr_header_v2 *hdr = (void *)acr->wpr_fw->data;
+
+	while (hdr->wpr.falcon_id != WPR_HEADER_V1_FALCON_ID_INVALID) {
+		wpr_header_v2_dump(&acr->subdev, hdr);
+		if (!nvkm_acr_lsfw_add(NULL, acr, NULL, (hdr++)->wpr.falcon_id))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+MODULE_FIRMWARE("nvidia/ga102/acr/ucode_unload.bin");
+MODULE_FIRMWARE("nvidia/ga103/acr/ucode_unload.bin");
+MODULE_FIRMWARE("nvidia/ga104/acr/ucode_unload.bin");
+MODULE_FIRMWARE("nvidia/ga106/acr/ucode_unload.bin");
+MODULE_FIRMWARE("nvidia/ga107/acr/ucode_unload.bin");
+
+static const struct nvkm_acr_hsf_fwif
+ga102_acr_unload_fwif[] = {
+	{  0, ga100_acr_hsfw_ctor, &ga102_flcn_fw, NVKM_ACR_HSF_SEC2 },
+	{}
+};
+
+MODULE_FIRMWARE("nvidia/ga102/acr/ucode_asb.bin");
+MODULE_FIRMWARE("nvidia/ga103/acr/ucode_asb.bin");
+MODULE_FIRMWARE("nvidia/ga104/acr/ucode_asb.bin");
+MODULE_FIRMWARE("nvidia/ga106/acr/ucode_asb.bin");
+MODULE_FIRMWARE("nvidia/ga107/acr/ucode_asb.bin");
+
+static const struct nvkm_acr_hsf_fwif
+ga102_acr_asb_fwif[] = {
+	{  0, ga100_acr_hsfw_ctor, &ga102_flcn_fw, NVKM_ACR_HSF_GSP },
+	{}
+};
+
+static const struct nvkm_falcon_fw_func
+ga102_acr_ahesasc_0 = {
+	.signature = ga100_flcn_fw_signature,
+	.reset = gm200_flcn_fw_reset,
+	.setup = gp102_acr_load_setup,
+	.load = ga102_flcn_fw_load,
+	.boot = ga102_flcn_fw_boot,
+};
+
+MODULE_FIRMWARE("nvidia/ga102/acr/ucode_ahesasc.bin");
+MODULE_FIRMWARE("nvidia/ga103/acr/ucode_ahesasc.bin");
+MODULE_FIRMWARE("nvidia/ga104/acr/ucode_ahesasc.bin");
+MODULE_FIRMWARE("nvidia/ga106/acr/ucode_ahesasc.bin");
+MODULE_FIRMWARE("nvidia/ga107/acr/ucode_ahesasc.bin");
+
+static const struct nvkm_acr_hsf_fwif
+ga102_acr_ahesasc_fwif[] = {
+	{  0, ga100_acr_hsfw_ctor, &ga102_acr_ahesasc_0, NVKM_ACR_HSF_SEC2 },
+	{}
+};
+
+static const struct nvkm_acr_func
+ga102_acr = {
+	.ahesasc = ga102_acr_ahesasc_fwif,
+	.asb = ga102_acr_asb_fwif,
+	.unload = ga102_acr_unload_fwif,
+	.wpr_parse = ga102_acr_wpr_parse,
+	.wpr_layout = ga102_acr_wpr_layout,
+	.wpr_alloc = gp102_acr_wpr_alloc,
+	.wpr_patch = ga102_acr_wpr_patch,
+	.wpr_build = ga102_acr_wpr_build,
+	.wpr_check = ga100_acr_wpr_check,
+	.init = tu102_acr_init,
+};
+
+static int
+ga102_acr_load(struct nvkm_acr *acr, int version,
+	       const struct nvkm_acr_fwif *fwif)
+{
+	struct nvkm_subdev *subdev = &acr->subdev;
+	const struct nvkm_acr_hsf_fwif *hsfwif;
+
+	hsfwif = nvkm_firmware_load(subdev, fwif->func->ahesasc, "AcrAHESASC",
+				    acr, NULL, "acr/ucode_ahesasc", "AHESASC");
+	if (IS_ERR(hsfwif))
+		return PTR_ERR(hsfwif);
+
+	hsfwif = nvkm_firmware_load(subdev, fwif->func->asb, "AcrASB",
+				    acr, NULL, "acr/ucode_asb", "ASB");
+	if (IS_ERR(hsfwif))
+		return PTR_ERR(hsfwif);
+
+	hsfwif = nvkm_firmware_load(subdev, fwif->func->unload, "AcrUnload",
+				    acr, NULL, "acr/ucode_unload", "unload");
+	if (IS_ERR(hsfwif))
+		return PTR_ERR(hsfwif);
+
+	return 0;
+}
+
+static const struct nvkm_acr_fwif
+ga102_acr_fwif[] = {
+	{  0, ga102_acr_load, &ga102_acr },
+	{ -1, gm200_acr_nofw, &gm200_acr },
+	{}
+};
+
+int
+ga102_acr_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
+	      struct nvkm_acr **pacr)
+{
+	return nvkm_acr_new_(ga102_acr_fwif, device, type, inst, pacr);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c
index 82b4c8e1457c..31079c947758 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm200.c
@@ -46,7 +46,7 @@ gm200_acr_nofw(struct nvkm_acr *acr, int ver, const struct nvkm_acr_fwif *fwif)
 int
 gm200_acr_init(struct nvkm_acr *acr)
 {
-	return nvkm_acr_hsf_boot(acr, "load");
+	return nvkm_acr_hsfw_boot(acr, "load");
 }
 
 void
@@ -61,7 +61,7 @@ gm200_acr_wpr_check(struct nvkm_acr *acr, u64 *start, u64 *limit)
 	*limit = *limit + 0x20000;
 }
 
-void
+int
 gm200_acr_wpr_patch(struct nvkm_acr *acr, s64 adjust)
 {
 	struct nvkm_subdev *subdev = &acr->subdev;
@@ -86,6 +86,8 @@ gm200_acr_wpr_patch(struct nvkm_acr *acr, s64 adjust)
 		}
 		offset += sizeof(hdr);
 	} while (hdr.falcon_id != WPR_HEADER_V0_FALCON_ID_INVALID);
+
+	return 0;
 }
 
 void
@@ -219,162 +221,50 @@ gm200_acr_wpr_parse(struct nvkm_acr *acr)
 	return 0;
 }
 
-void
-gm200_acr_hsfw_bld(struct nvkm_acr *acr, struct nvkm_acr_hsf *hsf)
+int
+gm200_acr_hsfw_load_bld(struct nvkm_falcon_fw *fw)
 {
 	struct flcn_bl_dmem_desc_v1 hsdesc = {
 		.ctx_dma = FALCON_DMAIDX_VIRT,
-		.code_dma_base = hsf->vma->addr,
-		.non_sec_code_off = hsf->non_sec_addr,
-		.non_sec_code_size = hsf->non_sec_size,
-		.sec_code_off = hsf->sec_addr,
-		.sec_code_size = hsf->sec_size,
+		.code_dma_base = fw->vma->addr,
+		.non_sec_code_off = fw->nmem_base,
+		.non_sec_code_size = fw->nmem_size,
+		.sec_code_off = fw->imem_base,
+		.sec_code_size = fw->imem_size,
 		.code_entry_point = 0,
-		.data_dma_base = hsf->vma->addr + hsf->data_addr,
-		.data_size = hsf->data_size,
+		.data_dma_base = fw->vma->addr + fw->dmem_base_img,
+		.data_size = fw->dmem_size,
 	};
 
-	flcn_bl_dmem_desc_v1_dump(&acr->subdev, &hsdesc);
-
-	nvkm_falcon_load_dmem(hsf->falcon, &hsdesc, 0, sizeof(hsdesc), 0);
-}
-
-int
-gm200_acr_hsfw_boot(struct nvkm_acr *acr, struct nvkm_acr_hsf *hsf,
-		    u32 intr_clear, u32 mbox0_ok)
-{
-	struct nvkm_subdev *subdev = &acr->subdev;
-	struct nvkm_device *device = subdev->device;
-	struct nvkm_falcon *falcon = hsf->falcon;
-	u32 mbox0, mbox1;
-	int ret;
-
-	/* Reset falcon. */
-	nvkm_falcon_reset(falcon);
-	nvkm_falcon_bind_context(falcon, acr->inst);
+	flcn_bl_dmem_desc_v1_dump(fw->falcon->user, &hsdesc);
 
-	/* Load bootloader into IMEM. */
-	nvkm_falcon_load_imem(falcon, hsf->imem,
-				      falcon->code.limit - hsf->imem_size,
-				      hsf->imem_size,
-				      hsf->imem_tag,
-				      0, false);
-
-	/* Load bootloader data into DMEM. */
-	hsf->func->bld(acr, hsf);
-
-	/* Boot the falcon. */
-	nvkm_mc_intr_mask(device, falcon->owner->type, falcon->owner->inst, false);
-
-	nvkm_falcon_wr32(falcon, 0x040, 0xdeada5a5);
-	nvkm_falcon_set_start_addr(falcon, hsf->imem_tag << 8);
-	nvkm_falcon_start(falcon);
-	ret = nvkm_falcon_wait_for_halt(falcon, 100);
-	if (ret)
-		return ret;
-
-	/* Check for successful completion. */
-	mbox0 = nvkm_falcon_rd32(falcon, 0x040);
-	mbox1 = nvkm_falcon_rd32(falcon, 0x044);
-	nvkm_debug(subdev, "mailbox %08x %08x\n", mbox0, mbox1);
-	if (mbox0 && mbox0 != mbox0_ok)
-		return -EIO;
-
-	nvkm_falcon_clear_interrupt(falcon, intr_clear);
-	nvkm_mc_intr_mask(device, falcon->owner->type, falcon->owner->inst, true);
-	return ret;
+	return nvkm_falcon_pio_wr(fw->falcon, (u8 *)&hsdesc, 0, 0, DMEM, 0, sizeof(hsdesc), 0, 0);
 }
 
 int
-gm200_acr_hsfw_load(struct nvkm_acr *acr, struct nvkm_acr_hsfw *hsfw,
-		    struct nvkm_falcon *falcon)
+gm200_acr_hsfw_ctor(struct nvkm_acr *acr, const char *bl, const char *fw, const char *name, int ver,
+		    const struct nvkm_acr_hsf_fwif *fwif)
 {
-	struct nvkm_subdev *subdev = &acr->subdev;
-	struct nvkm_acr_hsf *hsf;
-	int ret;
-
-	/* Patch the appropriate signature (production/debug) into the FW
-	 * image, as determined by the mode the falcon is in.
-	 */
-	ret = nvkm_falcon_get(falcon, subdev);
-	if (ret)
-		return ret;
-
-	if (hsfw->sig.patch_loc) {
-		if (!falcon->debug) {
-			nvkm_debug(subdev, "patching production signature\n");
-			memcpy(hsfw->image + hsfw->sig.patch_loc,
-			       hsfw->sig.prod.data,
-			       hsfw->sig.prod.size);
-		} else {
-			nvkm_debug(subdev, "patching debug signature\n");
-			memcpy(hsfw->image + hsfw->sig.patch_loc,
-			       hsfw->sig.dbg.data,
-			       hsfw->sig.dbg.size);
-		}
-	}
-
-	nvkm_falcon_put(falcon, subdev);
+	struct nvkm_acr_hsfw *hsfw;
 
-	if (!(hsf = kzalloc(sizeof(*hsf), GFP_KERNEL)))
+	if (!(hsfw = kzalloc(sizeof(*hsfw), GFP_KERNEL)))
 		return -ENOMEM;
-	hsf->func = hsfw->func;
-	hsf->name = hsfw->name;
-	list_add_tail(&hsf->head, &acr->hsf);
-
-	hsf->imem_size = hsfw->imem_size;
-	hsf->imem_tag = hsfw->imem_tag;
-	hsf->imem = kmemdup(hsfw->imem, hsfw->imem_size, GFP_KERNEL);
-	if (!hsf->imem)
-		return -ENOMEM;
-
-	hsf->non_sec_addr = hsfw->non_sec_addr;
-	hsf->non_sec_size = hsfw->non_sec_size;
-	hsf->sec_addr = hsfw->sec_addr;
-	hsf->sec_size = hsfw->sec_size;
-	hsf->data_addr = hsfw->data_addr;
-	hsf->data_size = hsfw->data_size;
-
-	/* Make the FW image accessible to the HS bootloader. */
-	ret = nvkm_memory_new(subdev->device, NVKM_MEM_TARGET_INST,
-			      hsfw->image_size, 0x1000, false, &hsf->ucode);
-	if (ret)
-		return ret;
-
-	nvkm_kmap(hsf->ucode);
-	nvkm_wobj(hsf->ucode, 0, hsfw->image, hsfw->image_size);
-	nvkm_done(hsf->ucode);
-
-	ret = nvkm_vmm_get(acr->vmm, 12, nvkm_memory_size(hsf->ucode),
-			   &hsf->vma);
-	if (ret)
-		return ret;
-
-	ret = nvkm_memory_map(hsf->ucode, 0, acr->vmm, hsf->vma, NULL, 0);
-	if (ret)
-		return ret;
 
-	hsf->falcon = falcon;
-	return 0;
-}
+	hsfw->falcon_id = fwif->falcon_id;
+	hsfw->boot_mbox0 = fwif->boot_mbox0;
+	hsfw->intr_clear = fwif->intr_clear;
+	list_add_tail(&hsfw->head, &acr->hsfw);
 
-int
-gm200_acr_unload_boot(struct nvkm_acr *acr, struct nvkm_acr_hsf *hsf)
-{
-	return gm200_acr_hsfw_boot(acr, hsf, 0, 0x1d);
-}
-
-int
-gm200_acr_unload_load(struct nvkm_acr *acr, struct nvkm_acr_hsfw *hsfw)
-{
-	return gm200_acr_hsfw_load(acr, hsfw, &acr->subdev.device->pmu->falcon);
+	return nvkm_falcon_fw_ctor_hs(fwif->func, name, &acr->subdev, bl, fw, ver, NULL, &hsfw->fw);
 }
 
-const struct nvkm_acr_hsf_func
+const struct nvkm_falcon_fw_func
 gm200_acr_unload_0 = {
-	.load = gm200_acr_unload_load,
-	.boot = gm200_acr_unload_boot,
-	.bld = gm200_acr_hsfw_bld,
+	.signature = gm200_flcn_fw_signature,
+	.reset = gm200_flcn_fw_reset,
+	.load = gm200_flcn_fw_load,
+	.load_bld = gm200_acr_hsfw_load_bld,
+	.boot = gm200_flcn_fw_boot,
 };
 
 MODULE_FIRMWARE("nvidia/gm200/acr/ucode_unload.bin");
@@ -384,20 +274,15 @@ MODULE_FIRMWARE("nvidia/gp100/acr/ucode_unload.bin");
 
 static const struct nvkm_acr_hsf_fwif
 gm200_acr_unload_fwif[] = {
-	{ 0, nvkm_acr_hsfw_load, &gm200_acr_unload_0 },
+	{ 0, gm200_acr_hsfw_ctor, &gm200_acr_unload_0, NVKM_ACR_HSF_PMU, 0, 0x00000010 },
 	{}
 };
 
-int
-gm200_acr_load_boot(struct nvkm_acr *acr, struct nvkm_acr_hsf *hsf)
-{
-	return gm200_acr_hsfw_boot(acr, hsf, 0x10, 0);
-}
-
 static int
-gm200_acr_load_load(struct nvkm_acr *acr, struct nvkm_acr_hsfw *hsfw)
+gm200_acr_load_setup(struct nvkm_falcon_fw *fw)
 {
-	struct flcn_acr_desc *desc = (void *)&hsfw->image[hsfw->data_addr];
+	struct flcn_acr_desc *desc = (void *)&fw->fw.img[fw->dmem_base_img];
+	struct nvkm_acr *acr = fw->falcon->owner->device->acr;
 
 	desc->wpr_region_id = 1;
 	desc->regions.no_regions = 2;
@@ -408,15 +293,17 @@ gm200_acr_load_load(struct nvkm_acr *acr, struct nvkm_acr_hsfw *hsfw)
 	desc->regions.region_props[0].write_mask = 0xc;
 	desc->regions.region_props[0].client_mask = 0x2;
 	flcn_acr_desc_dump(&acr->subdev, desc);
-
-	return gm200_acr_hsfw_load(acr, hsfw, &acr->subdev.device->pmu->falcon);
+	return 0;
 }
 
-static const struct nvkm_acr_hsf_func
+static const struct nvkm_falcon_fw_func
 gm200_acr_load_0 = {
-	.load = gm200_acr_load_load,
-	.boot = gm200_acr_load_boot,
-	.bld = gm200_acr_hsfw_bld,
+	.signature = gm200_flcn_fw_signature,
+	.reset = gm200_flcn_fw_reset,
+	.setup = gm200_acr_load_setup,
+	.load = gm200_flcn_fw_load,
+	.load_bld = gm200_acr_hsfw_load_bld,
+	.boot = gm200_flcn_fw_boot,
 };
 
 MODULE_FIRMWARE("nvidia/gm200/acr/bl.bin");
@@ -433,7 +320,7 @@ MODULE_FIRMWARE("nvidia/gp100/acr/ucode_load.bin");
 
 static const struct nvkm_acr_hsf_fwif
 gm200_acr_load_fwif[] = {
-	{ 0, nvkm_acr_hsfw_load, &gm200_acr_load_0 },
+	{ 0, gm200_acr_hsfw_ctor, &gm200_acr_load_0, NVKM_ACR_HSF_PMU, 0, 0x00000010 },
 	{}
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm20b.c
index 54e996f2f630..ef5fb79128b1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gm20b.c
@@ -45,43 +45,47 @@ gm20b_acr_wpr_alloc(struct nvkm_acr *acr, u32 wpr_size)
 			       wpr_size, 0, true, &acr->wpr);
 }
 
-static void
-gm20b_acr_load_bld(struct nvkm_acr *acr, struct nvkm_acr_hsf *hsf)
+static int
+gm20b_acr_hsfw_load_bld(struct nvkm_falcon_fw *fw)
 {
 	struct flcn_bl_dmem_desc hsdesc = {
 		.ctx_dma = FALCON_DMAIDX_VIRT,
-		.code_dma_base = hsf->vma->addr >> 8,
-		.non_sec_code_off = hsf->non_sec_addr,
-		.non_sec_code_size = hsf->non_sec_size,
-		.sec_code_off = hsf->sec_addr,
-		.sec_code_size = hsf->sec_size,
+		.code_dma_base = fw->vma->addr >> 8,
+		.non_sec_code_off = fw->nmem_base,
+		.non_sec_code_size = fw->nmem_size,
+		.sec_code_off = fw->imem_base,
+		.sec_code_size = fw->imem_size,
 		.code_entry_point = 0,
-		.data_dma_base = (hsf->vma->addr + hsf->data_addr) >> 8,
-		.data_size = hsf->data_size,
+		.data_dma_base = (fw->vma->addr + fw->dmem_base_img) >> 8,
+		.data_size = fw->dmem_size,
 	};
 
-	flcn_bl_dmem_desc_dump(&acr->subdev, &hsdesc);
+	flcn_bl_dmem_desc_dump(fw->falcon->user, &hsdesc);
 
-	nvkm_falcon_load_dmem(hsf->falcon, &hsdesc, 0, sizeof(hsdesc), 0);
+	return nvkm_falcon_pio_wr(fw->falcon, (u8 *)&hsdesc, 0, 0, DMEM, 0, sizeof(hsdesc), 0, 0);
 }
 
+
 static int
-gm20b_acr_load_load(struct nvkm_acr *acr, struct nvkm_acr_hsfw *hsfw)
+gm20b_acr_load_setup(struct nvkm_falcon_fw *fw)
 {
-	struct flcn_acr_desc *desc = (void *)&hsfw->image[hsfw->data_addr];
+	struct flcn_acr_desc *desc = (void *)&fw->fw.img[fw->dmem_base_img];
+	struct nvkm_acr *acr = fw->falcon->owner->device->acr;
 
 	desc->ucode_blob_base = nvkm_memory_addr(acr->wpr);
 	desc->ucode_blob_size = nvkm_memory_size(acr->wpr);
 	flcn_acr_desc_dump(&acr->subdev, desc);
-
-	return gm200_acr_hsfw_load(acr, hsfw, &acr->subdev.device->pmu->falcon);
+	return 0;
 }
 
-const struct nvkm_acr_hsf_func
+const struct nvkm_falcon_fw_func
 gm20b_acr_load_0 = {
-	.load = gm20b_acr_load_load,
-	.boot = gm200_acr_load_boot,
-	.bld = gm20b_acr_load_bld,
+	.signature = gm200_flcn_fw_signature,
+	.reset = gm200_flcn_fw_reset,
+	.setup = gm20b_acr_load_setup,
+	.load = gm200_flcn_fw_load,
+	.load_bld = gm20b_acr_hsfw_load_bld,
+	.boot = gm200_flcn_fw_boot,
 };
 
 #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC)
@@ -91,7 +95,7 @@ MODULE_FIRMWARE("nvidia/gm20b/acr/ucode_load.bin");
 
 static const struct nvkm_acr_hsf_fwif
 gm20b_acr_load_fwif[] = {
-	{ 0, nvkm_acr_hsfw_load, &gm20b_acr_load_0 },
+	{ 0, gm200_acr_hsfw_ctor, &gm20b_acr_load_0, NVKM_ACR_HSF_PMU, 0, 0x10 },
 	{}
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c
index fd97a935a380..084f28449e52 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp102.c
@@ -29,7 +29,7 @@
 #include <nvfw/acr.h>
 #include <nvfw/flcn.h>
 
-void
+int
 gp102_acr_wpr_patch(struct nvkm_acr *acr, s64 adjust)
 {
 	struct wpr_header_v1 hdr;
@@ -54,6 +54,8 @@ gp102_acr_wpr_patch(struct nvkm_acr *acr, s64 adjust)
 
 		offset += sizeof(hdr);
 	} while (hdr.falcon_id != WPR_HEADER_V1_FALCON_ID_INVALID);
+
+	return 0;
 }
 
 int
@@ -187,14 +189,15 @@ MODULE_FIRMWARE("nvidia/gp107/acr/ucode_unload.bin");
 
 static const struct nvkm_acr_hsf_fwif
 gp102_acr_unload_fwif[] = {
-	{ 0, nvkm_acr_hsfw_load, &gm200_acr_unload_0 },
+	{ 0, gm200_acr_hsfw_ctor, &gm200_acr_unload_0, NVKM_ACR_HSF_PMU, 0x1d, 0x00000010 },
 	{}
 };
 
 int
-gp102_acr_load_load(struct nvkm_acr *acr, struct nvkm_acr_hsfw *hsfw)
+gp102_acr_load_setup(struct nvkm_falcon_fw *fw)
 {
-	struct flcn_acr_desc_v1 *desc = (void *)&hsfw->image[hsfw->data_addr];
+	struct flcn_acr_desc_v1 *desc = (void *)&fw->fw.img[fw->dmem_base_img];
+	struct nvkm_acr *acr = fw->falcon->owner->device->acr;
 
 	desc->wpr_region_id = 1;
 	desc->regions.no_regions = 2;
@@ -204,19 +207,19 @@ gp102_acr_load_load(struct nvkm_acr *acr, struct nvkm_acr_hsfw *hsfw)
 	desc->regions.region_props[0].read_mask = 0xf;
 	desc->regions.region_props[0].write_mask = 0xc;
 	desc->regions.region_props[0].client_mask = 0x2;
-	desc->regions.region_props[0].shadow_mem_start_addr =
-		acr->shadow_start >> 8;
+	desc->regions.region_props[0].shadow_mem_start_addr = acr->shadow_start >> 8;
 	flcn_acr_desc_v1_dump(&acr->subdev, desc);
-
-	return gm200_acr_hsfw_load(acr, hsfw,
-				  &acr->subdev.device->sec2->falcon);
+	return 0;
 }
 
-static const struct nvkm_acr_hsf_func
+static const struct nvkm_falcon_fw_func
 gp102_acr_load_0 = {
-	.load = gp102_acr_load_load,
-	.boot = gm200_acr_load_boot,
-	.bld = gm200_acr_hsfw_bld,
+	.signature = gm200_flcn_fw_signature,
+	.reset = gm200_flcn_fw_reset,
+	.setup = gp102_acr_load_setup,
+	.load = gm200_flcn_fw_load,
+	.load_bld = gm200_acr_hsfw_load_bld,
+	.boot = gm200_flcn_fw_boot,
 };
 
 MODULE_FIRMWARE("nvidia/gp102/acr/bl.bin");
@@ -233,7 +236,7 @@ MODULE_FIRMWARE("nvidia/gp107/acr/ucode_load.bin");
 
 static const struct nvkm_acr_hsf_fwif
 gp102_acr_load_fwif[] = {
-	{ 0, nvkm_acr_hsfw_load, &gp102_acr_load_0 },
+	{ 0, gm200_acr_hsfw_ctor, &gp102_acr_load_0, NVKM_ACR_HSF_SEC2, 0, 0x00000010 },
 	{}
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp108.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp108.c
index 373d638a2177..6ab9d4959c17 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp108.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp108.c
@@ -25,63 +25,62 @@
 
 #include <nvfw/flcn.h>
 
-void
-gp108_acr_hsfw_bld(struct nvkm_acr *acr, struct nvkm_acr_hsf *hsf)
+int
+gp108_acr_hsfw_load_bld(struct nvkm_falcon_fw *fw)
 {
 	struct flcn_bl_dmem_desc_v2 hsdesc = {
 		.ctx_dma = FALCON_DMAIDX_VIRT,
-		.code_dma_base = hsf->vma->addr,
-		.non_sec_code_off = hsf->non_sec_addr,
-		.non_sec_code_size = hsf->non_sec_size,
-		.sec_code_off = hsf->sec_addr,
-		.sec_code_size = hsf->sec_size,
+		.code_dma_base = fw->vma->addr,
+		.non_sec_code_off = fw->nmem_base,
+		.non_sec_code_size = fw->nmem_size,
+		.sec_code_off = fw->imem_base,
+		.sec_code_size = fw->imem_size,
 		.code_entry_point = 0,
-		.data_dma_base = hsf->vma->addr + hsf->data_addr,
-		.data_size = hsf->data_size,
+		.data_dma_base = fw->vma->addr + fw->dmem_base_img,
+		.data_size = fw->dmem_size,
 		.argc = 0,
 		.argv = 0,
 	};
 
-	flcn_bl_dmem_desc_v2_dump(&acr->subdev, &hsdesc);
+	flcn_bl_dmem_desc_v2_dump(fw->falcon->user, &hsdesc);
 
-	nvkm_falcon_load_dmem(hsf->falcon, &hsdesc, 0, sizeof(hsdesc), 0);
+	return nvkm_falcon_pio_wr(fw->falcon, (u8 *)&hsdesc, 0, 0, DMEM, 0, sizeof(hsdesc), 0, 0);
 }
 
-const struct nvkm_acr_hsf_func
-gp108_acr_unload_0 = {
-	.load = gm200_acr_unload_load,
-	.boot = gm200_acr_unload_boot,
-	.bld = gp108_acr_hsfw_bld,
+const struct nvkm_falcon_fw_func
+gp108_acr_hsfw_0 = {
+	.signature = gm200_flcn_fw_signature,
+	.reset = gm200_flcn_fw_reset,
+	.load = gm200_flcn_fw_load,
+	.load_bld = gp108_acr_hsfw_load_bld,
+	.boot = gm200_flcn_fw_boot,
 };
 
 MODULE_FIRMWARE("nvidia/gp108/acr/unload_bl.bin");
 MODULE_FIRMWARE("nvidia/gp108/acr/ucode_unload.bin");
 
-MODULE_FIRMWARE("nvidia/gv100/acr/unload_bl.bin");
-MODULE_FIRMWARE("nvidia/gv100/acr/ucode_unload.bin");
-
 static const struct nvkm_acr_hsf_fwif
 gp108_acr_unload_fwif[] = {
-	{ 0, nvkm_acr_hsfw_load, &gp108_acr_unload_0 },
+	{ 0, gm200_acr_hsfw_ctor, &gp108_acr_hsfw_0, NVKM_ACR_HSF_PMU, 0x1d, 0x00000010 },
 	{}
 };
 
-static const struct nvkm_acr_hsf_func
+const struct nvkm_falcon_fw_func
 gp108_acr_load_0 = {
-	.load = gp102_acr_load_load,
-	.boot = gm200_acr_load_boot,
-	.bld = gp108_acr_hsfw_bld,
+	.signature = gm200_flcn_fw_signature,
+	.reset = gm200_flcn_fw_reset,
+	.setup = gp102_acr_load_setup,
+	.load = gm200_flcn_fw_load,
+	.load_bld = gp108_acr_hsfw_load_bld,
+	.boot = gm200_flcn_fw_boot,
 };
 
 MODULE_FIRMWARE("nvidia/gp108/acr/bl.bin");
 MODULE_FIRMWARE("nvidia/gp108/acr/ucode_load.bin");
 
-MODULE_FIRMWARE("nvidia/gv100/acr/bl.bin");
-MODULE_FIRMWARE("nvidia/gv100/acr/ucode_load.bin");
-
 static const struct nvkm_acr_hsf_fwif
 gp108_acr_load_fwif[] = {
-	{ 0, nvkm_acr_hsfw_load, &gp108_acr_load_0 },
+	{ 0, gm200_acr_hsfw_ctor, &gp108_acr_load_0, NVKM_ACR_HSF_SEC2, 0, 0x00000010 },
 	{}
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp10b.c
index f03ba028867b..a3422ab6deab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gp10b.c
@@ -28,7 +28,7 @@ MODULE_FIRMWARE("nvidia/gp10b/acr/ucode_load.bin");
 
 static const struct nvkm_acr_hsf_fwif
 gp10b_acr_load_fwif[] = {
-	{ 0, nvkm_acr_hsfw_load, &gm20b_acr_load_0 },
+	{ 0, gm200_acr_hsfw_ctor, &gm20b_acr_load_0, NVKM_ACR_HSF_PMU, 0, 0x00000010 },
 	{}
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gv100.c
new file mode 100644
index 000000000000..4c5ca6b40027
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/gv100.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2022 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+MODULE_FIRMWARE("nvidia/gv100/acr/unload_bl.bin");
+MODULE_FIRMWARE("nvidia/gv100/acr/ucode_unload.bin");
+
+static const struct nvkm_acr_hsf_fwif
+gv100_acr_unload_fwif[] = {
+	{ 0, gm200_acr_hsfw_ctor, &gp108_acr_hsfw_0, NVKM_ACR_HSF_PMU, 0, 0x00000000 },
+	{}
+};
+
+MODULE_FIRMWARE("nvidia/gv100/acr/bl.bin");
+MODULE_FIRMWARE("nvidia/gv100/acr/ucode_load.bin");
+
+static const struct nvkm_acr_hsf_fwif
+gv100_acr_load_fwif[] = {
+	{ 0, gm200_acr_hsfw_ctor, &gp108_acr_load_0, NVKM_ACR_HSF_SEC2, 0, 0x00000010 },
+	{}
+};
+
+static const struct nvkm_acr_func
+gv100_acr = {
+	.load = gv100_acr_load_fwif,
+	.unload = gv100_acr_unload_fwif,
+	.wpr_parse = gp102_acr_wpr_parse,
+	.wpr_layout = gp102_acr_wpr_layout,
+	.wpr_alloc = gp102_acr_wpr_alloc,
+	.wpr_build = gp102_acr_wpr_build,
+	.wpr_patch = gp102_acr_wpr_patch,
+	.wpr_check = gm200_acr_wpr_check,
+	.init = gm200_acr_init,
+};
+
+static const struct nvkm_acr_fwif
+gv100_acr_fwif[] = {
+	{  0, gp102_acr_load, &gv100_acr },
+	{ -1, gm200_acr_nofw, &gm200_acr },
+	{}
+};
+
+int
+gv100_acr_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
+	      struct nvkm_acr **pacr)
+{
+	return nvkm_acr_new_(gv100_acr_fwif, device, type, inst, pacr);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c
deleted file mode 100644
index a6ea89a5d51a..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/hsfw.c
+++ /dev/null
@@ -1,177 +0,0 @@
-/*
- * Copyright 2019 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-#include "priv.h"
-
-#include <core/firmware.h>
-
-#include <nvfw/fw.h>
-#include <nvfw/hs.h>
-
-static void
-nvkm_acr_hsfw_del(struct nvkm_acr_hsfw *hsfw)
-{
-	list_del(&hsfw->head);
-	kfree(hsfw->imem);
-	kfree(hsfw->image);
-	kfree(hsfw->sig.prod.data);
-	kfree(hsfw->sig.dbg.data);
-	kfree(hsfw);
-}
-
-void
-nvkm_acr_hsfw_del_all(struct nvkm_acr *acr)
-{
-	struct nvkm_acr_hsfw *hsfw, *hsft;
-	list_for_each_entry_safe(hsfw, hsft, &acr->hsfw, head) {
-		nvkm_acr_hsfw_del(hsfw);
-	}
-}
-
-static int
-nvkm_acr_hsfw_load_image(struct nvkm_acr *acr, const char *name, int ver,
-			 struct nvkm_acr_hsfw *hsfw)
-{
-	struct nvkm_subdev *subdev = &acr->subdev;
-	const struct firmware *fw;
-	const struct nvfw_bin_hdr *hdr;
-	const struct nvfw_hs_header *fwhdr;
-	const struct nvfw_hs_load_header *lhdr;
-	u32 loc, sig;
-	int ret;
-
-	ret = nvkm_firmware_get(subdev, name, ver, &fw);
-	if (ret < 0)
-		return ret;
-
-	hdr = nvfw_bin_hdr(subdev, fw->data);
-	fwhdr = nvfw_hs_header(subdev, fw->data + hdr->header_offset);
-
-	/* Earlier FW releases by NVIDIA for Nouveau's use aren't in NVIDIA's
-	 * standard format, and don't have the indirection seen in the 0x10de
-	 * case.
-	 */
-	switch (hdr->bin_magic) {
-	case 0x000010de:
-		loc = *(u32 *)(fw->data + fwhdr->patch_loc);
-		sig = *(u32 *)(fw->data + fwhdr->patch_sig);
-		break;
-	case 0x3b1d14f0:
-		loc = fwhdr->patch_loc;
-		sig = fwhdr->patch_sig;
-		break;
-	default:
-		ret = -EINVAL;
-		goto done;
-	}
-
-	lhdr = nvfw_hs_load_header(subdev, fw->data + fwhdr->hdr_offset);
-
-	if (!(hsfw->image = kmalloc(hdr->data_size, GFP_KERNEL))) {
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	memcpy(hsfw->image, fw->data + hdr->data_offset, hdr->data_size);
-	hsfw->image_size = hdr->data_size;
-	hsfw->non_sec_addr = lhdr->non_sec_code_off;
-	hsfw->non_sec_size = lhdr->non_sec_code_size;
-	hsfw->sec_addr = lhdr->apps[0];
-	hsfw->sec_size = lhdr->apps[lhdr->num_apps];
-	hsfw->data_addr = lhdr->data_dma_base;
-	hsfw->data_size = lhdr->data_size;
-
-	hsfw->sig.prod.size = fwhdr->sig_prod_size;
-	hsfw->sig.prod.data = kmemdup(fw->data + fwhdr->sig_prod_offset + sig,
-				      hsfw->sig.prod.size, GFP_KERNEL);
-	if (!hsfw->sig.prod.data) {
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	hsfw->sig.dbg.size = fwhdr->sig_dbg_size;
-	hsfw->sig.dbg.data = kmemdup(fw->data + fwhdr->sig_dbg_offset + sig,
-				     hsfw->sig.dbg.size, GFP_KERNEL);
-	if (!hsfw->sig.dbg.data) {
-		ret = -ENOMEM;
-		goto done;
-	}
-
-	hsfw->sig.patch_loc = loc;
-done:
-	nvkm_firmware_put(fw);
-	return ret;
-}
-
-static int
-nvkm_acr_hsfw_load_bl(struct nvkm_acr *acr, const char *name, int ver,
-		      struct nvkm_acr_hsfw *hsfw)
-{
-	struct nvkm_subdev *subdev = &acr->subdev;
-	const struct nvfw_bin_hdr *hdr;
-	const struct nvfw_bl_desc *desc;
-	const struct firmware *fw;
-	u8 *data;
-	int ret;
-
-	ret = nvkm_firmware_get(subdev, name, ver, &fw);
-	if (ret)
-		return ret;
-
-	hdr = nvfw_bin_hdr(subdev, fw->data);
-	desc = nvfw_bl_desc(subdev, fw->data + hdr->header_offset);
-	data = (void *)fw->data + hdr->data_offset;
-
-	hsfw->imem_size = desc->code_size;
-	hsfw->imem_tag = desc->start_tag;
-	hsfw->imem = kmemdup(data + desc->code_off, desc->code_size, GFP_KERNEL);
-	nvkm_firmware_put(fw);
-	if (!hsfw->imem)
-		return -ENOMEM;
-	else
-		return 0;
-}
-
-int
-nvkm_acr_hsfw_load(struct nvkm_acr *acr, const char *bl, const char *fw,
-		   const char *name, int version,
-		   const struct nvkm_acr_hsf_fwif *fwif)
-{
-	struct nvkm_acr_hsfw *hsfw;
-	int ret;
-
-	if (!(hsfw = kzalloc(sizeof(*hsfw), GFP_KERNEL)))
-		return -ENOMEM;
-
-	hsfw->func = fwif->func;
-	hsfw->name = name;
-	list_add_tail(&hsfw->head, &acr->hsfw);
-
-	ret = nvkm_acr_hsfw_load_bl(acr, bl, version, hsfw);
-	if (ret)
-		goto done;
-
-	ret = nvkm_acr_hsfw_load_image(acr, fw, version, hsfw);
-done:
-	if (ret)
-		nvkm_acr_hsfw_del(hsfw);
-	return ret;
-}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/lsfw.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/lsfw.c
index 9b1cf6711ae9..f36a359d4531 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/lsfw.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/lsfw.c
@@ -29,6 +29,7 @@ void
 nvkm_acr_lsfw_del(struct nvkm_acr_lsfw *lsfw)
 {
 	nvkm_blob_dtor(&lsfw->img);
+	kfree(lsfw->sigs);
 	nvkm_firmware_put(lsfw->sig);
 	list_del(&lsfw->head);
 	kfree(lsfw);
@@ -177,6 +178,75 @@ nvkm_acr_lsfw_load_sig_image_desc_v1(struct nvkm_subdev *subdev,
 }
 
 int
+nvkm_acr_lsfw_load_sig_image_desc_v2(struct nvkm_subdev *subdev,
+				     struct nvkm_falcon *falcon,
+				     enum nvkm_acr_lsf_id id,
+				     const char *path, int ver,
+				     const struct nvkm_acr_lsf_func *func)
+{
+	const struct firmware *fw;
+	struct nvkm_acr_lsfw *lsfw;
+	const struct nvfw_ls_desc_v2 *desc;
+	int ret = 0;
+
+	lsfw = nvkm_acr_lsfw_load_sig_image_desc_(subdev, falcon, id, path, ver, func, &fw);
+	if (IS_ERR(lsfw))
+		return PTR_ERR(lsfw);
+
+	desc = nvfw_ls_desc_v2(subdev, fw->data);
+
+	lsfw->secure_bootloader = desc->secure_bootloader;
+	lsfw->bootloader_size = ALIGN(desc->bootloader_size, 256);
+	lsfw->bootloader_imem_offset = desc->bootloader_imem_offset;
+
+	lsfw->app_size = ALIGN(desc->app_size, 256);
+	lsfw->app_start_offset = desc->app_start_offset;
+	lsfw->app_imem_entry = desc->app_imem_entry;
+	lsfw->app_resident_code_offset = desc->app_resident_code_offset;
+	lsfw->app_resident_code_size = desc->app_resident_code_size;
+	lsfw->app_resident_data_offset = desc->app_resident_data_offset;
+	lsfw->app_resident_data_size = desc->app_resident_data_size;
+	lsfw->app_imem_offset = desc->app_imem_offset;
+	lsfw->app_dmem_offset = desc->app_dmem_offset;
+
+	lsfw->ucode_size = ALIGN(lsfw->app_resident_data_offset, 256) + lsfw->bootloader_size;
+	lsfw->data_size = lsfw->app_size + lsfw->bootloader_size - lsfw->ucode_size;
+
+	nvkm_firmware_put(fw);
+
+	if (lsfw->secure_bootloader) {
+		const struct firmware *hsbl;
+		const struct nvfw_ls_hsbl_bin_hdr *hdr;
+		const struct nvfw_ls_hsbl_hdr *hshdr;
+		u32 loc, sig, cnt, *meta;
+
+		ret = nvkm_firmware_load_name(subdev, path, "hs_bl_sig", ver, &hsbl);
+		if (ret)
+			return ret;
+
+		hdr = nvfw_ls_hsbl_bin_hdr(subdev, hsbl->data);
+		hshdr = nvfw_ls_hsbl_hdr(subdev, hsbl->data + hdr->header_offset);
+		meta = (u32 *)(hsbl->data + hshdr->meta_data_offset);
+		loc = *(u32 *)(hsbl->data + hshdr->patch_loc);
+		sig = *(u32 *)(hsbl->data + hshdr->patch_sig);
+		cnt = *(u32 *)(hsbl->data + hshdr->num_sig);
+
+		lsfw->fuse_ver = meta[0];
+		lsfw->engine_id = meta[1];
+		lsfw->ucode_id = meta[2];
+		lsfw->sig_size = hshdr->sig_prod_size / cnt;
+		lsfw->sig_nr = cnt;
+		lsfw->sigs = kmemdup(hsbl->data + hshdr->sig_prod_offset + sig,
+				     lsfw->sig_nr * lsfw->sig_size, GFP_KERNEL);
+		nvkm_firmware_put(hsbl);
+		if (!lsfw->sigs)
+			ret = -ENOMEM;
+	}
+
+	return ret;
+}
+
+int
 nvkm_acr_lsfw_load_bl_inst_data_sig(struct nvkm_subdev *subdev,
 				    struct nvkm_falcon *falcon,
 				    enum nvkm_acr_lsf_id id,
@@ -251,3 +321,78 @@ done:
 	nvkm_firmware_put(bl);
 	return ret;
 }
+
+int
+nvkm_acr_lsfw_load_bl_sig_net(struct nvkm_subdev *subdev,
+			      struct nvkm_falcon *falcon,
+			      enum nvkm_acr_lsf_id id,
+			      const char *path, int ver,
+			      const struct nvkm_acr_lsf_func *func,
+			      const void *inst_data, u32 inst_size,
+			      const void *data_data, u32 data_size)
+{
+	struct nvkm_acr *acr = subdev->device->acr;
+	struct nvkm_acr_lsfw *lsfw;
+	const struct firmware _inst = { .data = inst_data, .size = inst_size };
+	const struct firmware _data = { .data = data_data, .size = data_size };
+	const struct firmware *bl = NULL, *inst = &_inst, *data = &_data;
+	const struct {
+	    int bin_magic;
+	    int bin_version;
+	    int bin_size;
+	    int header_offset;
+	    int header_size;
+	} *hdr;
+	u32 *bldata;
+	int ret;
+
+	if (IS_ERR((lsfw = nvkm_acr_lsfw_add(func, acr, falcon, id))))
+		return PTR_ERR(lsfw);
+
+	ret = nvkm_firmware_load_name(subdev, path, "bl", ver, &bl);
+	if (ret)
+		goto done;
+
+	hdr = (const void *)bl->data;
+	bldata = (void *)(bl->data + hdr->header_offset);
+
+	ret = nvkm_firmware_load_name(subdev, path, "sig", ver, &lsfw->sig);
+	if (ret)
+		goto done;
+
+	lsfw->bootloader_size = ALIGN(hdr->header_size, 256);
+	lsfw->bootloader_imem_offset = func->bl_entry;
+
+	lsfw->app_start_offset = lsfw->bootloader_size;
+	lsfw->app_imem_entry = 0;
+	lsfw->app_resident_code_offset = 0;
+	lsfw->app_resident_code_size = ALIGN(inst->size, 256);
+	lsfw->app_resident_data_offset = lsfw->app_resident_code_size;
+	lsfw->app_resident_data_size = ALIGN(data->size, 256);
+	lsfw->app_imem_offset = 0;
+	lsfw->app_dmem_offset = 0;
+	lsfw->app_size = lsfw->app_resident_code_size + lsfw->app_resident_data_size;
+
+	lsfw->img.size = lsfw->bootloader_size + lsfw->app_size;
+	if (!(lsfw->img.data = kzalloc(lsfw->img.size, GFP_KERNEL))) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	memcpy(lsfw->img.data, bldata, lsfw->bootloader_size);
+	memcpy(lsfw->img.data + lsfw->app_start_offset +
+	       lsfw->app_resident_code_offset, inst->data, inst->size);
+	memcpy(lsfw->img.data + lsfw->app_start_offset +
+	       lsfw->app_resident_data_offset, data->data, data->size);
+
+	lsfw->ucode_size = ALIGN(lsfw->app_resident_data_offset, 256) +
+			   lsfw->bootloader_size;
+	lsfw->data_size = lsfw->app_size + lsfw->bootloader_size -
+			  lsfw->ucode_size;
+
+done:
+	if (ret)
+		nvkm_acr_lsfw_del(lsfw);
+	nvkm_firmware_put(bl);
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/priv.h
index c30b841c9d35..4881c8ba3880 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/priv.h
@@ -24,7 +24,7 @@ struct nvkm_acr_func {
 	u32 (*wpr_layout)(struct nvkm_acr *);
 	int (*wpr_alloc)(struct nvkm_acr *, u32 wpr_size);
 	int (*wpr_build)(struct nvkm_acr *, struct nvkm_acr_lsf *rtos);
-	void (*wpr_patch)(struct nvkm_acr *, s64 adjust);
+	int (*wpr_patch)(struct nvkm_acr *, s64 adjust);
 	void (*wpr_check)(struct nvkm_acr *, u64 *start, u64 *limit);
 	int (*init)(struct nvkm_acr *);
 	void (*fini)(struct nvkm_acr *);
@@ -35,7 +35,7 @@ extern const struct nvkm_acr_func gm200_acr;
 int gm200_acr_wpr_parse(struct nvkm_acr *);
 u32 gm200_acr_wpr_layout(struct nvkm_acr *);
 int gm200_acr_wpr_build(struct nvkm_acr *, struct nvkm_acr_lsf *);
-void gm200_acr_wpr_patch(struct nvkm_acr *, s64);
+int gm200_acr_wpr_patch(struct nvkm_acr *, s64);
 void gm200_acr_wpr_check(struct nvkm_acr *, u64 *, u64 *);
 void gm200_acr_wpr_build_lsb_tail(struct nvkm_acr_lsfw *,
 				  struct lsb_header_tail *);
@@ -48,96 +48,60 @@ u32 gp102_acr_wpr_layout(struct nvkm_acr *);
 int gp102_acr_wpr_alloc(struct nvkm_acr *, u32 wpr_size);
 int gp102_acr_wpr_build(struct nvkm_acr *, struct nvkm_acr_lsf *);
 int gp102_acr_wpr_build_lsb(struct nvkm_acr *, struct nvkm_acr_lsfw *);
-void gp102_acr_wpr_patch(struct nvkm_acr *, s64);
+int gp102_acr_wpr_patch(struct nvkm_acr *, s64);
+
+int tu102_acr_init(struct nvkm_acr *);
+
+void ga100_acr_wpr_check(struct nvkm_acr *, u64 *, u64 *);
 
 struct nvkm_acr_hsfw {
-	const struct nvkm_acr_hsf_func *func;
-	const char *name;
-	struct list_head head;
+	struct nvkm_falcon_fw fw;
+
+	enum nvkm_acr_hsf_id {
+		NVKM_ACR_HSF_PMU,
+		NVKM_ACR_HSF_SEC2,
+		NVKM_ACR_HSF_GSP,
+	} falcon_id;
+	u32 boot_mbox0;
+	u32 intr_clear;
 
-	u32 imem_size;
-	u32 imem_tag;
-	u32 *imem;
-
-	u8 *image;
-	u32 image_size;
-	u32 non_sec_addr;
-	u32 non_sec_size;
-	u32 sec_addr;
-	u32 sec_size;
-	u32 data_addr;
-	u32 data_size;
-
-	struct {
-		struct {
-			void *data;
-			u32 size;
-		} prod, dbg;
-		u32 patch_loc;
-	} sig;
+	struct list_head head;
 };
 
+int nvkm_acr_hsfw_boot(struct nvkm_acr *, const char *name);
+
 struct nvkm_acr_hsf_fwif {
 	int version;
 	int (*load)(struct nvkm_acr *, const char *bl, const char *fw,
 		    const char *name, int version,
 		    const struct nvkm_acr_hsf_fwif *);
-	const struct nvkm_acr_hsf_func *func;
-};
+	const struct nvkm_falcon_fw_func *func;
 
-int nvkm_acr_hsfw_load(struct nvkm_acr *, const char *, const char *,
-		       const char *, int, const struct nvkm_acr_hsf_fwif *);
-void nvkm_acr_hsfw_del_all(struct nvkm_acr *);
-
-struct nvkm_acr_hsf {
-	const struct nvkm_acr_hsf_func *func;
-	const char *name;
-	struct list_head head;
-
-	u32 imem_size;
-	u32 imem_tag;
-	u32 *imem;
-
-	u32 non_sec_addr;
-	u32 non_sec_size;
-	u32 sec_addr;
-	u32 sec_size;
-	u32 data_addr;
-	u32 data_size;
-
-	struct nvkm_memory *ucode;
-	struct nvkm_vma *vma;
-	struct nvkm_falcon *falcon;
+	enum nvkm_acr_hsf_id falcon_id;
+	u32 boot_mbox0;
+	u32 intr_clear;
 };
 
-struct nvkm_acr_hsf_func {
-	int (*load)(struct nvkm_acr *, struct nvkm_acr_hsfw *);
-	int (*boot)(struct nvkm_acr *, struct nvkm_acr_hsf *);
-	void (*bld)(struct nvkm_acr *, struct nvkm_acr_hsf *);
-};
 
-int gm200_acr_hsfw_load(struct nvkm_acr *, struct nvkm_acr_hsfw *,
-			struct nvkm_falcon *);
-int gm200_acr_hsfw_boot(struct nvkm_acr *, struct nvkm_acr_hsf *,
-			u32 clear_intr, u32 mbox0_ok);
+int gm200_acr_hsfw_ctor(struct nvkm_acr *, const char *, const char *, const char *, int,
+			const struct nvkm_acr_hsf_fwif *);
+int gm200_acr_hsfw_load_bld(struct nvkm_falcon_fw *);
+extern const struct nvkm_falcon_fw_func gm200_acr_unload_0;
 
-int gm200_acr_load_boot(struct nvkm_acr *, struct nvkm_acr_hsf *);
+extern const struct nvkm_falcon_fw_func gm20b_acr_load_0;
 
-extern const struct nvkm_acr_hsf_func gm200_acr_unload_0;
-int gm200_acr_unload_load(struct nvkm_acr *, struct nvkm_acr_hsfw *);
-int gm200_acr_unload_boot(struct nvkm_acr *, struct nvkm_acr_hsf *);
-void gm200_acr_hsfw_bld(struct nvkm_acr *, struct nvkm_acr_hsf *);
+int gp102_acr_load_setup(struct nvkm_falcon_fw *);
 
-extern const struct nvkm_acr_hsf_func gm20b_acr_load_0;
+extern const struct nvkm_falcon_fw_func gp108_acr_load_0;
 
-int gp102_acr_load_load(struct nvkm_acr *, struct nvkm_acr_hsfw *);
+extern const struct nvkm_falcon_fw_func gp108_acr_hsfw_0;
+int gp108_acr_hsfw_load_bld(struct nvkm_falcon_fw *);
 
-extern const struct nvkm_acr_hsf_func gp108_acr_unload_0;
-void gp108_acr_hsfw_bld(struct nvkm_acr *, struct nvkm_acr_hsf *);
+int ga100_acr_hsfw_ctor(struct nvkm_acr *, const char *, const char *, const char *, int,
+			const struct nvkm_acr_hsf_fwif *);
 
 int nvkm_acr_new_(const struct nvkm_acr_fwif *, struct nvkm_device *, enum nvkm_subdev_type,
 		  int inst, struct nvkm_acr **);
-int nvkm_acr_hsf_boot(struct nvkm_acr *, const char *name);
 
 struct nvkm_acr_lsf {
 	const struct nvkm_acr_lsf_func *func;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/tu102.c
index 05a87e77525f..c22d551c0078 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/acr/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/acr/tu102.c
@@ -29,14 +29,14 @@
 
 #include <nvfw/acr.h>
 
-static int
+int
 tu102_acr_init(struct nvkm_acr *acr)
 {
-	int ret = nvkm_acr_hsf_boot(acr, "AHESASC");
+	int ret = nvkm_acr_hsfw_boot(acr, "AHESASC");
 	if (ret)
 		return ret;
 
-	return nvkm_acr_hsf_boot(acr, "ASB");
+	return nvkm_acr_hsfw_boot(acr, "ASB");
 }
 
 static int
@@ -85,12 +85,6 @@ tu102_acr_wpr_build(struct nvkm_acr *acr, struct nvkm_acr_lsf *rtos)
 }
 
 static int
-tu102_acr_hsfw_boot(struct nvkm_acr *acr, struct nvkm_acr_hsf *hsf)
-{
-	return gm200_acr_hsfw_boot(acr, hsf, 0, 0);
-}
-
-static int
 tu102_acr_hsfw_nofw(struct nvkm_acr *acr, const char *bl, const char *fw,
 		    const char *name, int version,
 		    const struct nvkm_acr_hsf_fwif *fwif)
@@ -115,24 +109,11 @@ MODULE_FIRMWARE("nvidia/tu117/acr/ucode_unload.bin");
 
 static const struct nvkm_acr_hsf_fwif
 tu102_acr_unload_fwif[] = {
-	{  0, nvkm_acr_hsfw_load, &gp108_acr_unload_0 },
+	{  0, gm200_acr_hsfw_ctor, &gp108_acr_hsfw_0, NVKM_ACR_HSF_PMU, 0, 0x00000000 },
 	{ -1, tu102_acr_hsfw_nofw },
 	{}
 };
 
-static int
-tu102_acr_asb_load(struct nvkm_acr *acr, struct nvkm_acr_hsfw *hsfw)
-{
-	return gm200_acr_hsfw_load(acr, hsfw, &acr->subdev.device->gsp->falcon);
-}
-
-static const struct nvkm_acr_hsf_func
-tu102_acr_asb_0 = {
-	.load = tu102_acr_asb_load,
-	.boot = tu102_acr_hsfw_boot,
-	.bld = gp108_acr_hsfw_bld,
-};
-
 MODULE_FIRMWARE("nvidia/tu102/acr/ucode_asb.bin");
 MODULE_FIRMWARE("nvidia/tu104/acr/ucode_asb.bin");
 MODULE_FIRMWARE("nvidia/tu106/acr/ucode_asb.bin");
@@ -141,18 +122,11 @@ MODULE_FIRMWARE("nvidia/tu117/acr/ucode_asb.bin");
 
 static const struct nvkm_acr_hsf_fwif
 tu102_acr_asb_fwif[] = {
-	{  0, nvkm_acr_hsfw_load, &tu102_acr_asb_0 },
+	{  0, gm200_acr_hsfw_ctor, &gp108_acr_hsfw_0, NVKM_ACR_HSF_GSP, 0, 0x00000000 },
 	{ -1, tu102_acr_hsfw_nofw },
 	{}
 };
 
-static const struct nvkm_acr_hsf_func
-tu102_acr_ahesasc_0 = {
-	.load = gp102_acr_load_load,
-	.boot = tu102_acr_hsfw_boot,
-	.bld = gp108_acr_hsfw_bld,
-};
-
 MODULE_FIRMWARE("nvidia/tu102/acr/bl.bin");
 MODULE_FIRMWARE("nvidia/tu102/acr/ucode_ahesasc.bin");
 
@@ -170,7 +144,7 @@ MODULE_FIRMWARE("nvidia/tu117/acr/ucode_ahesasc.bin");
 
 static const struct nvkm_acr_hsf_fwif
 tu102_acr_ahesasc_fwif[] = {
-	{  0, nvkm_acr_hsfw_load, &tu102_acr_ahesasc_0 },
+	{  0, gm200_acr_hsfw_ctor, &gp108_acr_load_0, NVKM_ACR_HSF_SEC2, 0, 0x00000000 },
 	{ -1, tu102_acr_hsfw_nofw },
 	{}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c
index a308b9bde449..f30718d7e61a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c
@@ -26,6 +26,7 @@
 #include <subdev/bios.h>
 #include <subdev/bios/bit.h>
 #include <subdev/bios/pmu.h>
+#include <subdev/pmu.h>
 #include <subdev/timer.h>
 
 static void
@@ -85,13 +86,18 @@ pmu_load(struct nv50_devinit *init, u8 type, bool post,
 	struct nvkm_subdev *subdev = &init->base.subdev;
 	struct nvkm_bios *bios = subdev->device->bios;
 	struct nvbios_pmuR pmu;
+	int ret;
 
 	if (!nvbios_pmuRm(bios, type, &pmu))
 		return -EINVAL;
 
-	if (!post)
+	if (!post || !subdev->device->pmu)
 		return 0;
 
+	ret = nvkm_falcon_reset(&subdev->device->pmu->falcon);
+	if (ret)
+		return ret;
+
 	pmu_code(init, pmu.boot_addr_pmu, pmu.boot_addr, pmu.boot_size, false);
 	pmu_code(init, pmu.code_addr_pmu, pmu.code_addr, pmu.code_size, true);
 	pmu_data(init, pmu.data_addr_pmu, pmu.data_addr, pmu.data_size);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c
index fd54fa504efa..b53ac9a2552f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c
@@ -22,7 +22,6 @@
 #include "priv.h"
 
 #include <core/memory.h>
-#include <core/notify.h>
 
 static void
 nvkm_fault_ntfy_fini(struct nvkm_event *event, int type, int index)
@@ -38,23 +37,8 @@ nvkm_fault_ntfy_init(struct nvkm_event *event, int type, int index)
 	fault->func->buffer.intr(fault->buffer[index], true);
 }
 
-static int
-nvkm_fault_ntfy_ctor(struct nvkm_object *object, void *argv, u32 argc,
-		     struct nvkm_notify *notify)
-{
-	struct nvkm_fault_buffer *buffer = nvkm_fault_buffer(object);
-	if (argc == 0) {
-		notify->size  = 0;
-		notify->types = 1;
-		notify->index = buffer->id;
-		return 0;
-	}
-	return -ENOSYS;
-}
-
 static const struct nvkm_event_func
 nvkm_fault_ntfy = {
-	.ctor = nvkm_fault_ntfy_ctor,
 	.init = nvkm_fault_ntfy_init,
 	.fini = nvkm_fault_ntfy_fini,
 };
@@ -130,8 +114,7 @@ nvkm_fault_oneinit(struct nvkm_subdev *subdev)
 		}
 	}
 
-	ret = nvkm_event_init(&nvkm_fault_ntfy, 1, fault->buffer_nr,
-			      &fault->event);
+	ret = nvkm_event_init(&nvkm_fault_ntfy, subdev, 1, fault->buffer_nr, &fault->event);
 	if (ret)
 		return ret;
 
@@ -146,7 +129,7 @@ nvkm_fault_dtor(struct nvkm_subdev *subdev)
 	struct nvkm_fault *fault = nvkm_fault(subdev);
 	int i;
 
-	nvkm_notify_fini(&fault->nrpfb);
+	nvkm_event_ntfy_del(&fault->nrpfb);
 	nvkm_event_fini(&fault->event);
 
 	for (i = 0; i < fault->buffer_nr; i++) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c
index 6af7959e02ea..04c7526888bc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c
@@ -65,7 +65,7 @@ gp100_fault_buffer_info(struct nvkm_fault_buffer *buffer)
 void
 gp100_fault_intr(struct nvkm_fault *fault)
 {
-	nvkm_event_send(&fault->event, 1, 0, NULL, 0);
+	nvkm_event_ntfy(&fault->event, 0, NVKM_FAULT_BUFFER_EVENT_PENDING);
 }
 
 static const struct nvkm_fault_func
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c
index cd9d2ade5ac7..8e34d40e7649 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c
@@ -27,10 +27,12 @@
 
 #include <nvif/class.h>
 
-static void
-gv100_fault_buffer_process(struct nvkm_fault_buffer *buffer)
+void
+gv100_fault_buffer_process(struct work_struct *work)
 {
-	struct nvkm_device *device = buffer->fault->subdev.device;
+	struct nvkm_fault *fault = container_of(work, typeof(*fault), nrpfb_work);
+	struct nvkm_fault_buffer *buffer = fault->buffer[0];
+	struct nvkm_device *device = fault->subdev.device;
 	struct nvkm_memory *mem = buffer->mem;
 	u32 get = nvkm_rd32(device, buffer->get);
 	u32 put = nvkm_rd32(device, buffer->put);
@@ -115,11 +117,12 @@ gv100_fault_buffer_info(struct nvkm_fault_buffer *buffer)
 }
 
 static int
-gv100_fault_ntfy_nrpfb(struct nvkm_notify *notify)
+gv100_fault_ntfy_nrpfb(struct nvkm_event_ntfy *ntfy, u32 bits)
 {
-	struct nvkm_fault *fault = container_of(notify, typeof(*fault), nrpfb);
-	gv100_fault_buffer_process(fault->buffer[0]);
-	return NVKM_NOTIFY_KEEP;
+	struct nvkm_fault *fault = container_of(ntfy, typeof(*fault), nrpfb);
+
+	schedule_work(&fault->nrpfb_work);
+	return NVKM_EVENT_KEEP;
 }
 
 static void
@@ -163,14 +166,14 @@ gv100_fault_intr(struct nvkm_fault *fault)
 
 	if (stat & 0x20000000) {
 		if (fault->buffer[0]) {
-			nvkm_event_send(&fault->event, 1, 0, NULL, 0);
+			nvkm_event_ntfy(&fault->event, 0, NVKM_FAULT_BUFFER_EVENT_PENDING);
 			stat &= ~0x20000000;
 		}
 	}
 
 	if (stat & 0x08000000) {
 		if (fault->buffer[1]) {
-			nvkm_event_send(&fault->event, 1, 1, NULL, 0);
+			nvkm_event_ntfy(&fault->event, 1, NVKM_FAULT_BUFFER_EVENT_PENDING);
 			stat &= ~0x08000000;
 		}
 	}
@@ -183,9 +186,12 @@ gv100_fault_intr(struct nvkm_fault *fault)
 static void
 gv100_fault_fini(struct nvkm_fault *fault)
 {
-	nvkm_notify_put(&fault->nrpfb);
+	nvkm_event_ntfy_block(&fault->nrpfb);
+	flush_work(&fault->nrpfb_work);
+
 	if (fault->buffer[0])
 		fault->func->buffer.fini(fault->buffer[0]);
+
 	nvkm_mask(fault->subdev.device, 0x100a34, 0x80000000, 0x80000000);
 }
 
@@ -194,15 +200,15 @@ gv100_fault_init(struct nvkm_fault *fault)
 {
 	nvkm_mask(fault->subdev.device, 0x100a2c, 0x80000000, 0x80000000);
 	fault->func->buffer.init(fault->buffer[0]);
-	nvkm_notify_get(&fault->nrpfb);
+	nvkm_event_ntfy_allow(&fault->nrpfb);
 }
 
 int
 gv100_fault_oneinit(struct nvkm_fault *fault)
 {
-	return nvkm_notify_init(&fault->buffer[0]->object, &fault->event,
-				gv100_fault_ntfy_nrpfb, true, NULL, 0, 0,
-				&fault->nrpfb);
+	nvkm_event_ntfy_add(&fault->event, 0, NVKM_FAULT_BUFFER_EVENT_PENDING, true,
+			    gv100_fault_ntfy_nrpfb, &fault->nrpfb);
+	return 0;
 }
 
 static const struct nvkm_fault_func
@@ -231,5 +237,10 @@ int
 gv100_fault_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 		struct nvkm_fault **pfault)
 {
-	return nvkm_fault_new_(&gv100_fault, device, type, inst, pfault);
+	int ret = nvkm_fault_new_(&gv100_fault, device, type, inst, pfault);
+	if (ret)
+		return ret;
+
+	INIT_WORK(&(*pfault)->nrpfb_work, gv100_fault_buffer_process);
+	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h
index 36681c347fb5..a5510332c402 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h
@@ -16,6 +16,8 @@ struct nvkm_fault_buffer {
 	u32 put;
 	struct nvkm_memory *mem;
 	u64 addr;
+
+	struct nvkm_inth inth;
 };
 
 int nvkm_fault_new_(const struct nvkm_fault_func *, struct nvkm_device *, enum nvkm_subdev_type,
@@ -46,6 +48,7 @@ void gp100_fault_buffer_fini(struct nvkm_fault_buffer *);
 void gp100_fault_buffer_init(struct nvkm_fault_buffer *);
 u64 gp100_fault_buffer_pin(struct nvkm_fault_buffer *);
 void gp100_fault_buffer_info(struct nvkm_fault_buffer *);
+void gv100_fault_buffer_process(struct work_struct *);
 void gp100_fault_intr(struct nvkm_fault *);
 
 u64 gp10b_fault_buffer_pin(struct nvkm_fault_buffer *);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu102.c
index 91eb6729c84d..967efaddae28 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu102.c
@@ -24,20 +24,27 @@
 #include <core/memory.h>
 #include <subdev/mc.h>
 #include <subdev/mmu.h>
+#include <subdev/vfn.h>
 #include <engine/fifo.h>
 
 #include <nvif/class.h>
 
+static irqreturn_t
+tu102_fault_buffer_notify(struct nvkm_inth *inth)
+{
+	struct nvkm_fault_buffer *buffer = container_of(inth, typeof(*buffer), inth);
+
+	nvkm_event_ntfy(&buffer->fault->event, buffer->id, NVKM_FAULT_BUFFER_EVENT_PENDING);
+	return IRQ_HANDLED;
+}
+
 static void
 tu102_fault_buffer_intr(struct nvkm_fault_buffer *buffer, bool enable)
 {
-	/*XXX: Earlier versions of RM touched the old regs on Turing,
-	 *     which don't appear to actually work anymore, but newer
-	 *     versions of RM don't appear to touch anything at all..
-	 */
-	struct nvkm_device *device = buffer->fault->subdev.device;
-
-	nvkm_mc_intr_mask(device, NVKM_SUBDEV_FAULT, 0, enable);
+	if (enable)
+		nvkm_inth_allow(&buffer->inth);
+	else
+		nvkm_inth_block(&buffer->inth);
 }
 
 static void
@@ -46,10 +53,6 @@ tu102_fault_buffer_fini(struct nvkm_fault_buffer *buffer)
 	struct nvkm_device *device = buffer->fault->subdev.device;
 	const u32 foff = buffer->id * 0x20;
 
-	/* Disable the fault interrupts */
-	nvkm_wr32(device, 0xb81408, 0x1);
-	nvkm_wr32(device, 0xb81410, 0x10);
-
 	nvkm_mask(device, 0xb83010 + foff, 0x80000000, 0x00000000);
 }
 
@@ -59,10 +62,6 @@ tu102_fault_buffer_init(struct nvkm_fault_buffer *buffer)
 	struct nvkm_device *device = buffer->fault->subdev.device;
 	const u32 foff = buffer->id * 0x20;
 
-	/* Enable the fault interrupts */
-	nvkm_wr32(device, 0xb81208, 0x1);
-	nvkm_wr32(device, 0xb81210, 0x10);
-
 	nvkm_mask(device, 0xb83010 + foff, 0xc0000000, 0x40000000);
 	nvkm_wr32(device, 0xb83004 + foff, upper_32_bits(buffer->addr));
 	nvkm_wr32(device, 0xb83000 + foff, lower_32_bits(buffer->addr));
@@ -82,9 +81,10 @@ tu102_fault_buffer_info(struct nvkm_fault_buffer *buffer)
 	buffer->put = 0xb8300c + foff;
 }
 
-static void
-tu102_fault_intr_fault(struct nvkm_fault *fault)
+static irqreturn_t
+tu102_fault_info_fault(struct nvkm_inth *inth)
 {
+	struct nvkm_fault *fault = container_of(inth, typeof(*fault), info_fault);
 	struct nvkm_subdev *subdev = &fault->subdev;
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_fault_data info;
@@ -106,70 +106,61 @@ tu102_fault_intr_fault(struct nvkm_fault *fault)
 	info.reason = (info1 & 0x0000001f);
 
 	nvkm_fifo_fault(device->fifo, &info);
-}
-
-static void
-tu102_fault_intr(struct nvkm_fault *fault)
-{
-	struct nvkm_subdev *subdev = &fault->subdev;
-	struct nvkm_device *device = subdev->device;
-	u32 stat = nvkm_rd32(device, 0xb83094);
-
-	if (stat & 0x80000000) {
-		tu102_fault_intr_fault(fault);
-		nvkm_wr32(device, 0xb83094, 0x80000000);
-		stat &= ~0x80000000;
-	}
 
-	if (stat & 0x00000200) {
-		/* Clear the associated interrupt flag */
-		nvkm_wr32(device, 0xb81010, 0x10);
-
-		if (fault->buffer[0]) {
-			nvkm_event_send(&fault->event, 1, 0, NULL, 0);
-			stat &= ~0x00000200;
-		}
-	}
-
-	/* Replayable MMU fault */
-	if (stat & 0x00000100) {
-		/* Clear the associated interrupt flag */
-		nvkm_wr32(device, 0xb81008, 0x1);
-
-		if (fault->buffer[1]) {
-			nvkm_event_send(&fault->event, 1, 1, NULL, 0);
-			stat &= ~0x00000100;
-		}
-	}
-
-	if (stat) {
-		nvkm_debug(subdev, "intr %08x\n", stat);
-	}
+	nvkm_wr32(device, 0xb83094, 0x80000000);
+	return IRQ_HANDLED;
 }
 
 static void
 tu102_fault_fini(struct nvkm_fault *fault)
 {
-	nvkm_notify_put(&fault->nrpfb);
+	nvkm_event_ntfy_block(&fault->nrpfb);
+	flush_work(&fault->nrpfb_work);
+
 	if (fault->buffer[0])
 		fault->func->buffer.fini(fault->buffer[0]);
-	/*XXX: disable priv faults */
+
+	nvkm_inth_block(&fault->info_fault);
 }
 
 static void
 tu102_fault_init(struct nvkm_fault *fault)
 {
-	/*XXX: enable priv faults */
+	nvkm_inth_allow(&fault->info_fault);
+
 	fault->func->buffer.init(fault->buffer[0]);
-	nvkm_notify_get(&fault->nrpfb);
+	nvkm_event_ntfy_allow(&fault->nrpfb);
+}
+
+static int
+tu102_fault_oneinit(struct nvkm_fault *fault)
+{
+	struct nvkm_device *device = fault->subdev.device;
+	struct nvkm_intr *intr = &device->vfn->intr;
+	int ret, i;
+
+	ret = nvkm_inth_add(intr, nvkm_rd32(device, 0x100ee0) & 0x0000ffff,
+			    NVKM_INTR_PRIO_NORMAL, &fault->subdev, tu102_fault_info_fault,
+			    &fault->info_fault);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < fault->buffer_nr; i++) {
+		ret = nvkm_inth_add(intr, nvkm_rd32(device, 0x100ee4 + (i * 4)) >> 16,
+				    NVKM_INTR_PRIO_NORMAL, &fault->subdev,
+				    tu102_fault_buffer_notify, &fault->buffer[i]->inth);
+		if (ret)
+			return ret;
+	}
+
+	return gv100_fault_oneinit(fault);
 }
 
 static const struct nvkm_fault_func
 tu102_fault = {
-	.oneinit = gv100_fault_oneinit,
+	.oneinit = tu102_fault_oneinit,
 	.init = tu102_fault_init,
 	.fini = tu102_fault_fini,
-	.intr = tu102_fault_intr,
 	.buffer.nr = 2,
 	.buffer.entry_size = 32,
 	.buffer.info = tu102_fault_buffer_info,
@@ -184,5 +175,10 @@ int
 tu102_fault_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 		struct nvkm_fault **pfault)
 {
-	return nvkm_fault_new_(&tu102_fault, device, type, inst, pfault);
+	int ret = nvkm_fault_new_(&tu102_fault, device, type, inst, pfault);
+	if (ret)
+		return ret;
+
+	INIT_WORK(&(*pfault)->nrpfb_work, gv100_fault_buffer_process);
+	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/user.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/user.c
index ac835c9582fd..c123e5893d76 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/user.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/user.c
@@ -22,12 +22,28 @@
 #include "priv.h"
 
 #include <core/memory.h>
+#include <core/event.h>
 #include <subdev/mmu.h>
 
 #include <nvif/clb069.h>
 #include <nvif/unpack.h>
 
 static int
+nvkm_ufault_uevent(struct nvkm_object *object, void *argv, u32 argc, struct nvkm_uevent *uevent)
+{
+	struct nvkm_fault_buffer *buffer = nvkm_fault_buffer(object);
+	union nvif_clb069_event_args *args = argv;
+
+	if (!uevent)
+		return 0;
+	if (argc != sizeof(args->vn))
+		return -ENOSYS;
+
+	return nvkm_uevent_add(uevent, &buffer->fault->event, buffer->id,
+			       NVKM_FAULT_BUFFER_EVENT_PENDING, NULL);
+}
+
+static int
 nvkm_ufault_map(struct nvkm_object *object, void *argv, u32 argc,
 		enum nvkm_object_map *type, u64 *addr, u64 *size)
 {
@@ -40,18 +56,6 @@ nvkm_ufault_map(struct nvkm_object *object, void *argv, u32 argc,
 }
 
 static int
-nvkm_ufault_ntfy(struct nvkm_object *object, u32 type,
-		 struct nvkm_event **pevent)
-{
-	struct nvkm_fault_buffer *buffer = nvkm_fault_buffer(object);
-	if (type == NVB069_V0_NTFY_FAULT) {
-		*pevent = &buffer->fault->event;
-		return 0;
-	}
-	return -EINVAL;
-}
-
-static int
 nvkm_ufault_fini(struct nvkm_object *object, bool suspend)
 {
 	struct nvkm_fault_buffer *buffer = nvkm_fault_buffer(object);
@@ -78,8 +82,8 @@ nvkm_ufault = {
 	.dtor = nvkm_ufault_dtor,
 	.init = nvkm_ufault_init,
 	.fini = nvkm_ufault_fini,
-	.ntfy = nvkm_ufault_ntfy,
 	.map = nvkm_ufault_map,
+	.uevent = nvkm_ufault_uevent,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
index 6faaea948fc4..bac7dcc4c2c1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
@@ -57,6 +57,15 @@ nvkm_fb_tile_prog(struct nvkm_fb *fb, int region, struct nvkm_fb_tile *tile)
 	}
 }
 
+static void
+nvkm_fb_sysmem_flush_page_init(struct nvkm_device *device)
+{
+	struct nvkm_fb *fb = device->fb;
+
+	if (fb->func->sysmem.flush_page_init)
+		fb->func->sysmem.flush_page_init(fb);
+}
+
 int
 nvkm_fb_bios_memtype(struct nvkm_bios *bios)
 {
@@ -125,12 +134,20 @@ nvkm_fb_oneinit(struct nvkm_subdev *subdev)
 	return nvkm_mm_init(&fb->tags.mm, 0, 0, tags, 1);
 }
 
-static int
-nvkm_fb_init_scrub_vpr(struct nvkm_fb *fb)
+int
+nvkm_fb_mem_unlock(struct nvkm_fb *fb)
 {
 	struct nvkm_subdev *subdev = &fb->subdev;
 	int ret;
 
+	if (!fb->func->vpr.scrub_required)
+		return 0;
+
+	if (!fb->func->vpr.scrub_required(fb)) {
+		nvkm_debug(subdev, "VPR not locked\n");
+		return 0;
+	}
+
 	nvkm_debug(subdev, "VPR locked, running scrubber binary\n");
 
 	if (!fb->vpr_scrubber.size) {
@@ -168,6 +185,8 @@ nvkm_fb_init(struct nvkm_subdev *subdev)
 	for (i = 0; i < fb->tile.regions; i++)
 		fb->func->tile.prog(fb, i, &fb->tile.region[i]);
 
+	nvkm_fb_sysmem_flush_page_init(subdev->device);
+
 	if (fb->func->init)
 		fb->func->init(fb);
 
@@ -183,13 +202,13 @@ nvkm_fb_init(struct nvkm_subdev *subdev)
 	if (fb->func->init_unkn)
 		fb->func->init_unkn(fb);
 
-	if (fb->func->vpr.scrub_required &&
-	    fb->func->vpr.scrub_required(fb)) {
-		ret = nvkm_fb_init_scrub_vpr(fb);
-		if (ret)
-			return ret;
-	}
+	return 0;
+}
 
+static int
+nvkm_fb_preinit(struct nvkm_subdev *subdev)
+{
+	nvkm_fb_sysmem_flush_page_init(subdev->device);
 	return 0;
 }
 
@@ -212,20 +231,28 @@ nvkm_fb_dtor(struct nvkm_subdev *subdev)
 
 	nvkm_blob_dtor(&fb->vpr_scrubber);
 
+	if (fb->sysmem.flush_page) {
+		dma_unmap_page(subdev->device->dev, fb->sysmem.flush_page_addr,
+			       PAGE_SIZE, DMA_BIDIRECTIONAL);
+		__free_page(fb->sysmem.flush_page);
+	}
+
 	if (fb->func->dtor)
 		return fb->func->dtor(fb);
+
 	return fb;
 }
 
 static const struct nvkm_subdev_func
 nvkm_fb = {
 	.dtor = nvkm_fb_dtor,
+	.preinit = nvkm_fb_preinit,
 	.oneinit = nvkm_fb_oneinit,
 	.init = nvkm_fb_init,
 	.intr = nvkm_fb_intr,
 };
 
-void
+int
 nvkm_fb_ctor(const struct nvkm_fb_func *func, struct nvkm_device *device,
 	     enum nvkm_subdev_type type, int inst, struct nvkm_fb *fb)
 {
@@ -234,6 +261,19 @@ nvkm_fb_ctor(const struct nvkm_fb_func *func, struct nvkm_device *device,
 	fb->tile.regions = fb->func->tile.regions;
 	fb->page = nvkm_longopt(device->cfgopt, "NvFbBigPage", fb->func->default_bigpage);
 	mutex_init(&fb->tags.mutex);
+
+	if (func->sysmem.flush_page_init) {
+		fb->sysmem.flush_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		if (!fb->sysmem.flush_page)
+			return -ENOMEM;
+
+		fb->sysmem.flush_page_addr = dma_map_page(device->dev, fb->sysmem.flush_page,
+							  0, PAGE_SIZE, DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(device->dev, fb->sysmem.flush_page_addr))
+			return -EFAULT;
+	}
+
+	return 0;
 }
 
 int
@@ -242,6 +282,5 @@ nvkm_fb_new_(const struct nvkm_fb_func *func, struct nvkm_device *device,
 {
 	if (!(*pfb = kzalloc(sizeof(**pfb), GFP_KERNEL)))
 		return -ENOMEM;
-	nvkm_fb_ctor(func, device, type, inst, *pfb);
-	return 0;
+	return nvkm_fb_ctor(func, device, type, inst, *pfb);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c
index b47bebfbc26f..5098f219e3e6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga100.c
@@ -26,9 +26,10 @@ static const struct nvkm_fb_func
 ga100_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
-	.init = gp100_fb_init,
+	.init = gm200_fb_init,
 	.init_page = gv100_fb_init_page,
 	.init_unkn = gp100_fb_init_unkn,
+	.sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
 	.ram_new = gp100_ram_new,
 	.default_bigpage = 16,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c
index 6ea7908f0563..8b7c8ea5e8a5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ga102.c
@@ -22,15 +22,42 @@
 #include "gf100.h"
 #include "ram.h"
 
+#include <engine/nvdec.h>
+
+static int
+ga102_fb_vpr_scrub(struct nvkm_fb *fb)
+{
+	struct nvkm_falcon_fw fw = {};
+	int ret;
+
+	ret = nvkm_falcon_fw_ctor_hs_v2(&ga102_flcn_fw, "mem-unlock", &fb->subdev, "nvdec/scrubber",
+					0, &fb->subdev.device->nvdec[0]->falcon, &fw);
+	if (ret)
+		return ret;
+
+	ret = nvkm_falcon_fw_boot(&fw, &fb->subdev, true, NULL, NULL, 0, 0);
+	nvkm_falcon_fw_dtor(&fw);
+	return ret;
+}
+
+static bool
+ga102_fb_vpr_scrub_required(struct nvkm_fb *fb)
+{
+	return (nvkm_rd32(fb->subdev.device, 0x1fa80c) & 0x00000010) != 0;
+}
+
 static const struct nvkm_fb_func
 ga102_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
-	.init = gp100_fb_init,
+	.init = gm200_fb_init,
 	.init_page = gv100_fb_init_page,
 	.init_unkn = gp100_fb_init_unkn,
+	.sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
 	.ram_new = ga102_ram_new,
 	.default_bigpage = 16,
+	.vpr.scrub_required = ga102_fb_vpr_scrub_required,
+	.vpr.scrub = ga102_fb_vpr_scrub,
 };
 
 int
@@ -38,3 +65,9 @@ ga102_fb_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, s
 {
 	return gp102_fb_new_(&ga102_fb, device, type, inst, pfb);
 }
+
+MODULE_FIRMWARE("nvidia/ga102/nvdec/scrubber.bin");
+MODULE_FIRMWARE("nvidia/ga103/nvdec/scrubber.bin");
+MODULE_FIRMWARE("nvidia/ga104/nvdec/scrubber.bin");
+MODULE_FIRMWARE("nvidia/ga106/nvdec/scrubber.bin");
+MODULE_FIRMWARE("nvidia/ga107/nvdec/scrubber.bin");
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
index 9dcc40f9ef79..07db9b397ac1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
@@ -61,14 +61,6 @@ gf100_fb_oneinit(struct nvkm_fb *base)
 	if (ret)
 		return ret;
 
-	fb->r100c10_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (fb->r100c10_page) {
-		fb->r100c10 = dma_map_page(device->dev, fb->r100c10_page, 0,
-					   PAGE_SIZE, DMA_BIDIRECTIONAL);
-		if (dma_mapping_error(device->dev, fb->r100c10))
-			return -EFAULT;
-	}
-
 	return 0;
 }
 
@@ -86,14 +78,17 @@ gf100_fb_init_page(struct nvkm_fb *fb)
 }
 
 void
+gf100_fb_sysmem_flush_page_init(struct nvkm_fb *fb)
+{
+	nvkm_wr32(fb->subdev.device, 0x100c10, fb->sysmem.flush_page_addr >> 8);
+}
+
+void
 gf100_fb_init(struct nvkm_fb *base)
 {
 	struct gf100_fb *fb = gf100_fb(base);
 	struct nvkm_device *device = fb->base.subdev.device;
 
-	if (fb->r100c10_page)
-		nvkm_wr32(device, 0x100c10, fb->r100c10 >> 8);
-
 	if (base->func->clkgate_pack) {
 		nvkm_therm_clkgate_init(device->therm,
 					base->func->clkgate_pack);
@@ -104,13 +99,6 @@ void *
 gf100_fb_dtor(struct nvkm_fb *base)
 {
 	struct gf100_fb *fb = gf100_fb(base);
-	struct nvkm_device *device = fb->base.subdev.device;
-
-	if (fb->r100c10_page) {
-		dma_unmap_page(device->dev, fb->r100c10, PAGE_SIZE,
-			       DMA_BIDIRECTIONAL);
-		__free_page(fb->r100c10_page);
-	}
 
 	return fb;
 }
@@ -136,6 +124,7 @@ gf100_fb = {
 	.init = gf100_fb_init,
 	.init_page = gf100_fb_init_page,
 	.intr = gf100_fb_intr,
+	.sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
 	.ram_new = gf100_ram_new,
 	.default_bigpage = 17,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h
index 0cac7b06acc8..77472b558591 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h
@@ -6,8 +6,6 @@
 
 struct gf100_fb {
 	struct nvkm_fb base;
-	struct page *r100c10_page;
-	dma_addr_t r100c10;
 };
 
 int gf100_fb_new_(const struct nvkm_fb_func *, struct nvkm_device *, enum nvkm_subdev_type, int,
@@ -16,7 +14,5 @@ void *gf100_fb_dtor(struct nvkm_fb *);
 void gf100_fb_init(struct nvkm_fb *);
 void gf100_fb_intr(struct nvkm_fb *);
 
-void gp100_fb_init(struct nvkm_fb *);
-
 void gm200_fb_init(struct nvkm_fb *base);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
index 5acf8d15d06f..fb02092a65eb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
@@ -46,9 +46,6 @@ gm200_fb_init(struct nvkm_fb *base)
 	struct gf100_fb *fb = gf100_fb(base);
 	struct nvkm_device *device = fb->base.subdev.device;
 
-	if (fb->r100c10_page)
-		nvkm_wr32(device, 0x100c10, fb->r100c10 >> 8);
-
 	nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(fb->base.mmu_wr) >> 8);
 	nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(fb->base.mmu_rd) >> 8);
 	nvkm_mask(device, 0x100cc4, 0x00060000,
@@ -62,6 +59,7 @@ gm200_fb = {
 	.init = gm200_fb_init,
 	.init_page = gm200_fb_init_page,
 	.intr = gf100_fb_intr,
+	.sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
 	.ram_new = gm200_ram_new,
 	.default_bigpage = 0 /* per-instance. */,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c
index 86f61a3f2fea..50875af94c18 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c
@@ -30,6 +30,7 @@ gm20b_fb = {
 	.init = gm200_fb_init,
 	.init_page = gm200_fb_init_page,
 	.intr = gf100_fb_intr,
+	.sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
 	.default_bigpage = 0 /* per-instance. */,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c
index 09e943edc362..110c08c94849 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp100.c
@@ -44,29 +44,15 @@ gp100_fb_init_remapper(struct nvkm_fb *fb)
 	nvkm_mask(device, 0x100c14, 0x00040000, 0x00000000);
 }
 
-void
-gp100_fb_init(struct nvkm_fb *base)
-{
-	struct gf100_fb *fb = gf100_fb(base);
-	struct nvkm_device *device = fb->base.subdev.device;
-
-	if (fb->r100c10_page)
-		nvkm_wr32(device, 0x100c10, fb->r100c10 >> 8);
-
-	nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(fb->base.mmu_wr) >> 8);
-	nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(fb->base.mmu_rd) >> 8);
-	nvkm_mask(device, 0x100cc4, 0x00060000,
-		  min(nvkm_memory_size(fb->base.mmu_rd) >> 16, (u64)2) << 17);
-}
-
 static const struct nvkm_fb_func
 gp100_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
-	.init = gp100_fb_init,
+	.init = gm200_fb_init,
 	.init_remapper = gp100_fb_init_remapper,
 	.init_page = gm200_fb_init_page,
 	.init_unkn = gp100_fb_init_unkn,
+	.sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
 	.ram_new = gp100_ram_new,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c
index 0e78b3d734a0..2658481d575b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c
@@ -24,71 +24,22 @@
 #include "gf100.h"
 #include "ram.h"
 
-#include <core/firmware.h>
-#include <core/memory.h>
-#include <nvfw/fw.h>
-#include <nvfw/hs.h>
 #include <engine/nvdec.h>
 
 int
 gp102_fb_vpr_scrub(struct nvkm_fb *fb)
 {
 	struct nvkm_subdev *subdev = &fb->subdev;
-	struct nvkm_device *device = subdev->device;
-	struct nvkm_falcon *falcon = &device->nvdec[0]->falcon;
-	struct nvkm_blob *blob = &fb->vpr_scrubber;
-	const struct nvfw_bin_hdr *hsbin_hdr;
-	const struct nvfw_hs_header *fw_hdr;
-	const struct nvfw_hs_load_header *lhdr;
-	void *scrub_data;
-	u32 patch_loc, patch_sig;
+	struct nvkm_falcon_fw fw = {};
 	int ret;
 
-	nvkm_falcon_get(falcon, subdev);
-
-	hsbin_hdr = nvfw_bin_hdr(subdev, blob->data);
-	fw_hdr = nvfw_hs_header(subdev, blob->data + hsbin_hdr->header_offset);
-	lhdr = nvfw_hs_load_header(subdev, blob->data + fw_hdr->hdr_offset);
-	scrub_data = blob->data + hsbin_hdr->data_offset;
-
-	patch_loc = *(u32 *)(blob->data + fw_hdr->patch_loc);
-	patch_sig = *(u32 *)(blob->data + fw_hdr->patch_sig);
-	if (falcon->debug) {
-		memcpy(scrub_data + patch_loc,
-		       blob->data + fw_hdr->sig_dbg_offset + patch_sig,
-		       fw_hdr->sig_dbg_size);
-	} else {
-		memcpy(scrub_data + patch_loc,
-		       blob->data + fw_hdr->sig_prod_offset + patch_sig,
-		       fw_hdr->sig_prod_size);
-	}
-
-	nvkm_falcon_reset(falcon);
-	nvkm_falcon_bind_context(falcon, NULL);
-
-	nvkm_falcon_load_imem(falcon, scrub_data, lhdr->non_sec_code_off,
-			      lhdr->non_sec_code_size,
-			      lhdr->non_sec_code_off >> 8, 0, false);
-	nvkm_falcon_load_imem(falcon, scrub_data + lhdr->apps[0],
-			      ALIGN(lhdr->apps[0], 0x100),
-			      lhdr->apps[1],
-			      lhdr->apps[0] >> 8, 0, true);
-	nvkm_falcon_load_dmem(falcon, scrub_data + lhdr->data_dma_base, 0,
-			      lhdr->data_size, 0);
-
-	nvkm_falcon_set_start_addr(falcon, 0x0);
-	nvkm_falcon_start(falcon);
-
-	ret = nvkm_falcon_wait_for_halt(falcon, 500);
-	if (ret < 0) {
-		ret = -ETIMEDOUT;
-		goto end;
-	}
+	ret = nvkm_falcon_fw_ctor_hs(&gm200_flcn_fw, "mem-unlock", subdev, NULL,
+				     "nvdec/scrubber", 0, &subdev->device->nvdec[0]->falcon, &fw);
+	if (ret)
+		return ret;
 
-	/* put nvdec in clean state - without reset it will remain in HS mode */
-	nvkm_falcon_reset(falcon);
-end:
-	nvkm_falcon_put(falcon, subdev);
+	ret = nvkm_falcon_fw_boot(&fw, subdev, true, NULL, NULL, 0, 0x00000000);
+	nvkm_falcon_fw_dtor(&fw);
 	return ret;
 }
 
@@ -104,9 +55,10 @@ static const struct nvkm_fb_func
 gp102_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
-	.init = gp100_fb_init,
+	.init = gm200_fb_init,
 	.init_remapper = gp100_fb_init_remapper,
 	.init_page = gm200_fb_init_page,
+	.sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
 	.vpr.scrub_required = gp102_fb_vpr_scrub_required,
 	.vpr.scrub = gp102_fb_vpr_scrub,
 	.ram_new = gp100_ram_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp10b.c
index 84c9815a6d48..a04a5f712019 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp10b.c
@@ -28,6 +28,7 @@ gp10b_fb = {
 	.init = gm200_fb_init,
 	.init_page = gm200_fb_init_page,
 	.intr = gf100_fb_intr,
+	.sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
index 63daa83ae12d..1f0126437c1a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gv100.c
@@ -32,9 +32,10 @@ static const struct nvkm_fb_func
 gv100_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
-	.init = gp100_fb_init,
+	.init = gm200_fb_init,
 	.init_page = gv100_fb_init_page,
 	.init_unkn = gp100_fb_init_unkn,
+	.sysmem.flush_page_init = gf100_fb_sysmem_flush_page_init,
 	.vpr.scrub_required = gp102_fb_vpr_scrub_required,
 	.vpr.scrub = gp102_fb_vpr_scrub,
 	.ram_new = gp100_ram_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c
index 95fd8f834010..a6efbd913c13 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c
@@ -137,8 +137,7 @@ nv50_fb_intr(struct nvkm_fb *base)
 	struct nv50_fb *fb = nv50_fb(base);
 	struct nvkm_subdev *subdev = &fb->base.subdev;
 	struct nvkm_device *device = subdev->device;
-	struct nvkm_fifo *fifo = device->fifo;
-	struct nvkm_fifo_chan *chan;
+	struct nvkm_chan *chan;
 	const struct nvkm_enum *en, *re, *cl, *sc;
 	u32 trap[6], idx, inst;
 	u8 st0, st1, st2, st3;
@@ -178,35 +177,18 @@ nv50_fb_intr(struct nvkm_fb *base)
 	else if (en && en->data) sc = nvkm_enum_find(en->data, st3);
 	else                     sc = NULL;
 
-	chan = nvkm_fifo_chan_inst(fifo, inst, &flags);
+	chan = nvkm_chan_get_inst(&device->fifo->engine, inst, &flags);
 	nvkm_error(subdev, "trapped %s at %02x%04x%04x on channel %d [%08x %s] "
 			   "engine %02x [%s] client %02x [%s] "
 			   "subclient %02x [%s] reason %08x [%s]\n",
 		   (trap[5] & 0x00000100) ? "read" : "write",
 		   trap[5] & 0xff, trap[4] & 0xffff, trap[3] & 0xffff,
-		   chan ? chan->chid : -1, inst,
-		   chan ? chan->object.client->name : "unknown",
+		   chan ? chan->id : -1, inst,
+		   chan ? chan->name : "unknown",
 		   st0, en ? en->name : "",
 		   st2, cl ? cl->name : "", st3, sc ? sc->name : "",
 		   st1, re ? re->name : "");
-	nvkm_fifo_chan_put(fifo, flags, &chan);
-}
-
-static int
-nv50_fb_oneinit(struct nvkm_fb *base)
-{
-	struct nv50_fb *fb = nv50_fb(base);
-	struct nvkm_device *device = fb->base.subdev.device;
-
-	fb->r100c08_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (fb->r100c08_page) {
-		fb->r100c08 = dma_map_page(device->dev, fb->r100c08_page, 0,
-					   PAGE_SIZE, DMA_BIDIRECTIONAL);
-		if (dma_mapping_error(device->dev, fb->r100c08))
-			return -EFAULT;
-	}
-
-	return 0;
+	nvkm_chan_put(&chan, flags);
 }
 
 static void
@@ -215,12 +197,6 @@ nv50_fb_init(struct nvkm_fb *base)
 	struct nv50_fb *fb = nv50_fb(base);
 	struct nvkm_device *device = fb->base.subdev.device;
 
-	/* Not a clue what this is exactly.  Without pointing it at a
-	 * scratch page, VRAM->GART blits with M2MF (as in DDX DFS)
-	 * cause IOMMU "read from address 0" errors (rh#561267)
-	 */
-	nvkm_wr32(device, 0x100c08, fb->r100c08 >> 8);
-
 	/* This is needed to get meaningful information from 100c90
 	 * on traps. No idea what these values mean exactly. */
 	nvkm_wr32(device, 0x100c90, fb->func->trap);
@@ -235,17 +211,16 @@ nv50_fb_tags(struct nvkm_fb *base)
 	return 0;
 }
 
+static void
+nv50_fb_sysmem_flush_page_init(struct nvkm_fb *fb)
+{
+	nvkm_wr32(fb->subdev.device, 0x100c08, fb->sysmem.flush_page_addr >> 8);
+}
+
 static void *
 nv50_fb_dtor(struct nvkm_fb *base)
 {
 	struct nv50_fb *fb = nv50_fb(base);
-	struct nvkm_device *device = fb->base.subdev.device;
-
-	if (fb->r100c08_page) {
-		dma_unmap_page(device->dev, fb->r100c08, PAGE_SIZE,
-			       DMA_BIDIRECTIONAL);
-		__free_page(fb->r100c08_page);
-	}
 
 	return fb;
 }
@@ -254,9 +229,9 @@ static const struct nvkm_fb_func
 nv50_fb_ = {
 	.dtor = nv50_fb_dtor,
 	.tags = nv50_fb_tags,
-	.oneinit = nv50_fb_oneinit,
 	.init = nv50_fb_init,
 	.intr = nv50_fb_intr,
+	.sysmem.flush_page_init = nv50_fb_sysmem_flush_page_init,
 	.ram_new = nv50_fb_ram_new,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.h
index a5e673859a90..4f68bc4513a7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.h
@@ -7,8 +7,6 @@
 struct nv50_fb {
 	const struct nv50_fb_func *func;
 	struct nvkm_fb base;
-	struct page *r100c08_page;
-	dma_addr_t r100c08;
 };
 
 struct nv50_fb_func {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
index 3f1be9780c65..ac03eac0f261 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/priv.h
@@ -16,6 +16,10 @@ struct nvkm_fb_func {
 	void (*init_unkn)(struct nvkm_fb *);
 	void (*intr)(struct nvkm_fb *);
 
+	struct nvkm_fb_func_sysmem {
+		void (*flush_page_init)(struct nvkm_fb *);
+	} sysmem;
+
 	struct {
 		bool (*scrub_required)(struct nvkm_fb *);
 		int (*scrub)(struct nvkm_fb *);
@@ -37,8 +41,8 @@ struct nvkm_fb_func {
 	const struct nvkm_therm_clkgate_pack *clkgate_pack;
 };
 
-void nvkm_fb_ctor(const struct nvkm_fb_func *, struct nvkm_device *device,
-		  enum nvkm_subdev_type type, int inst, struct nvkm_fb *);
+int nvkm_fb_ctor(const struct nvkm_fb_func *, struct nvkm_device *device,
+		 enum nvkm_subdev_type type, int inst, struct nvkm_fb *);
 int nvkm_fb_new_(const struct nvkm_fb_func *, struct nvkm_device *device,
 		 enum nvkm_subdev_type type, int inst, struct nvkm_fb **);
 int nvkm_fb_bios_memtype(struct nvkm_bios *);
@@ -72,6 +76,7 @@ void nv46_fb_tile_init(struct nvkm_fb *, int i, u32 addr, u32 size,
 
 int gf100_fb_oneinit(struct nvkm_fb *);
 int gf100_fb_init_page(struct nvkm_fb *);
+void gf100_fb_sysmem_flush_page_init(struct nvkm_fb *);
 
 int gm200_fb_init_page(struct nvkm_fb *);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c
index 03b1bdb27770..5c34416cb637 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c
@@ -25,6 +25,7 @@
 #include "ram.h"
 
 #include <core/memory.h>
+#include <subdev/instmem.h>
 #include <subdev/mmu.h>
 
 struct nvkm_vram {
@@ -35,6 +36,12 @@ struct nvkm_vram {
 };
 
 static int
+nvkm_vram_kmap(struct nvkm_memory *memory, struct nvkm_memory **pmemory)
+{
+	return nvkm_instobj_wrap(nvkm_vram(memory)->ram->fb->subdev.device, memory, pmemory);
+}
+
+static int
 nvkm_vram_map(struct nvkm_memory *memory, u64 offset, struct nvkm_vmm *vmm,
 	      struct nvkm_vma *vma, void *argv, u32 argc)
 {
@@ -98,6 +105,7 @@ nvkm_vram = {
 	.addr = nvkm_vram_addr,
 	.size = nvkm_vram_size,
 	.map = nvkm_vram_map,
+	.kmap = nvkm_vram_kmap,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/base.c
index 048bcc70c3f4..b196baa376dc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/base.c
@@ -24,7 +24,6 @@
 #include "priv.h"
 
 #include <core/option.h>
-#include <core/notify.h>
 
 static int
 nvkm_gpio_drive(struct nvkm_gpio *gpio, int idx, int line, int dir, int out)
@@ -123,23 +122,8 @@ nvkm_gpio_intr_init(struct nvkm_event *event, int type, int index)
 	gpio->func->intr_mask(gpio, type, 1 << index, 1 << index);
 }
 
-static int
-nvkm_gpio_intr_ctor(struct nvkm_object *object, void *data, u32 size,
-		    struct nvkm_notify *notify)
-{
-	struct nvkm_gpio_ntfy_req *req = data;
-	if (!WARN_ON(size != sizeof(*req))) {
-		notify->size  = sizeof(struct nvkm_gpio_ntfy_rep);
-		notify->types = req->mask;
-		notify->index = req->line;
-		return 0;
-	}
-	return -EINVAL;
-}
-
 static const struct nvkm_event_func
 nvkm_gpio_intr_func = {
-	.ctor = nvkm_gpio_intr_ctor,
 	.init = nvkm_gpio_intr_init,
 	.fini = nvkm_gpio_intr_fini,
 };
@@ -153,11 +137,9 @@ nvkm_gpio_intr(struct nvkm_subdev *subdev)
 	gpio->func->intr_stat(gpio, &hi, &lo);
 
 	for (i = 0; (hi | lo) && i < gpio->func->lines; i++) {
-		struct nvkm_gpio_ntfy_rep rep = {
-			.mask = (NVKM_GPIO_HI * !!(hi & (1 << i))) |
-				(NVKM_GPIO_LO * !!(lo & (1 << i))),
-		};
-		nvkm_event_send(&gpio->event, rep.mask, i, &rep, sizeof(rep));
+		u32 mask = (NVKM_GPIO_HI * !!(hi & (1 << i))) |
+			   (NVKM_GPIO_LO * !!(lo & (1 << i)));
+		nvkm_event_ntfy(&gpio->event, i, mask);
 	}
 }
 
@@ -251,6 +233,5 @@ nvkm_gpio_new_(const struct nvkm_gpio_func *func, struct nvkm_device *device,
 	nvkm_subdev_ctor(&nvkm_gpio, device, type, inst, &gpio->subdev);
 	gpio->func = func;
 
-	return nvkm_event_init(&nvkm_gpio_intr_func, 2, func->lines,
-			       &gpio->event);
+	return nvkm_event_init(&nvkm_gpio_intr_func, &gpio->subdev, 2, func->lines, &gpio->event);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild
index 67cc3b320169..7f61a1ed158b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/Kbuild
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: MIT
 nvkm-y += nvkm/subdev/gsp/base.o
 nvkm-y += nvkm/subdev/gsp/gv100.o
+nvkm-y += nvkm/subdev/gsp/ga102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c
index 22574886b819..591ac95c2669 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/base.c
@@ -53,5 +53,7 @@ nvkm_gsp_new_(const struct nvkm_gsp_fwif *fwif, struct nvkm_device *device,
 	if (IS_ERR(fwif))
 		return PTR_ERR(fwif);
 
-	return nvkm_falcon_ctor(fwif->flcn, &gsp->subdev, gsp->subdev.name, 0, &gsp->falcon);
+	gsp->func = fwif->func;
+
+	return nvkm_falcon_ctor(gsp->func->flcn, &gsp->subdev, gsp->subdev.name, 0, &gsp->falcon);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga102.c
new file mode 100644
index 000000000000..525267412c3e
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/ga102.c
@@ -0,0 +1,59 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+static const struct nvkm_falcon_func
+ga102_gsp_flcn = {
+	.disable = gm200_flcn_disable,
+	.enable = gm200_flcn_enable,
+	.select = ga102_flcn_select,
+	.addr2 = 0x1000,
+	.reset_eng = gp102_flcn_reset_eng,
+	.reset_prep = ga102_flcn_reset_prep,
+	.reset_wait_mem_scrubbing = ga102_flcn_reset_wait_mem_scrubbing,
+	.imem_dma = &ga102_flcn_dma,
+	.dmem_dma = &ga102_flcn_dma,
+};
+
+static const struct nvkm_gsp_func
+ga102_gsp = {
+	.flcn = &ga102_gsp_flcn,
+};
+
+static int
+ga102_gsp_nofw(struct nvkm_gsp *gsp, int ver, const struct nvkm_gsp_fwif *fwif)
+{
+	return 0;
+}
+
+struct nvkm_gsp_fwif
+ga102_gsps[] = {
+	{ -1, ga102_gsp_nofw, &ga102_gsp },
+	{}
+};
+
+int
+ga102_gsp_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
+	      struct nvkm_gsp **pgsp)
+{
+	return nvkm_gsp_new_(ga102_gsps, device, type, inst, pgsp);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gv100.c
index 6c4ef62a746a..da6a809cd317 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/gv100.c
@@ -23,17 +23,20 @@
 
 static const struct nvkm_falcon_func
 gv100_gsp_flcn = {
-	.fbif = 0x600,
-	.load_imem = nvkm_falcon_v1_load_imem,
-	.load_dmem = nvkm_falcon_v1_load_dmem,
-	.read_dmem = nvkm_falcon_v1_read_dmem,
-	.bind_context = gp102_sec2_flcn_bind_context,
-	.wait_for_halt = nvkm_falcon_v1_wait_for_halt,
-	.clear_interrupt = nvkm_falcon_v1_clear_interrupt,
-	.set_start_addr = nvkm_falcon_v1_set_start_addr,
-	.start = nvkm_falcon_v1_start,
-	.enable = gp102_sec2_flcn_enable,
-	.disable = nvkm_falcon_v1_disable,
+	.disable = gm200_flcn_disable,
+	.enable = gm200_flcn_enable,
+	.reset_eng = gp102_flcn_reset_eng,
+	.reset_wait_mem_scrubbing = gm200_flcn_reset_wait_mem_scrubbing,
+	.bind_inst = gm200_flcn_bind_inst,
+	.bind_stat = gm200_flcn_bind_stat,
+	.bind_intr = true,
+	.imem_pio = &gm200_flcn_imem_pio,
+	.dmem_pio = &gm200_flcn_dmem_pio,
+};
+
+static const struct nvkm_gsp_func
+gv100_gsp = {
+	.flcn = &gv100_gsp_flcn,
 };
 
 static int
@@ -43,8 +46,8 @@ gv100_gsp_nofw(struct nvkm_gsp *gsp, int ver, const struct nvkm_gsp_fwif *fwif)
 }
 
 static struct nvkm_gsp_fwif
-gv100_gsp[] = {
-	{ -1, gv100_gsp_nofw, &gv100_gsp_flcn },
+gv100_gsps[] = {
+	{ -1, gv100_gsp_nofw, &gv100_gsp },
 	{}
 };
 
@@ -52,5 +55,5 @@ int
 gv100_gsp_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
 	      struct nvkm_gsp **pgsp)
 {
-	return nvkm_gsp_new_(gv100_gsp, device, type, inst, pgsp);
+	return nvkm_gsp_new_(gv100_gsps, device, type, inst, pgsp);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/priv.h
index 19381ddd38d4..89749a40203c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gsp/priv.h
@@ -4,10 +4,14 @@
 #include <subdev/gsp.h>
 enum nvkm_acr_lsf_id;
 
+struct nvkm_gsp_func {
+	const struct nvkm_falcon_func *flcn;
+};
+
 struct nvkm_gsp_fwif {
 	int version;
 	int (*load)(struct nvkm_gsp *, int ver, const struct nvkm_gsp_fwif *);
-	const struct nvkm_falcon_func *flcn;
+	const struct nvkm_gsp_func *func;
 };
 
 int nvkm_gsp_new_(const struct nvkm_gsp_fwif *, struct nvkm_device *, enum nvkm_subdev_type, int,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c
index cb5cb533d91c..976539de4220 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/base.c
@@ -26,7 +26,6 @@
 #include "bus.h"
 #include "pad.h"
 
-#include <core/notify.h>
 #include <core/option.h>
 #include <subdev/bios.h>
 #include <subdev/bios/dcb.h>
@@ -104,23 +103,8 @@ nvkm_i2c_intr_init(struct nvkm_event *event, int type, int id)
 		i2c->func->aux_mask(i2c, type, aux->intr, aux->intr);
 }
 
-static int
-nvkm_i2c_intr_ctor(struct nvkm_object *object, void *data, u32 size,
-		   struct nvkm_notify *notify)
-{
-	struct nvkm_i2c_ntfy_req *req = data;
-	if (!WARN_ON(size != sizeof(*req))) {
-		notify->size  = sizeof(struct nvkm_i2c_ntfy_rep);
-		notify->types = req->mask;
-		notify->index = req->port;
-		return 0;
-	}
-	return -EINVAL;
-}
-
 static const struct nvkm_event_func
 nvkm_i2c_intr_func = {
-	.ctor = nvkm_i2c_intr_ctor,
 	.init = nvkm_i2c_intr_init,
 	.fini = nvkm_i2c_intr_fini,
 };
@@ -145,13 +129,8 @@ nvkm_i2c_intr(struct nvkm_subdev *subdev)
 		if (lo & aux->intr) mask |= NVKM_I2C_UNPLUG;
 		if (rq & aux->intr) mask |= NVKM_I2C_IRQ;
 		if (tx & aux->intr) mask |= NVKM_I2C_DONE;
-		if (mask) {
-			struct nvkm_i2c_ntfy_rep rep = {
-				.mask = mask,
-			};
-			nvkm_event_send(&i2c->event, rep.mask, aux->id,
-					&rep, sizeof(rep));
-		}
+		if (mask)
+			nvkm_event_ntfy(&i2c->event, aux->id, mask);
 	}
 }
 
@@ -427,5 +406,5 @@ nvkm_i2c_new_(const struct nvkm_i2c_func *func, struct nvkm_device *device,
 		}
 	}
 
-	return nvkm_event_init(&nvkm_i2c_intr_func, 4, i, &i2c->event);
+	return nvkm_event_init(&nvkm_i2c_intr_func, &i2c->subdev, 4, i, &i2c->event);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c
index cd8163a52bb6..e0e4f97be029 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c
@@ -90,6 +90,18 @@ nvkm_instobj_ctor(const struct nvkm_memory_func *func,
 }
 
 int
+nvkm_instobj_wrap(struct nvkm_device *device,
+		  struct nvkm_memory *memory, struct nvkm_memory **pmemory)
+{
+	struct nvkm_instmem *imem = device->imem;
+
+	if (!imem->func->memory_wrap)
+		return -ENOSYS;
+
+	return imem->func->memory_wrap(imem, memory, pmemory);
+}
+
+int
 nvkm_instobj_new(struct nvkm_instmem *imem, u32 size, u32 align, bool zero,
 		 struct nvkm_memory **pmemory)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
index c51bac76174c..4b2d7465d22f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
@@ -348,13 +348,11 @@ nv50_instobj_func = {
 };
 
 static int
-nv50_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero,
-		 struct nvkm_memory **pmemory)
+nv50_instobj_wrap(struct nvkm_instmem *base,
+		  struct nvkm_memory *memory, struct nvkm_memory **pmemory)
 {
 	struct nv50_instmem *imem = nv50_instmem(base);
 	struct nv50_instobj *iobj;
-	struct nvkm_device *device = imem->base.subdev.device;
-	u8 page = max(order_base_2(align), 12);
 
 	if (!(iobj = kzalloc(sizeof(*iobj), GFP_KERNEL)))
 		return -ENOMEM;
@@ -365,7 +363,25 @@ nv50_instobj_new(struct nvkm_instmem *base, u32 size, u32 align, bool zero,
 	refcount_set(&iobj->maps, 0);
 	INIT_LIST_HEAD(&iobj->lru);
 
-	return nvkm_ram_get(device, 0, 1, page, size, true, true, &iobj->ram);
+	iobj->ram = nvkm_memory_ref(memory);
+	return 0;
+}
+
+static int
+nv50_instobj_new(struct nvkm_instmem *imem, u32 size, u32 align, bool zero,
+		 struct nvkm_memory **pmemory)
+{
+	u8 page = max(order_base_2(align), 12);
+	struct nvkm_memory *ram;
+	int ret;
+
+	ret = nvkm_ram_get(imem->subdev.device, 0, 1, page, size, true, true, &ram);
+	if (ret)
+		return ret;
+
+	ret = nv50_instobj_wrap(imem, ram, pmemory);
+	nvkm_memory_unref(&ram);
+	return ret;
 }
 
 /******************************************************************************
@@ -382,6 +398,7 @@ static const struct nvkm_instmem_func
 nv50_instmem = {
 	.fini = nv50_instmem_fini,
 	.memory_new = nv50_instobj_new,
+	.memory_wrap = nv50_instobj_wrap,
 	.zero = false,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/priv.h
index 56c15e30a5dd..fe92986a3885 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/priv.h
@@ -12,6 +12,7 @@ struct nvkm_instmem_func {
 	void (*wr32)(struct nvkm_instmem *, u32 addr, u32 data);
 	int (*memory_new)(struct nvkm_instmem *, u32 size, u32 align,
 			  bool zero, struct nvkm_memory **);
+	int (*memory_wrap)(struct nvkm_instmem *, struct nvkm_memory *, struct nvkm_memory **);
 	bool zero;
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
index 728d75010847..0d8a915d727e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/Kbuild
@@ -7,3 +7,4 @@ nvkm-y += nvkm/subdev/ltc/gm200.o
 nvkm-y += nvkm/subdev/ltc/gp100.o
 nvkm-y += nvkm/subdev/ltc/gp102.o
 nvkm-y += nvkm/subdev/ltc/gp10b.o
+nvkm-y += nvkm/subdev/ltc/ga102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c
index fa683c190795..f742a7b7b175 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c
@@ -97,8 +97,10 @@ nvkm_ltc_init(struct nvkm_subdev *subdev)
 	struct nvkm_ltc *ltc = nvkm_ltc(subdev);
 	int i;
 
-	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
+	for (i = ltc->zbc_color_min; i <= ltc->zbc_color_max; i++)
 		ltc->func->zbc_clear_color(ltc, i, ltc->zbc_color[i]);
+
+	for (i = ltc->zbc_depth_min; i <= ltc->zbc_depth_max; i++) {
 		ltc->func->zbc_clear_depth(ltc, i, ltc->zbc_depth[i]);
 		if (ltc->func->zbc_clear_stencil)
 			ltc->func->zbc_clear_stencil(ltc, i, ltc->zbc_stencil[i]);
@@ -137,7 +139,9 @@ nvkm_ltc_new_(const struct nvkm_ltc_func *func, struct nvkm_device *device,
 	nvkm_subdev_ctor(&nvkm_ltc, device, type, inst, &ltc->subdev);
 	ltc->func = func;
 	mutex_init(&ltc->mutex);
-	ltc->zbc_min = 1; /* reserve 0 for disabled */
-	ltc->zbc_max = min(func->zbc, NVKM_LTC_MAX_ZBC_CNT) - 1;
+	ltc->zbc_color_min = 1; /* reserve 0 for disabled */
+	ltc->zbc_color_max = min(func->zbc_color, NVKM_LTC_MAX_ZBC_COLOR_CNT) - 1;
+	ltc->zbc_depth_min = 1; /* reserve 0 for disabled */
+	ltc->zbc_depth_max = min(func->zbc_depth, NVKM_LTC_MAX_ZBC_DEPTH_CNT) - 1;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/ga102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/ga102.c
new file mode 100644
index 000000000000..159d9f8c95f3
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/ga102.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+static void
+ga102_ltc_zbc_clear_color(struct nvkm_ltc *ltc, int i, const u32 color[4])
+{
+	struct nvkm_device *device = ltc->subdev.device;
+
+	nvkm_mask(device, 0x17e338, 0x0000001f, i);
+	nvkm_wr32(device, 0x17e33c, color[0]);
+	nvkm_wr32(device, 0x17e340, color[1]);
+	nvkm_wr32(device, 0x17e344, color[2]);
+	nvkm_wr32(device, 0x17e348, color[3]);
+}
+
+static const struct nvkm_ltc_func
+ga102_ltc = {
+	.oneinit = gp100_ltc_oneinit,
+	.init = gp100_ltc_init,
+	.intr = gp100_ltc_intr,
+	.cbc_clear = gm107_ltc_cbc_clear,
+	.cbc_wait = gm107_ltc_cbc_wait,
+	.zbc_color = 31,
+	.zbc_depth = 16,
+	.zbc_clear_color = ga102_ltc_zbc_clear_color,
+	.zbc_clear_depth = gm107_ltc_zbc_clear_depth,
+	.zbc_clear_stencil = gp102_ltc_zbc_clear_stencil,
+	.invalidate = gf100_ltc_invalidate,
+	.flush = gf100_ltc_flush,
+};
+
+int
+ga102_ltc_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst,
+	      struct nvkm_ltc **pltc)
+{
+	return nvkm_ltc_new_(&ga102_ltc, device, type, inst, pltc);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
index fd8aeafc812d..de71ba3c9292 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gf100.c
@@ -241,7 +241,8 @@ gf100_ltc = {
 	.intr = gf100_ltc_intr,
 	.cbc_clear = gf100_ltc_cbc_clear,
 	.cbc_wait = gf100_ltc_cbc_wait,
-	.zbc = 16,
+	.zbc_color = 16,
+	.zbc_depth = 16,
 	.zbc_clear_color = gf100_ltc_zbc_clear_color,
 	.zbc_clear_depth = gf100_ltc_zbc_clear_depth,
 	.invalidate = gf100_ltc_invalidate,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gk104.c
index 94aa09244d67..5d61e3c6ff59 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gk104.c
@@ -42,7 +42,8 @@ gk104_ltc = {
 	.intr = gf100_ltc_intr,
 	.cbc_clear = gf100_ltc_cbc_clear,
 	.cbc_wait = gf100_ltc_cbc_wait,
-	.zbc = 16,
+	.zbc_color = 16,
+	.zbc_depth = 16,
 	.zbc_clear_color = gf100_ltc_zbc_clear_color,
 	.zbc_clear_depth = gf100_ltc_zbc_clear_depth,
 	.invalidate = gf100_ltc_invalidate,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
index 54d1d65d5a85..18685d849657 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm107.c
@@ -137,7 +137,8 @@ gm107_ltc = {
 	.intr = gm107_ltc_intr,
 	.cbc_clear = gm107_ltc_cbc_clear,
 	.cbc_wait = gm107_ltc_cbc_wait,
-	.zbc = 16,
+	.zbc_color = 16,
+	.zbc_depth = 16,
 	.zbc_clear_color = gm107_ltc_zbc_clear_color,
 	.zbc_clear_depth = gm107_ltc_zbc_clear_depth,
 	.invalidate = gf100_ltc_invalidate,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
index 8cfdbbdd8e8d..7a9464b9def5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gm200.c
@@ -49,7 +49,8 @@ gm200_ltc = {
 	.intr = gm107_ltc_intr,
 	.cbc_clear = gm107_ltc_cbc_clear,
 	.cbc_wait = gm107_ltc_cbc_wait,
-	.zbc = 16,
+	.zbc_color = 16,
+	.zbc_depth = 16,
 	.zbc_clear_color = gm107_ltc_zbc_clear_color,
 	.zbc_clear_depth = gm107_ltc_zbc_clear_depth,
 	.invalidate = gf100_ltc_invalidate,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c
index a4a6cd9b435a..1a17a451754c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp100.c
@@ -61,7 +61,8 @@ gp100_ltc = {
 	.intr = gp100_ltc_intr,
 	.cbc_clear = gm107_ltc_cbc_clear,
 	.cbc_wait = gm107_ltc_cbc_wait,
-	.zbc = 16,
+	.zbc_color = 16,
+	.zbc_depth = 16,
 	.zbc_clear_color = gm107_ltc_zbc_clear_color,
 	.zbc_clear_depth = gm107_ltc_zbc_clear_depth,
 	.invalidate = gf100_ltc_invalidate,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp102.c
index ff05d617e7f4..265a05fd5f6b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp102.c
@@ -36,7 +36,8 @@ gp102_ltc = {
 	.intr = gp100_ltc_intr,
 	.cbc_clear = gm107_ltc_cbc_clear,
 	.cbc_wait = gm107_ltc_cbc_wait,
-	.zbc = 16,
+	.zbc_color = 16,
+	.zbc_depth = 16,
 	.zbc_clear_color = gm107_ltc_zbc_clear_color,
 	.zbc_clear_depth = gm107_ltc_zbc_clear_depth,
 	.zbc_clear_stencil = gp102_ltc_zbc_clear_stencil,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c
index dfebd796cb4b..e7e8fdf3adab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/gp10b.c
@@ -50,7 +50,8 @@ gp10b_ltc = {
 	.intr = gp100_ltc_intr,
 	.cbc_clear = gm107_ltc_cbc_clear,
 	.cbc_wait = gm107_ltc_cbc_wait,
-	.zbc = 16,
+	.zbc_color = 16,
+	.zbc_depth = 16,
 	.zbc_clear_color = gm107_ltc_zbc_clear_color,
 	.zbc_clear_depth = gm107_ltc_zbc_clear_depth,
 	.zbc_clear_stencil = gp102_ltc_zbc_clear_stencil,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
index 2bebe139005d..134e90c9e861 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/priv.h
@@ -16,7 +16,8 @@ struct nvkm_ltc_func {
 	void (*cbc_clear)(struct nvkm_ltc *, u32 start, u32 limit);
 	void (*cbc_wait)(struct nvkm_ltc *);
 
-	int zbc;
+	int zbc_color;
+	int zbc_depth;
 	void (*zbc_clear_color)(struct nvkm_ltc *, int, const u32[4]);
 	void (*zbc_clear_depth)(struct nvkm_ltc *, int, const u32);
 	void (*zbc_clear_stencil)(struct nvkm_ltc *, int, const u32);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
index ac2b34e9ac6a..2a3255ced8b7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
@@ -13,5 +13,4 @@ nvkm-y += nvkm/subdev/mc/gk104.o
 nvkm-y += nvkm/subdev/mc/gk20a.o
 nvkm-y += nvkm/subdev/mc/gp100.o
 nvkm-y += nvkm/subdev/mc/gp10b.o
-nvkm-y += nvkm/subdev/mc/tu102.o
 nvkm-y += nvkm/subdev/mc/ga100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
index 21c4af3f81d5..c85600ba69f9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
@@ -37,84 +37,14 @@ nvkm_mc_unk260(struct nvkm_device *device, u32 data)
 void
 nvkm_mc_intr_mask(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, bool en)
 {
-	struct nvkm_mc *mc = device->mc;
-	const struct nvkm_mc_map *map;
-	if (likely(mc) && mc->func->intr_mask) {
-		u32 mask = nvkm_top_intr_mask(device, type, inst);
-		for (map = mc->func->intr; !mask && map->stat; map++) {
-			if (map->type == type && map->inst == inst)
-				mask = map->stat;
-		}
-		mc->func->intr_mask(mc, mask, en ? mask : 0);
-	}
-}
-
-void
-nvkm_mc_intr_unarm(struct nvkm_device *device)
-{
-	struct nvkm_mc *mc = device->mc;
-	if (likely(mc))
-		mc->func->intr_unarm(mc);
-}
-
-void
-nvkm_mc_intr_rearm(struct nvkm_device *device)
-{
-	struct nvkm_mc *mc = device->mc;
-	if (likely(mc))
-		mc->func->intr_rearm(mc);
-}
-
-static u32
-nvkm_mc_intr_stat(struct nvkm_mc *mc)
-{
-	u32 intr = mc->func->intr_stat(mc);
-	if (WARN_ON_ONCE(intr == 0xffffffff))
-		intr = 0; /* likely fallen off the bus */
-	return intr;
-}
-
-void
-nvkm_mc_intr(struct nvkm_device *device, bool *handled)
-{
-	struct nvkm_mc *mc = device->mc;
-	struct nvkm_top *top = device->top;
-	struct nvkm_top_device *tdev;
-	struct nvkm_subdev *subdev;
-	const struct nvkm_mc_map *map;
-	u32 stat, intr;
-
-	if (unlikely(!mc))
-		return;
-
-	stat = intr = nvkm_mc_intr_stat(mc);
-
-	if (top) {
-		list_for_each_entry(tdev, &top->device, head) {
-			if (tdev->intr >= 0 && (stat & BIT(tdev->intr))) {
-				subdev = nvkm_device_subdev(device, tdev->type, tdev->inst);
-				if (subdev) {
-					nvkm_subdev_intr(subdev);
-					stat &= ~BIT(tdev->intr);
-					if (!stat)
-						break;
-				}
-			}
-		}
-	}
+	struct nvkm_subdev *subdev = nvkm_device_subdev(device, type, inst);
 
-	for (map = mc->func->intr; map->stat; map++) {
-		if (intr & map->stat) {
-			subdev = nvkm_device_subdev(device, map->type, map->inst);
-			if (subdev)
-				nvkm_subdev_intr(subdev);
-			stat &= ~map->stat;
-		}
+	if (subdev) {
+		if (en)
+			nvkm_intr_allow(subdev, NVKM_INTR_SUBDEV);
+		else
+			nvkm_intr_block(subdev, NVKM_INTR_SUBDEV);
 	}
-
-	if (stat)
-		nvkm_error(&mc->subdev, "intr %08x\n", stat);
-	*handled = intr != 0;
 }
 
 static u32
@@ -143,9 +73,8 @@ nvkm_mc_reset(struct nvkm_device *device, enum nvkm_subdev_type type, int inst)
 {
 	u64 pmc_enable = nvkm_mc_reset_mask(device, true, type, inst);
 	if (pmc_enable) {
-		nvkm_mask(device, 0x000200, pmc_enable, 0x00000000);
-		nvkm_mask(device, 0x000200, pmc_enable, pmc_enable);
-		nvkm_rd32(device, 0x000200);
+		device->mc->func->device->disable(device->mc, pmc_enable);
+		device->mc->func->device->enable(device->mc, pmc_enable);
 	}
 }
 
@@ -154,17 +83,15 @@ nvkm_mc_disable(struct nvkm_device *device, enum nvkm_subdev_type type, int inst
 {
 	u64 pmc_enable = nvkm_mc_reset_mask(device, false, type, inst);
 	if (pmc_enable)
-		nvkm_mask(device, 0x000200, pmc_enable, 0x00000000);
+		device->mc->func->device->disable(device->mc, pmc_enable);
 }
 
 void
 nvkm_mc_enable(struct nvkm_device *device, enum nvkm_subdev_type type, int inst)
 {
 	u64 pmc_enable = nvkm_mc_reset_mask(device, false, type, inst);
-	if (pmc_enable) {
-		nvkm_mask(device, 0x000200, pmc_enable, pmc_enable);
-		nvkm_rd32(device, 0x000200);
-	}
+	if (pmc_enable)
+		device->mc->func->device->enable(device->mc, pmc_enable);
 }
 
 bool
@@ -172,16 +99,7 @@ nvkm_mc_enabled(struct nvkm_device *device, enum nvkm_subdev_type type, int inst
 {
 	u64 pmc_enable = nvkm_mc_reset_mask(device, false, type, inst);
 
-	return (pmc_enable != 0) &&
-	       ((nvkm_rd32(device, 0x000200) & pmc_enable) == pmc_enable);
-}
-
-
-static int
-nvkm_mc_fini(struct nvkm_subdev *subdev, bool suspend)
-{
-	nvkm_mc_intr_unarm(subdev->device);
-	return 0;
+	return (pmc_enable != 0) && device->mc->func->device->enabled(device->mc, pmc_enable);
 }
 
 static int
@@ -190,7 +108,6 @@ nvkm_mc_init(struct nvkm_subdev *subdev)
 	struct nvkm_mc *mc = nvkm_mc(subdev);
 	if (mc->func->init)
 		mc->func->init(mc);
-	nvkm_mc_intr_rearm(subdev->device);
 	return 0;
 }
 
@@ -204,24 +121,27 @@ static const struct nvkm_subdev_func
 nvkm_mc = {
 	.dtor = nvkm_mc_dtor,
 	.init = nvkm_mc_init,
-	.fini = nvkm_mc_fini,
 };
 
-void
-nvkm_mc_ctor(const struct nvkm_mc_func *func, struct nvkm_device *device,
-	     enum nvkm_subdev_type type, int inst, struct nvkm_mc *mc)
-{
-	nvkm_subdev_ctor(&nvkm_mc, device, type, inst, &mc->subdev);
-	mc->func = func;
-}
-
 int
 nvkm_mc_new_(const struct nvkm_mc_func *func, struct nvkm_device *device,
 	     enum nvkm_subdev_type type, int inst, struct nvkm_mc **pmc)
 {
 	struct nvkm_mc *mc;
+	int ret;
+
 	if (!(mc = *pmc = kzalloc(sizeof(*mc), GFP_KERNEL)))
 		return -ENOMEM;
-	nvkm_mc_ctor(func, device, type, inst, *pmc);
+
+	nvkm_subdev_ctor(&nvkm_mc, device, type, inst, &mc->subdev);
+	mc->func = func;
+
+	if (mc->func->intr) {
+		ret = nvkm_intr_add(mc->func->intr, mc->func->intrs, &mc->subdev,
+				    mc->func->intr_nonstall ? 2 : 1, &mc->intr);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c
index 4cfc1c984006..f4ee99137b1f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c
@@ -34,30 +34,29 @@ g84_mc_reset[] = {
 	{}
 };
 
-static const struct nvkm_mc_map
-g84_mc_intr[] = {
-	{ 0x04000000, NVKM_ENGINE_DISP },
-	{ 0x00020000, NVKM_ENGINE_VP },
-	{ 0x00008000, NVKM_ENGINE_BSP },
-	{ 0x00004000, NVKM_ENGINE_CIPHER },
-	{ 0x00001000, NVKM_ENGINE_GR },
-	{ 0x00000100, NVKM_ENGINE_FIFO },
-	{ 0x00000001, NVKM_ENGINE_MPEG },
-	{ 0x0002d101, NVKM_SUBDEV_FB },
-	{ 0x10000000, NVKM_SUBDEV_BUS },
-	{ 0x00200000, NVKM_SUBDEV_GPIO },
-	{ 0x00200000, NVKM_SUBDEV_I2C },
-	{ 0x00100000, NVKM_SUBDEV_TIMER },
+static const struct nvkm_intr_data
+g84_mc_intrs[] = {
+	{ NVKM_ENGINE_DISP  , 0, 0, 0x04000000, true },
+	{ NVKM_ENGINE_VP    , 0, 0, 0x00020000, true },
+	{ NVKM_ENGINE_BSP   , 0, 0, 0x00008000, true },
+	{ NVKM_ENGINE_CIPHER, 0, 0, 0x00004000, true },
+	{ NVKM_ENGINE_GR    , 0, 0, 0x00001000, true },
+	{ NVKM_ENGINE_FIFO  , 0, 0, 0x00000100 },
+	{ NVKM_ENGINE_MPEG  , 0, 0, 0x00000001, true },
+	{ NVKM_SUBDEV_FB    , 0, 0, 0x0002d101, true },
+	{ NVKM_SUBDEV_BUS   , 0, 0, 0x10000000, true },
+	{ NVKM_SUBDEV_GPIO  , 0, 0, 0x00200000, true },
+	{ NVKM_SUBDEV_I2C   , 0, 0, 0x00200000, true },
+	{ NVKM_SUBDEV_TIMER , 0, 0, 0x00100000, true },
 	{},
 };
 
 static const struct nvkm_mc_func
 g84_mc = {
 	.init = nv50_mc_init,
-	.intr = g84_mc_intr,
-	.intr_unarm = nv04_mc_intr_unarm,
-	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_stat = nv04_mc_intr_stat,
+	.intr = &nv04_mc_intr,
+	.intrs = g84_mc_intrs,
+	.device = &nv04_mc_device,
 	.reset = g84_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c
index b7e58d75d894..f42684809f08 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g98.c
@@ -34,30 +34,29 @@ g98_mc_reset[] = {
 	{}
 };
 
-static const struct nvkm_mc_map
-g98_mc_intr[] = {
-	{ 0x04000000, NVKM_ENGINE_DISP },
-	{ 0x00020000, NVKM_ENGINE_MSPDEC },
-	{ 0x00008000, NVKM_ENGINE_MSVLD },
-	{ 0x00004000, NVKM_ENGINE_SEC },
-	{ 0x00001000, NVKM_ENGINE_GR },
-	{ 0x00000100, NVKM_ENGINE_FIFO },
-	{ 0x00000001, NVKM_ENGINE_MSPPP },
-	{ 0x0002d101, NVKM_SUBDEV_FB },
-	{ 0x10000000, NVKM_SUBDEV_BUS },
-	{ 0x00200000, NVKM_SUBDEV_GPIO },
-	{ 0x00200000, NVKM_SUBDEV_I2C },
-	{ 0x00100000, NVKM_SUBDEV_TIMER },
+static const struct nvkm_intr_data
+g98_mc_intrs[] = {
+	{ NVKM_ENGINE_DISP  , 0, 0, 0x04000000, true },
+	{ NVKM_ENGINE_MSPDEC, 0, 0, 0x00020000, true },
+	{ NVKM_ENGINE_MSVLD , 0, 0, 0x00008000, true },
+	{ NVKM_ENGINE_SEC   , 0, 0, 0x00004000, true },
+	{ NVKM_ENGINE_GR    , 0, 0, 0x00001000, true },
+	{ NVKM_ENGINE_FIFO  , 0, 0, 0x00000100 },
+	{ NVKM_ENGINE_MSPPP , 0, 0, 0x00000001, true },
+	{ NVKM_SUBDEV_FB    , 0, 0, 0x0002d101, true },
+	{ NVKM_SUBDEV_BUS   , 0, 0, 0x10000000, true },
+	{ NVKM_SUBDEV_GPIO  , 0, 0, 0x00200000, true },
+	{ NVKM_SUBDEV_I2C   , 0, 0, 0x00200000, true },
+	{ NVKM_SUBDEV_TIMER , 0, 0, 0x00100000, true },
 	{},
 };
 
 static const struct nvkm_mc_func
 g98_mc = {
 	.init = nv50_mc_init,
-	.intr = g98_mc_intr,
-	.intr_unarm = nv04_mc_intr_unarm,
-	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_stat = nv04_mc_intr_stat,
+	.intr = &nv04_mc_intr,
+	.intrs = g98_mc_intrs,
+	.device = &nv04_mc_device,
 	.reset = g98_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c
index 4105175dfccd..1e2eabec1a76 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/ga100.c
@@ -22,49 +22,51 @@
 #include "priv.h"
 
 static void
-ga100_mc_intr_unarm(struct nvkm_mc *mc)
+ga100_mc_device_disable(struct nvkm_mc *mc, u32 mask)
 {
-	nvkm_wr32(mc->subdev.device, 0xb81610, 0x00000004);
-}
+	struct nvkm_device *device = mc->subdev.device;
 
-static void
-ga100_mc_intr_rearm(struct nvkm_mc *mc)
-{
-	nvkm_wr32(mc->subdev.device, 0xb81608, 0x00000004);
+	nvkm_mask(device, 0x000600, mask, 0x00000000);
+	nvkm_rd32(device, 0x000600);
+	nvkm_rd32(device, 0x000600);
 }
 
 static void
-ga100_mc_intr_mask(struct nvkm_mc *mc, u32 mask, u32 intr)
+ga100_mc_device_enable(struct nvkm_mc *mc, u32 mask)
 {
-	nvkm_wr32(mc->subdev.device, 0xb81210,          mask & intr );
-	nvkm_wr32(mc->subdev.device, 0xb81410, mask & ~(mask & intr));
+	struct nvkm_device *device = mc->subdev.device;
+
+	nvkm_mask(device, 0x000600, mask, mask);
+	nvkm_rd32(device, 0x000600);
+	nvkm_rd32(device, 0x000600);
 }
 
-static u32
-ga100_mc_intr_stat(struct nvkm_mc *mc)
+static bool
+ga100_mc_device_enabled(struct nvkm_mc *mc, u32 mask)
 {
-	u32 intr_top = nvkm_rd32(mc->subdev.device, 0xb81600), intr = 0x00000000;
-	if (intr_top & 0x00000004)
-		intr = nvkm_mask(mc->subdev.device, 0xb81010, 0x00000000, 0x00000000);
-	return intr;
+	return (nvkm_rd32(mc->subdev.device, 0x000600) & mask) == mask;
 }
 
+const struct nvkm_mc_device_func
+ga100_mc_device = {
+	.enabled = ga100_mc_device_enabled,
+	.enable = ga100_mc_device_enable,
+	.disable = ga100_mc_device_disable,
+};
+
 static void
 ga100_mc_init(struct nvkm_mc *mc)
 {
-	nv50_mc_init(mc);
-	nvkm_wr32(mc->subdev.device, 0xb81210, 0xffffffff);
+	struct nvkm_device *device = mc->subdev.device;
+
+	nvkm_wr32(device, 0x000200, 0xffffffff);
+	nvkm_wr32(device, 0x000600, 0xffffffff);
 }
 
 static const struct nvkm_mc_func
 ga100_mc = {
 	.init = ga100_mc_init,
-	.intr = gp100_mc_intr,
-	.intr_unarm = ga100_mc_intr_unarm,
-	.intr_rearm = ga100_mc_intr_rearm,
-	.intr_mask = ga100_mc_intr_mask,
-	.intr_stat = ga100_mc_intr_stat,
-	.reset = gk104_mc_reset,
+	.device = &ga100_mc_device,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c
index 3a589c6f7fad..ab1eaa37123a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gf100.c
@@ -36,64 +36,29 @@ gf100_mc_reset[] = {
 	{}
 };
 
-static const struct nvkm_mc_map
-gf100_mc_intr[] = {
-	{ 0x04000000, NVKM_ENGINE_DISP },
-	{ 0x00020000, NVKM_ENGINE_MSPDEC },
-	{ 0x00008000, NVKM_ENGINE_MSVLD },
-	{ 0x00001000, NVKM_ENGINE_GR },
-	{ 0x00000100, NVKM_ENGINE_FIFO },
-	{ 0x00000040, NVKM_ENGINE_CE, 1 },
-	{ 0x00000020, NVKM_ENGINE_CE, 0 },
-	{ 0x00000001, NVKM_ENGINE_MSPPP },
-	{ 0x40000000, NVKM_SUBDEV_PRIVRING },
-	{ 0x10000000, NVKM_SUBDEV_BUS },
-	{ 0x08000000, NVKM_SUBDEV_FB },
-	{ 0x02000000, NVKM_SUBDEV_LTC },
-	{ 0x01000000, NVKM_SUBDEV_PMU },
-	{ 0x00200000, NVKM_SUBDEV_GPIO },
-	{ 0x00200000, NVKM_SUBDEV_I2C },
-	{ 0x00100000, NVKM_SUBDEV_TIMER },
-	{ 0x00040000, NVKM_SUBDEV_THERM },
-	{ 0x00002000, NVKM_SUBDEV_FB },
+static const struct nvkm_intr_data
+gf100_mc_intrs[] = {
+	{ NVKM_ENGINE_DISP    , 0, 0, 0x04000000, true },
+	{ NVKM_ENGINE_MSPDEC  , 0, 0, 0x00020000, true },
+	{ NVKM_ENGINE_MSVLD   , 0, 0, 0x00008000, true },
+	{ NVKM_ENGINE_GR      , 0, 0, 0x00001000 },
+	{ NVKM_ENGINE_FIFO    , 0, 0, 0x00000100 },
+	{ NVKM_ENGINE_CE      , 1, 0, 0x00000040, true },
+	{ NVKM_ENGINE_CE      , 0, 0, 0x00000020, true },
+	{ NVKM_ENGINE_MSPPP   , 0, 0, 0x00000001, true },
+	{ NVKM_SUBDEV_PRIVRING, 0, 0, 0x40000000, true },
+	{ NVKM_SUBDEV_BUS     , 0, 0, 0x10000000, true },
+	{ NVKM_SUBDEV_FB      , 0, 0, 0x08002000, true },
+	{ NVKM_SUBDEV_LTC     , 0, 0, 0x02000000, true },
+	{ NVKM_SUBDEV_PMU     , 0, 0, 0x01000000, true },
+	{ NVKM_SUBDEV_GPIO    , 0, 0, 0x00200000, true },
+	{ NVKM_SUBDEV_I2C     , 0, 0, 0x00200000, true },
+	{ NVKM_SUBDEV_TIMER   , 0, 0, 0x00100000, true },
+	{ NVKM_SUBDEV_THERM   , 0, 0, 0x00040000, true },
 	{},
 };
 
 void
-gf100_mc_intr_unarm(struct nvkm_mc *mc)
-{
-	struct nvkm_device *device = mc->subdev.device;
-	nvkm_wr32(device, 0x000140, 0x00000000);
-	nvkm_wr32(device, 0x000144, 0x00000000);
-	nvkm_rd32(device, 0x000140);
-}
-
-void
-gf100_mc_intr_rearm(struct nvkm_mc *mc)
-{
-	struct nvkm_device *device = mc->subdev.device;
-	nvkm_wr32(device, 0x000140, 0x00000001);
-	nvkm_wr32(device, 0x000144, 0x00000001);
-}
-
-u32
-gf100_mc_intr_stat(struct nvkm_mc *mc)
-{
-	struct nvkm_device *device = mc->subdev.device;
-	u32 intr0 = nvkm_rd32(device, 0x000100);
-	u32 intr1 = nvkm_rd32(device, 0x000104);
-	return intr0 | intr1;
-}
-
-void
-gf100_mc_intr_mask(struct nvkm_mc *mc, u32 mask, u32 stat)
-{
-	struct nvkm_device *device = mc->subdev.device;
-	nvkm_mask(device, 0x000640, mask, stat);
-	nvkm_mask(device, 0x000644, mask, stat);
-}
-
-void
 gf100_mc_unk260(struct nvkm_mc *mc, u32 data)
 {
 	nvkm_wr32(mc->subdev.device, 0x000260, data);
@@ -102,12 +67,11 @@ gf100_mc_unk260(struct nvkm_mc *mc, u32 data)
 static const struct nvkm_mc_func
 gf100_mc = {
 	.init = nv50_mc_init,
-	.intr = gf100_mc_intr,
-	.intr_unarm = gf100_mc_intr_unarm,
-	.intr_rearm = gf100_mc_intr_rearm,
-	.intr_mask = gf100_mc_intr_mask,
-	.intr_stat = gf100_mc_intr_stat,
+	.intr = &gt215_mc_intr,
+	.intrs = gf100_mc_intrs,
+	.intr_nonstall = true,
 	.reset = gf100_mc_reset,
+	.device = &nv04_mc_device,
 	.unk260 = gf100_mc_unk260,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c
index d9b9067fa93f..66829586a124 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk104.c
@@ -30,32 +30,32 @@ gk104_mc_reset[] = {
 	{}
 };
 
-const struct nvkm_mc_map
-gk104_mc_intr[] = {
-	{ 0x04000000, NVKM_ENGINE_DISP },
-	{ 0x00000100, NVKM_ENGINE_FIFO },
-	{ 0x40000000, NVKM_SUBDEV_PRIVRING },
-	{ 0x10000000, NVKM_SUBDEV_BUS },
-	{ 0x08000000, NVKM_SUBDEV_FB },
-	{ 0x02000000, NVKM_SUBDEV_LTC },
-	{ 0x01000000, NVKM_SUBDEV_PMU },
-	{ 0x00200000, NVKM_SUBDEV_GPIO },
-	{ 0x00200000, NVKM_SUBDEV_I2C },
-	{ 0x00100000, NVKM_SUBDEV_TIMER },
-	{ 0x00040000, NVKM_SUBDEV_THERM },
-	{ 0x00002000, NVKM_SUBDEV_FB },
+const struct nvkm_intr_data
+gk104_mc_intrs[] = {
+	{ NVKM_ENGINE_DISP    , 0, 0, 0x04000000, true },
+	{ NVKM_ENGINE_FIFO    , 0, 0, 0x00000100 },
+	{ NVKM_SUBDEV_PRIVRING, 0, 0, 0x40000000, true },
+	{ NVKM_SUBDEV_BUS     , 0, 0, 0x10000000, true },
+	{ NVKM_SUBDEV_FB      , 0, 0, 0x08002000, true },
+	{ NVKM_SUBDEV_LTC     , 0, 0, 0x02000000, true },
+	{ NVKM_SUBDEV_PMU     , 0, 0, 0x01000000, true },
+	{ NVKM_SUBDEV_GPIO    , 0, 0, 0x00200000, true },
+	{ NVKM_SUBDEV_I2C     , 0, 0, 0x00200000, true },
+	{ NVKM_SUBDEV_TIMER   , 0, 0, 0x00100000, true },
+	{ NVKM_SUBDEV_THERM   , 0, 0, 0x00040000, true },
+	{ NVKM_SUBDEV_TOP     , 0, 0, 0x00001000 },
+	{ NVKM_SUBDEV_TOP     , 0, 0, 0xffffefff, true },
 	{},
 };
 
 static const struct nvkm_mc_func
 gk104_mc = {
 	.init = nv50_mc_init,
-	.intr = gk104_mc_intr,
-	.intr_unarm = gf100_mc_intr_unarm,
-	.intr_rearm = gf100_mc_intr_rearm,
-	.intr_mask = gf100_mc_intr_mask,
-	.intr_stat = gf100_mc_intr_stat,
+	.intr = &gt215_mc_intr,
+	.intrs = gk104_mc_intrs,
+	.intr_nonstall = true,
 	.reset = gk104_mc_reset,
+	.device = &nv04_mc_device,
 	.unk260 = gf100_mc_unk260,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c
index 03590292749a..d98a6563a411 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gk20a.c
@@ -26,11 +26,10 @@
 static const struct nvkm_mc_func
 gk20a_mc = {
 	.init = nv50_mc_init,
-	.intr = gk104_mc_intr,
-	.intr_unarm = gf100_mc_intr_unarm,
-	.intr_rearm = gf100_mc_intr_rearm,
-	.intr_mask = gf100_mc_intr_mask,
-	.intr_stat = gf100_mc_intr_stat,
+	.intr = &gt215_mc_intr,
+	.intrs = gk104_mc_intrs,
+	.intr_nonstall = true,
+	.device = &nv04_mc_device,
 	.reset = gk104_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c
index 5fd1a0595c33..eb2ab03f4360 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp100.c
@@ -21,108 +21,82 @@
  *
  * Authors: Ben Skeggs
  */
-#define gp100_mc(p) container_of((p), struct gp100_mc, base)
 #include "priv.h"
 
-struct gp100_mc {
-	struct nvkm_mc base;
-	spinlock_t lock;
-	bool intr;
-	u32 mask;
+const struct nvkm_intr_data
+gp100_mc_intrs[] = {
+	{ NVKM_ENGINE_DISP    , 0, 0, 0x04000000, true },
+	{ NVKM_ENGINE_FIFO    , 0, 0, 0x00000100 },
+	{ NVKM_SUBDEV_FAULT   , 0, 0, 0x00000200, true },
+	{ NVKM_SUBDEV_PRIVRING, 0, 0, 0x40000000, true },
+	{ NVKM_SUBDEV_BUS     , 0, 0, 0x10000000, true },
+	{ NVKM_SUBDEV_FB      , 0, 0, 0x08002000, true },
+	{ NVKM_SUBDEV_LTC     , 0, 0, 0x02000000, true },
+	{ NVKM_SUBDEV_PMU     , 0, 0, 0x01000000, true },
+	{ NVKM_SUBDEV_GPIO    , 0, 0, 0x00200000, true },
+	{ NVKM_SUBDEV_I2C     , 0, 0, 0x00200000, true },
+	{ NVKM_SUBDEV_TIMER   , 0, 0, 0x00100000, true },
+	{ NVKM_SUBDEV_THERM   , 0, 0, 0x00040000, true },
+	{ NVKM_SUBDEV_TOP     , 0, 0, 0x00009000 },
+	{ NVKM_SUBDEV_TOP     , 0, 0, 0xffff6fff, true },
+	{},
 };
 
 static void
-gp100_mc_intr_update(struct gp100_mc *mc)
+gp100_mc_intr_allow(struct nvkm_intr *intr, int leaf, u32 mask)
 {
-	struct nvkm_device *device = mc->base.subdev.device;
-	u32 mask = mc->intr ? mc->mask : 0, i;
-	for (i = 0; i < 2; i++) {
-		nvkm_wr32(device, 0x000180 + (i * 0x04), ~mask);
-		nvkm_wr32(device, 0x000160 + (i * 0x04),  mask);
-	}
+	struct nvkm_mc *mc = container_of(intr, typeof(*mc), intr);
+
+	nvkm_wr32(mc->subdev.device, 0x000160 + (leaf * 4), mask);
 }
 
-void
-gp100_mc_intr_unarm(struct nvkm_mc *base)
+static void
+gp100_mc_intr_block(struct nvkm_intr *intr, int leaf, u32 mask)
 {
-	struct gp100_mc *mc = gp100_mc(base);
-	unsigned long flags;
-	spin_lock_irqsave(&mc->lock, flags);
-	mc->intr = false;
-	gp100_mc_intr_update(mc);
-	spin_unlock_irqrestore(&mc->lock, flags);
+	struct nvkm_mc *mc = container_of(intr, typeof(*mc), intr);
+
+	nvkm_wr32(mc->subdev.device, 0x000180 + (leaf * 4), mask);
 }
 
-void
-gp100_mc_intr_rearm(struct nvkm_mc *base)
+static void
+gp100_mc_intr_rearm(struct nvkm_intr *intr)
 {
-	struct gp100_mc *mc = gp100_mc(base);
-	unsigned long flags;
-	spin_lock_irqsave(&mc->lock, flags);
-	mc->intr = true;
-	gp100_mc_intr_update(mc);
-	spin_unlock_irqrestore(&mc->lock, flags);
+	int i;
+
+	for (i = 0; i < intr->leaves; i++)
+		intr->func->allow(intr, i, intr->mask[i]);
 }
 
-void
-gp100_mc_intr_mask(struct nvkm_mc *base, u32 mask, u32 intr)
+static void
+gp100_mc_intr_unarm(struct nvkm_intr *intr)
 {
-	struct gp100_mc *mc = gp100_mc(base);
-	unsigned long flags;
-	spin_lock_irqsave(&mc->lock, flags);
-	mc->mask = (mc->mask & ~mask) | intr;
-	gp100_mc_intr_update(mc);
-	spin_unlock_irqrestore(&mc->lock, flags);
+	int i;
+
+	for (i = 0; i < intr->leaves; i++)
+		intr->func->block(intr, i, 0xffffffff);
 }
 
-const struct nvkm_mc_map
-gp100_mc_intr[] = {
-	{ 0x04000000, NVKM_ENGINE_DISP },
-	{ 0x00000100, NVKM_ENGINE_FIFO },
-	{ 0x00000200, NVKM_SUBDEV_FAULT },
-	{ 0x40000000, NVKM_SUBDEV_PRIVRING },
-	{ 0x10000000, NVKM_SUBDEV_BUS },
-	{ 0x08000000, NVKM_SUBDEV_FB },
-	{ 0x02000000, NVKM_SUBDEV_LTC },
-	{ 0x01000000, NVKM_SUBDEV_PMU },
-	{ 0x00200000, NVKM_SUBDEV_GPIO },
-	{ 0x00200000, NVKM_SUBDEV_I2C },
-	{ 0x00100000, NVKM_SUBDEV_TIMER },
-	{ 0x00040000, NVKM_SUBDEV_THERM },
-	{ 0x00002000, NVKM_SUBDEV_FB },
-	{},
+const struct nvkm_intr_func
+gp100_mc_intr = {
+	.pending = nv04_mc_intr_pending,
+	.unarm = gp100_mc_intr_unarm,
+	.rearm = gp100_mc_intr_rearm,
+	.block = gp100_mc_intr_block,
+	.allow = gp100_mc_intr_allow,
 };
 
 static const struct nvkm_mc_func
 gp100_mc = {
 	.init = nv50_mc_init,
-	.intr = gp100_mc_intr,
-	.intr_unarm = gp100_mc_intr_unarm,
-	.intr_rearm = gp100_mc_intr_rearm,
-	.intr_mask = gp100_mc_intr_mask,
-	.intr_stat = gf100_mc_intr_stat,
+	.intr = &gp100_mc_intr,
+	.intrs = gp100_mc_intrs,
+	.intr_nonstall = true,
+	.device = &nv04_mc_device,
 	.reset = gk104_mc_reset,
 };
 
 int
-gp100_mc_new_(const struct nvkm_mc_func *func, struct nvkm_device *device,
-	      enum nvkm_subdev_type type, int inst, struct nvkm_mc **pmc)
-{
-	struct gp100_mc *mc;
-
-	if (!(mc = kzalloc(sizeof(*mc), GFP_KERNEL)))
-		return -ENOMEM;
-	nvkm_mc_ctor(func, device, type, inst, &mc->base);
-	*pmc = &mc->base;
-
-	spin_lock_init(&mc->lock);
-	mc->intr = false;
-	mc->mask = 0x7fffffff;
-	return 0;
-}
-
-int
 gp100_mc_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_mc **pmc)
 {
-	return gp100_mc_new_(&gp100_mc, device, type, inst, pmc);
+	return nvkm_mc_new_(&gp100_mc, device, type, inst, pmc);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp10b.c
index dd581d030ced..9bed9c5ea5d3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gp10b.c
@@ -34,16 +34,15 @@ gp10b_mc_init(struct nvkm_mc *mc)
 static const struct nvkm_mc_func
 gp10b_mc = {
 	.init = gp10b_mc_init,
-	.intr = gp100_mc_intr,
-	.intr_unarm = gp100_mc_intr_unarm,
-	.intr_rearm = gp100_mc_intr_rearm,
-	.intr_mask = gp100_mc_intr_mask,
-	.intr_stat = gf100_mc_intr_stat,
+	.intr = &gp100_mc_intr,
+	.intrs = gp100_mc_intrs,
+	.intr_nonstall = true,
+	.device = &nv04_mc_device,
 	.reset = gk104_mc_reset,
 };
 
 int
 gp10b_mc_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_mc **pmc)
 {
-	return gp100_mc_new_(&gp10b_mc, device, type, inst, pmc);
+	return nvkm_mc_new_(&gp10b_mc, device, type, inst, pmc);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c
index 1b4d43531dba..3d61836e42a3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/gt215.c
@@ -34,39 +34,56 @@ gt215_mc_reset[] = {
 	{}
 };
 
-static const struct nvkm_mc_map
-gt215_mc_intr[] = {
-	{ 0x04000000, NVKM_ENGINE_DISP },
-	{ 0x00400000, NVKM_ENGINE_CE, 0 },
-	{ 0x00020000, NVKM_ENGINE_MSPDEC },
-	{ 0x00008000, NVKM_ENGINE_MSVLD },
-	{ 0x00001000, NVKM_ENGINE_GR },
-	{ 0x00000100, NVKM_ENGINE_FIFO },
-	{ 0x00000001, NVKM_ENGINE_MSPPP },
-	{ 0x00429101, NVKM_SUBDEV_FB },
-	{ 0x10000000, NVKM_SUBDEV_BUS },
-	{ 0x00200000, NVKM_SUBDEV_GPIO },
-	{ 0x00200000, NVKM_SUBDEV_I2C },
-	{ 0x00100000, NVKM_SUBDEV_TIMER },
-	{ 0x00080000, NVKM_SUBDEV_THERM },
-	{ 0x00040000, NVKM_SUBDEV_PMU },
+static const struct nvkm_intr_data
+gt215_mc_intrs[] = {
+	{ NVKM_ENGINE_DISP  , 0, 0, 0x04000000, true },
+	{ NVKM_ENGINE_CE    , 0, 0, 0x00400000, true },
+	{ NVKM_ENGINE_MSPDEC, 0, 0, 0x00020000, true },
+	{ NVKM_ENGINE_MSVLD , 0, 0, 0x00008000, true },
+	{ NVKM_ENGINE_GR    , 0, 0, 0x00001000, true },
+	{ NVKM_ENGINE_FIFO  , 0, 0, 0x00000100 },
+	{ NVKM_ENGINE_MSPPP , 0, 0, 0x00000001, true },
+	{ NVKM_SUBDEV_FB    , 0, 0, 0x00429101, true },
+	{ NVKM_SUBDEV_BUS   , 0, 0, 0x10000000, true },
+	{ NVKM_SUBDEV_GPIO  , 0, 0, 0x00200000, true },
+	{ NVKM_SUBDEV_I2C   , 0, 0, 0x00200000, true },
+	{ NVKM_SUBDEV_TIMER , 0, 0, 0x00100000, true },
+	{ NVKM_SUBDEV_THERM , 0, 0, 0x00080000, true },
+	{ NVKM_SUBDEV_PMU   , 0, 0, 0x00040000, true },
 	{},
 };
 
 static void
-gt215_mc_intr_mask(struct nvkm_mc *mc, u32 mask, u32 stat)
+gt215_mc_intr_allow(struct nvkm_intr *intr, int leaf, u32 mask)
+{
+	struct nvkm_mc *mc = container_of(intr, typeof(*mc), intr);
+
+	nvkm_mask(mc->subdev.device, 0x000640 + (leaf * 4), mask, mask);
+}
+
+static void
+gt215_mc_intr_block(struct nvkm_intr *intr, int leaf, u32 mask)
 {
-	nvkm_mask(mc->subdev.device, 0x000640, mask, stat);
+	struct nvkm_mc *mc = container_of(intr, typeof(*mc), intr);
+
+	nvkm_mask(mc->subdev.device, 0x000640 + (leaf * 4), mask, 0);
 }
 
+const struct nvkm_intr_func
+gt215_mc_intr = {
+	.pending = nv04_mc_intr_pending,
+	.unarm = nv04_mc_intr_unarm,
+	.rearm = nv04_mc_intr_rearm,
+	.block = gt215_mc_intr_block,
+	.allow = gt215_mc_intr_allow,
+};
+
 static const struct nvkm_mc_func
 gt215_mc = {
 	.init = nv50_mc_init,
-	.intr = gt215_mc_intr,
-	.intr_unarm = nv04_mc_intr_unarm,
-	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_mask = gt215_mc_intr_mask,
-	.intr_stat = nv04_mc_intr_stat,
+	.intr = &nv04_mc_intr,
+	.intrs = gt215_mc_intrs,
+	.device = &nv04_mc_device,
 	.reset = gt215_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c
index bc0d09bafa99..8482a5550e5f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv04.c
@@ -30,37 +30,89 @@ nv04_mc_reset[] = {
 	{}
 };
 
-static const struct nvkm_mc_map
-nv04_mc_intr[] = {
-	{ 0x01010000, NVKM_ENGINE_DISP },
-	{ 0x00001000, NVKM_ENGINE_GR },
-	{ 0x00000100, NVKM_ENGINE_FIFO },
-	{ 0x10000000, NVKM_SUBDEV_BUS },
-	{ 0x00100000, NVKM_SUBDEV_TIMER },
+static void
+nv04_mc_device_disable(struct nvkm_mc *mc, u32 mask)
+{
+	nvkm_mask(mc->subdev.device, 0x000200, mask, 0x00000000);
+}
+
+static void
+nv04_mc_device_enable(struct nvkm_mc *mc, u32 mask)
+{
+	struct nvkm_device *device = mc->subdev.device;
+
+	nvkm_mask(device, 0x000200, mask, mask);
+	nvkm_rd32(device, 0x000200);
+}
+
+static bool
+nv04_mc_device_enabled(struct nvkm_mc *mc, u32 mask)
+{
+	return (nvkm_rd32(mc->subdev.device, 0x000200) & mask) == mask;
+}
+
+const struct nvkm_mc_device_func
+nv04_mc_device = {
+	.enabled = nv04_mc_device_enabled,
+	.enable = nv04_mc_device_enable,
+	.disable = nv04_mc_device_disable,
+};
+
+static const struct nvkm_intr_data
+nv04_mc_intrs[] = {
+	{ NVKM_ENGINE_DISP , 0, 0, 0x01010000, true },
+	{ NVKM_ENGINE_GR   , 0, 0, 0x00001000, true },
+	{ NVKM_ENGINE_FIFO , 0, 0, 0x00000100 },
+	{ NVKM_SUBDEV_BUS  , 0, 0, 0x10000000, true },
+	{ NVKM_SUBDEV_TIMER, 0, 0, 0x00100000, true },
 	{}
 };
 
 void
-nv04_mc_intr_unarm(struct nvkm_mc *mc)
+nv04_mc_intr_rearm(struct nvkm_intr *intr)
 {
-	struct nvkm_device *device = mc->subdev.device;
-	nvkm_wr32(device, 0x000140, 0x00000000);
-	nvkm_rd32(device, 0x000140);
+	struct nvkm_mc *mc = container_of(intr, typeof(*mc), intr);
+	int leaf;
+
+	for (leaf = 0; leaf < intr->leaves; leaf++)
+		nvkm_wr32(mc->subdev.device, 0x000140 + (leaf * 4), 0x00000001);
 }
 
 void
-nv04_mc_intr_rearm(struct nvkm_mc *mc)
+nv04_mc_intr_unarm(struct nvkm_intr *intr)
 {
-	struct nvkm_device *device = mc->subdev.device;
-	nvkm_wr32(device, 0x000140, 0x00000001);
+	struct nvkm_mc *mc = container_of(intr, typeof(*mc), intr);
+	int leaf;
+
+	for (leaf = 0; leaf < intr->leaves; leaf++)
+		nvkm_wr32(mc->subdev.device, 0x000140 + (leaf * 4), 0x00000000);
+
+	nvkm_rd32(mc->subdev.device, 0x000140);
 }
 
-u32
-nv04_mc_intr_stat(struct nvkm_mc *mc)
+bool
+nv04_mc_intr_pending(struct nvkm_intr *intr)
 {
-	return nvkm_rd32(mc->subdev.device, 0x000100);
+	struct nvkm_mc *mc = container_of(intr, typeof(*mc), intr);
+	bool pending = false;
+	int leaf;
+
+	for (leaf = 0; leaf < intr->leaves; leaf++) {
+		intr->stat[leaf] = nvkm_rd32(mc->subdev.device, 0x000100 + (leaf * 4));
+		if (intr->stat[leaf])
+			pending = true;
+	}
+
+	return pending;
 }
 
+const struct nvkm_intr_func
+nv04_mc_intr = {
+	.pending = nv04_mc_intr_pending,
+	.unarm = nv04_mc_intr_unarm,
+	.rearm = nv04_mc_intr_rearm,
+};
+
 void
 nv04_mc_init(struct nvkm_mc *mc)
 {
@@ -72,10 +124,9 @@ nv04_mc_init(struct nvkm_mc *mc)
 static const struct nvkm_mc_func
 nv04_mc = {
 	.init = nv04_mc_init,
-	.intr = nv04_mc_intr,
-	.intr_unarm = nv04_mc_intr_unarm,
-	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_stat = nv04_mc_intr_stat,
+	.intr = &nv04_mc_intr,
+	.intrs = nv04_mc_intrs,
+	.device = &nv04_mc_device,
 	.reset = nv04_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c
index ab59ca1ee068..6d6278f434a4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv11.c
@@ -23,23 +23,22 @@
  */
 #include "priv.h"
 
-static const struct nvkm_mc_map
-nv11_mc_intr[] = {
-	{ 0x03010000, NVKM_ENGINE_DISP },
-	{ 0x00001000, NVKM_ENGINE_GR },
-	{ 0x00000100, NVKM_ENGINE_FIFO },
-	{ 0x10000000, NVKM_SUBDEV_BUS },
-	{ 0x00100000, NVKM_SUBDEV_TIMER },
+static const struct nvkm_intr_data
+nv11_mc_intrs[] = {
+	{ NVKM_ENGINE_DISP , 0, 0, 0x03010000, true },
+	{ NVKM_ENGINE_GR   , 0, 0, 0x00001000, true },
+	{ NVKM_ENGINE_FIFO , 0, 0, 0x00000100 },
+	{ NVKM_SUBDEV_BUS  , 0, 0, 0x10000000, true },
+	{ NVKM_SUBDEV_TIMER, 0, 0, 0x00100000, true },
 	{}
 };
 
 static const struct nvkm_mc_func
 nv11_mc = {
 	.init = nv04_mc_init,
-	.intr = nv11_mc_intr,
-	.intr_unarm = nv04_mc_intr_unarm,
-	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_stat = nv04_mc_intr_stat,
+	.intr = &nv04_mc_intr,
+	.intrs = nv11_mc_intrs,
+	.device = &nv04_mc_device,
 	.reset = nv04_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c
index 03d756e26e57..dbad7c111ceb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv17.c
@@ -31,24 +31,23 @@ nv17_mc_reset[] = {
 	{}
 };
 
-const struct nvkm_mc_map
-nv17_mc_intr[] = {
-	{ 0x03010000, NVKM_ENGINE_DISP },
-	{ 0x00001000, NVKM_ENGINE_GR },
-	{ 0x00000100, NVKM_ENGINE_FIFO },
-	{ 0x00000001, NVKM_ENGINE_MPEG },
-	{ 0x10000000, NVKM_SUBDEV_BUS },
-	{ 0x00100000, NVKM_SUBDEV_TIMER },
+const struct nvkm_intr_data
+nv17_mc_intrs[] = {
+	{ NVKM_ENGINE_DISP , 0, 0, 0x03010000, true },
+	{ NVKM_ENGINE_GR   , 0, 0, 0x00001000, true },
+	{ NVKM_ENGINE_FIFO , 0, 0, 0x00000100 },
+	{ NVKM_ENGINE_MPEG , 0, 0, 0x00000001, true },
+	{ NVKM_SUBDEV_BUS  , 0, 0, 0x10000000, true },
+	{ NVKM_SUBDEV_TIMER, 0, 0, 0x00100000, true },
 	{}
 };
 
 static const struct nvkm_mc_func
 nv17_mc = {
 	.init = nv04_mc_init,
-	.intr = nv17_mc_intr,
-	.intr_unarm = nv04_mc_intr_unarm,
-	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_stat = nv04_mc_intr_stat,
+	.intr = &nv04_mc_intr,
+	.intrs = nv17_mc_intrs,
+	.device = &nv04_mc_device,
 	.reset = nv17_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c
index 95f65766e8b0..649a9fcc0a2f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv44.c
@@ -40,10 +40,9 @@ nv44_mc_init(struct nvkm_mc *mc)
 static const struct nvkm_mc_func
 nv44_mc = {
 	.init = nv44_mc_init,
-	.intr = nv17_mc_intr,
-	.intr_unarm = nv04_mc_intr_unarm,
-	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_stat = nv04_mc_intr_stat,
+	.intr = &nv04_mc_intr,
+	.intrs = nv17_mc_intrs,
+	.device = &nv04_mc_device,
 	.reset = nv17_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c
index fce3613cdfa5..d41099d35690 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/nv50.c
@@ -23,17 +23,17 @@
  */
 #include "priv.h"
 
-static const struct nvkm_mc_map
-nv50_mc_intr[] = {
-	{ 0x04000000, NVKM_ENGINE_DISP },
-	{ 0x00001000, NVKM_ENGINE_GR },
-	{ 0x00000100, NVKM_ENGINE_FIFO },
-	{ 0x00000001, NVKM_ENGINE_MPEG },
-	{ 0x00001101, NVKM_SUBDEV_FB },
-	{ 0x10000000, NVKM_SUBDEV_BUS },
-	{ 0x00200000, NVKM_SUBDEV_GPIO },
-	{ 0x00200000, NVKM_SUBDEV_I2C },
-	{ 0x00100000, NVKM_SUBDEV_TIMER },
+static const struct nvkm_intr_data
+nv50_mc_intrs[] = {
+	{ NVKM_ENGINE_DISP , 0, 0, 0x04000000, true },
+	{ NVKM_ENGINE_GR   , 0, 0, 0x00001000, true },
+	{ NVKM_ENGINE_FIFO , 0, 0, 0x00000100 },
+	{ NVKM_ENGINE_MPEG , 0, 0, 0x00000001, true },
+	{ NVKM_SUBDEV_FB   , 0, 0, 0x00001101, true },
+	{ NVKM_SUBDEV_BUS  , 0, 0, 0x10000000, true },
+	{ NVKM_SUBDEV_GPIO , 0, 0, 0x00200000, true },
+	{ NVKM_SUBDEV_I2C  , 0, 0, 0x00200000, true },
+	{ NVKM_SUBDEV_TIMER, 0, 0, 0x00100000, true },
 	{},
 };
 
@@ -47,10 +47,9 @@ nv50_mc_init(struct nvkm_mc *mc)
 static const struct nvkm_mc_func
 nv50_mc = {
 	.init = nv50_mc_init,
-	.intr = nv50_mc_intr,
-	.intr_unarm = nv04_mc_intr_unarm,
-	.intr_rearm = nv04_mc_intr_rearm,
-	.intr_stat = nv04_mc_intr_stat,
+	.intr = &nv04_mc_intr,
+	.intrs = nv50_mc_intrs,
+	.device = &nv04_mc_device,
 	.reset = nv17_mc_reset,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
index c8bcabb98f99..7f38d54b4bc2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
@@ -4,8 +4,6 @@
 #define nvkm_mc(p) container_of((p), struct nvkm_mc, subdev)
 #include <subdev/mc.h>
 
-void nvkm_mc_ctor(const struct nvkm_mc_func *, struct nvkm_device *, enum nvkm_subdev_type, int,
-		  struct nvkm_mc *);
 int nvkm_mc_new_(const struct nvkm_mc_func *, struct nvkm_device *, enum nvkm_subdev_type, int,
 		 struct nvkm_mc **);
 
@@ -18,46 +16,44 @@ struct nvkm_mc_map {
 
 struct nvkm_mc_func {
 	void (*init)(struct nvkm_mc *);
-	const struct nvkm_mc_map *intr;
-	/* disable reporting of interrupts to host */
-	void (*intr_unarm)(struct nvkm_mc *);
-	/* enable reporting of interrupts to host */
-	void (*intr_rearm)(struct nvkm_mc *);
-	/* (un)mask delivery of specific interrupts */
-	void (*intr_mask)(struct nvkm_mc *, u32 mask, u32 stat);
-	/* retrieve pending interrupt mask (NV_PMC_INTR) */
-	u32 (*intr_stat)(struct nvkm_mc *);
+
+	const struct nvkm_intr_func *intr;
+	const struct nvkm_intr_data *intrs;
+	bool intr_nonstall;
+
+	const struct nvkm_mc_device_func {
+		bool (*enabled)(struct nvkm_mc *, u32 mask);
+		void (*enable)(struct nvkm_mc *, u32 mask);
+		void (*disable)(struct nvkm_mc *, u32 mask);
+	} *device;
+
 	const struct nvkm_mc_map *reset;
+
 	void (*unk260)(struct nvkm_mc *, u32);
 };
 
 void nv04_mc_init(struct nvkm_mc *);
-void nv04_mc_intr_unarm(struct nvkm_mc *);
-void nv04_mc_intr_rearm(struct nvkm_mc *);
-u32 nv04_mc_intr_stat(struct nvkm_mc *);
+extern const struct nvkm_intr_func nv04_mc_intr;
+bool nv04_mc_intr_pending(struct nvkm_intr *);
+void nv04_mc_intr_unarm(struct nvkm_intr *);
+void nv04_mc_intr_rearm(struct nvkm_intr *);
+extern const struct nvkm_mc_device_func nv04_mc_device;
 extern const struct nvkm_mc_map nv04_mc_reset[];
 
-extern const struct nvkm_mc_map nv17_mc_intr[];
+extern const struct nvkm_intr_data nv17_mc_intrs[];
 extern const struct nvkm_mc_map nv17_mc_reset[];
 
 void nv44_mc_init(struct nvkm_mc *);
 
 void nv50_mc_init(struct nvkm_mc *);
-void gk104_mc_init(struct nvkm_mc *);
 
-void gf100_mc_intr_unarm(struct nvkm_mc *);
-void gf100_mc_intr_rearm(struct nvkm_mc *);
-void gf100_mc_intr_mask(struct nvkm_mc *, u32, u32);
-u32 gf100_mc_intr_stat(struct nvkm_mc *);
+extern const struct nvkm_intr_func gt215_mc_intr;
 void gf100_mc_unk260(struct nvkm_mc *, u32);
-void gp100_mc_intr_unarm(struct nvkm_mc *);
-void gp100_mc_intr_rearm(struct nvkm_mc *);
-void gp100_mc_intr_mask(struct nvkm_mc *, u32, u32);
-int gp100_mc_new_(const struct nvkm_mc_func *, struct nvkm_device *, enum nvkm_subdev_type, int,
-		  struct nvkm_mc **);
 
-extern const struct nvkm_mc_map gk104_mc_intr[];
+void gk104_mc_init(struct nvkm_mc *);
+extern const struct nvkm_intr_data gk104_mc_intrs[];
 extern const struct nvkm_mc_map gk104_mc_reset[];
 
-extern const struct nvkm_mc_map gp100_mc_intr[];
+extern const struct nvkm_intr_func gp100_mc_intr;
+extern const struct nvkm_intr_data gp100_mc_intrs[];
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu102.c
deleted file mode 100644
index a96084b34a78..000000000000
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu102.c
+++ /dev/null
@@ -1,136 +0,0 @@
-/*
- * Copyright 2018 Red Hat Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-#define tu102_mc(p) container_of((p), struct tu102_mc, base)
-#include "priv.h"
-
-struct tu102_mc {
-	struct nvkm_mc base;
-	spinlock_t lock;
-	bool intr;
-	u32 mask;
-};
-
-static void
-tu102_mc_intr_update(struct tu102_mc *mc)
-{
-	struct nvkm_device *device = mc->base.subdev.device;
-	u32 mask = mc->intr ? mc->mask : 0, i;
-
-	for (i = 0; i < 2; i++) {
-		nvkm_wr32(device, 0x000180 + (i * 0x04), ~mask);
-		nvkm_wr32(device, 0x000160 + (i * 0x04),  mask);
-	}
-
-	if (mask & 0x00000200)
-		nvkm_wr32(device, 0xb81608, 0x6);
-	else
-		nvkm_wr32(device, 0xb81610, 0x6);
-}
-
-static void
-tu102_mc_intr_unarm(struct nvkm_mc *base)
-{
-	struct tu102_mc *mc = tu102_mc(base);
-	unsigned long flags;
-
-	spin_lock_irqsave(&mc->lock, flags);
-	mc->intr = false;
-	tu102_mc_intr_update(mc);
-	spin_unlock_irqrestore(&mc->lock, flags);
-}
-
-static void
-tu102_mc_intr_rearm(struct nvkm_mc *base)
-{
-	struct tu102_mc *mc = tu102_mc(base);
-	unsigned long flags;
-
-	spin_lock_irqsave(&mc->lock, flags);
-	mc->intr = true;
-	tu102_mc_intr_update(mc);
-	spin_unlock_irqrestore(&mc->lock, flags);
-}
-
-static void
-tu102_mc_intr_mask(struct nvkm_mc *base, u32 mask, u32 intr)
-{
-	struct tu102_mc *mc = tu102_mc(base);
-	unsigned long flags;
-
-	spin_lock_irqsave(&mc->lock, flags);
-	mc->mask = (mc->mask & ~mask) | intr;
-	tu102_mc_intr_update(mc);
-	spin_unlock_irqrestore(&mc->lock, flags);
-}
-
-static u32
-tu102_mc_intr_stat(struct nvkm_mc *mc)
-{
-	struct nvkm_device *device = mc->subdev.device;
-	u32 intr0 = nvkm_rd32(device, 0x000100);
-	u32 intr1 = nvkm_rd32(device, 0x000104);
-	u32 intr_top = nvkm_rd32(device, 0xb81600);
-
-	/* Turing and above route the MMU fault interrupts via a different
-	 * interrupt tree with different control registers. For the moment remap
-	 * them back to the old PMC vector.
-	 */
-	if (intr_top & 0x00000006)
-		intr0 |= 0x00000200;
-
-	return intr0 | intr1;
-}
-
-
-static const struct nvkm_mc_func
-tu102_mc = {
-	.init = nv50_mc_init,
-	.intr = gp100_mc_intr,
-	.intr_unarm = tu102_mc_intr_unarm,
-	.intr_rearm = tu102_mc_intr_rearm,
-	.intr_mask = tu102_mc_intr_mask,
-	.intr_stat = tu102_mc_intr_stat,
-	.reset = gk104_mc_reset,
-};
-
-static int
-tu102_mc_new_(const struct nvkm_mc_func *func, struct nvkm_device *device,
-	      enum nvkm_subdev_type type, int inst, struct nvkm_mc **pmc)
-{
-	struct tu102_mc *mc;
-
-	if (!(mc = kzalloc(sizeof(*mc), GFP_KERNEL)))
-		return -ENOMEM;
-	nvkm_mc_ctor(func, device, type, inst, &mc->base);
-	*pmc = &mc->base;
-
-	spin_lock_init(&mc->lock);
-	mc->intr = false;
-	mc->mask = 0x7fffffff;
-	return 0;
-}
-
-int
-tu102_mc_new(struct nvkm_device *device, enum nvkm_subdev_type type, int inst, struct nvkm_mc **pmc)
-{
-	return tu102_mc_new_(&tu102_mc, device, type, inst, pmc);
-}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c
index 186b4e63e559..524cd3c0e3fe 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c
@@ -39,7 +39,7 @@ nvkm_uvmm_search(struct nvkm_client *client, u64 handle)
 	if (IS_ERR(object))
 		return (void *)object;
 
-	return nvkm_uvmm(object)->vmm;
+	return nvkm_vmm_ref(nvkm_uvmm(object)->vmm);
 }
 
 static int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
index a7d42ea8ba28..5a0de45d36ce 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/base.c
@@ -26,7 +26,15 @@
 
 #include <core/option.h>
 #include <core/pci.h>
-#include <subdev/mc.h>
+
+void
+nvkm_pci_msi_rearm(struct nvkm_device *device)
+{
+	struct nvkm_pci *pci = device->pci;
+
+	if (pci && pci->msi)
+		pci->func->msi_rearm(pci);
+}
 
 u32
 nvkm_pci_rd32(struct nvkm_pci *pci, u16 addr)
@@ -65,24 +73,6 @@ nvkm_pci_rom_shadow(struct nvkm_pci *pci, bool shadow)
 	nvkm_pci_wr32(pci, 0x0050, data);
 }
 
-static irqreturn_t
-nvkm_pci_intr(int irq, void *arg)
-{
-	struct nvkm_pci *pci = arg;
-	struct nvkm_device *device = pci->subdev.device;
-	bool handled = false;
-
-	if (pci->irq < 0)
-		return IRQ_HANDLED;
-
-	nvkm_mc_intr_unarm(device);
-	if (pci->msi)
-		pci->func->msi_rearm(pci);
-	nvkm_mc_intr(device, &handled);
-	nvkm_mc_intr_rearm(device);
-	return handled ? IRQ_HANDLED : IRQ_NONE;
-}
-
 static int
 nvkm_pci_fini(struct nvkm_subdev *subdev, bool suspend)
 {
@@ -107,7 +97,6 @@ static int
 nvkm_pci_oneinit(struct nvkm_subdev *subdev)
 {
 	struct nvkm_pci *pci = nvkm_pci(subdev);
-	struct pci_dev *pdev = pci->pdev;
 	int ret;
 
 	if (pci_is_pcie(pci->pdev)) {
@@ -116,11 +105,6 @@ nvkm_pci_oneinit(struct nvkm_subdev *subdev)
 			return ret;
 	}
 
-	ret = request_irq(pdev->irq, nvkm_pci_intr, IRQF_SHARED, "nvkm", pci);
-	if (ret)
-		return ret;
-
-	pci->irq = pdev->irq;
 	return 0;
 }
 
@@ -157,15 +141,6 @@ nvkm_pci_dtor(struct nvkm_subdev *subdev)
 
 	nvkm_agp_dtor(pci);
 
-	if (pci->irq >= 0) {
-		/* freq_irq() will call the handler, we use pci->irq == -1
-		 * to signal that it's been torn down and should be a noop.
-		 */
-		int irq = pci->irq;
-		pci->irq = -1;
-		free_irq(irq, pci);
-	}
-
 	if (pci->msi)
 		pci_disable_msi(pci->pdev);
 
@@ -192,7 +167,6 @@ nvkm_pci_new_(const struct nvkm_pci_func *func, struct nvkm_device *device,
 	nvkm_subdev_ctor(&nvkm_pci_func, device, type, inst, &pci->subdev);
 	pci->func = func;
 	pci->pdev = device->func->pci(device)->pdev;
-	pci->irq = -1;
 	pci->pcie.speed = -1;
 	pci->pcie.width = -1;
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
index 455e95a89259..8f2f50ad4ded 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
@@ -81,43 +81,12 @@ nvkm_pmu_fini(struct nvkm_subdev *subdev, bool suspend)
 {
 	struct nvkm_pmu *pmu = nvkm_pmu(subdev);
 
+	if (!subdev->use.enabled)
+		return 0;
+
 	if (pmu->func->fini)
 		pmu->func->fini(pmu);
 
-	flush_work(&pmu->recv.work);
-
-	reinit_completion(&pmu->wpr_ready);
-
-	nvkm_falcon_cmdq_fini(pmu->lpq);
-	nvkm_falcon_cmdq_fini(pmu->hpq);
-	pmu->initmsg_received = false;
-	return 0;
-}
-
-static void
-nvkm_pmu_reset(struct nvkm_pmu *pmu)
-{
-	struct nvkm_device *device = pmu->subdev.device;
-
-	if (!pmu->func->enabled(pmu))
-		return;
-
-	/* Reset. */
-	if (pmu->func->reset)
-		pmu->func->reset(pmu);
-
-	/* Wait for IMEM/DMEM scrubbing to be complete. */
-	nvkm_msec(device, 2000,
-		if (!(nvkm_rd32(device, 0x10a10c) & 0x00000006))
-			break;
-	);
-}
-
-static int
-nvkm_pmu_preinit(struct nvkm_subdev *subdev)
-{
-	struct nvkm_pmu *pmu = nvkm_pmu(subdev);
-	nvkm_pmu_reset(pmu);
 	return 0;
 }
 
@@ -125,22 +94,10 @@ static int
 nvkm_pmu_init(struct nvkm_subdev *subdev)
 {
 	struct nvkm_pmu *pmu = nvkm_pmu(subdev);
-	struct nvkm_device *device = pmu->subdev.device;
 
 	if (!pmu->func->init)
 		return 0;
 
-	if (pmu->func->enabled(pmu)) {
-		/* Inhibit interrupts, and wait for idle. */
-		nvkm_wr32(device, 0x10a014, 0x0000ffff);
-		nvkm_msec(device, 2000,
-			if (!nvkm_rd32(device, 0x10a04c))
-				break;
-		);
-
-		nvkm_pmu_reset(pmu);
-	}
-
 	return pmu->func->init(pmu);
 }
 
@@ -160,7 +117,6 @@ nvkm_pmu_dtor(struct nvkm_subdev *subdev)
 static const struct nvkm_subdev_func
 nvkm_pmu = {
 	.dtor = nvkm_pmu_dtor,
-	.preinit = nvkm_pmu_preinit,
 	.init = nvkm_pmu_init,
 	.fini = nvkm_pmu_fini,
 	.intr = nvkm_pmu_intr,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c
index a67a42e73f08..b5e52b35f5d0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c
@@ -197,7 +197,6 @@ gk20a_dvfs_data= {
 static const struct nvkm_pmu_func
 gk20a_pmu = {
 	.flcn = &gt215_pmu_flcn,
-	.enabled = gf100_pmu_enabled,
 	.init = gk20a_pmu_init,
 	.fini = gk20a_pmu_fini,
 	.reset = gf100_pmu_reset,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm200.c
index 40439e329aa9..7359991f94c2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm200.c
@@ -24,30 +24,36 @@
 #include "priv.h"
 
 static int
-gm200_pmu_flcn_reset(struct nvkm_falcon *falcon)
+gm200_pmu_flcn_bind_stat(struct nvkm_falcon *falcon, bool intr)
 {
-	struct nvkm_pmu *pmu = container_of(falcon, typeof(*pmu), falcon);
+	nvkm_falcon_wr32(falcon, 0x200, 0x0000030e);
+	return (nvkm_falcon_rd32(falcon, 0x20c) & 0x00007000) >> 12;
+}
 
-	nvkm_falcon_wr32(falcon, 0x014, 0x0000ffff);
-	pmu->func->reset(pmu);
-	return nvkm_falcon_enable(falcon);
+void
+gm200_pmu_flcn_bind_inst(struct nvkm_falcon *falcon, int target, u64 addr)
+{
+	nvkm_falcon_wr32(falcon, 0xe00, 4); /* DMAIDX_UCODE */
+	nvkm_falcon_wr32(falcon, 0xe04, 0); /* DMAIDX_VIRT */
+	nvkm_falcon_wr32(falcon, 0xe08, 4); /* DMAIDX_PHYS_VID */
+	nvkm_falcon_wr32(falcon, 0xe0c, 5); /* DMAIDX_PHYS_SYS_COH */
+	nvkm_falcon_wr32(falcon, 0xe10, 6); /* DMAIDX_PHYS_SYS_NCOH */
+	nvkm_falcon_mask(falcon, 0x090, 0x00010000, 0x00010000);
+	nvkm_falcon_wr32(falcon, 0x480, (1 << 30) | (target << 28) | (addr >> 12));
 }
 
 const struct nvkm_falcon_func
 gm200_pmu_flcn = {
+	.disable = gm200_flcn_disable,
+	.enable = gm200_flcn_enable,
+	.reset_pmc = true,
+	.reset_wait_mem_scrubbing = gm200_flcn_reset_wait_mem_scrubbing,
 	.debug = 0xc08,
-	.fbif = 0xe00,
-	.load_imem = nvkm_falcon_v1_load_imem,
-	.load_dmem = nvkm_falcon_v1_load_dmem,
-	.read_dmem = nvkm_falcon_v1_read_dmem,
-	.bind_context = nvkm_falcon_v1_bind_context,
-	.wait_for_halt = nvkm_falcon_v1_wait_for_halt,
-	.clear_interrupt = nvkm_falcon_v1_clear_interrupt,
-	.set_start_addr = nvkm_falcon_v1_set_start_addr,
+	.bind_inst = gm200_pmu_flcn_bind_inst,
+	.bind_stat = gm200_pmu_flcn_bind_stat,
+	.imem_pio = &gm200_flcn_imem_pio,
+	.dmem_pio = &gm200_flcn_dmem_pio,
 	.start = nvkm_falcon_v1_start,
-	.enable = nvkm_falcon_v1_enable,
-	.disable = nvkm_falcon_v1_disable,
-	.reset = gm200_pmu_flcn_reset,
 	.cmdq = { 0x4a0, 0x4b0, 4 },
 	.msgq = { 0x4c8, 0x4cc, 0 },
 };
@@ -55,11 +61,9 @@ gm200_pmu_flcn = {
 static const struct nvkm_pmu_func
 gm200_pmu = {
 	.flcn = &gm200_pmu_flcn,
-	.enabled = gf100_pmu_enabled,
 	.reset = gf100_pmu_reset,
 };
 
-
 int
 gm200_pmu_nofw(struct nvkm_pmu *pmu, int ver, const struct nvkm_pmu_fwif *fwif)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
index 612310d5d481..a72403777329 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
@@ -62,16 +62,6 @@ gm20b_pmu_acr_bootstrap_falcon(struct nvkm_falcon *falcon,
 	return ret;
 }
 
-int
-gm20b_pmu_acr_boot(struct nvkm_falcon *falcon)
-{
-	struct nv_pmu_args args = { .secure_mode = true };
-	const u32 addr_args = falcon->data.limit - sizeof(struct nv_pmu_args);
-	nvkm_falcon_load_dmem(falcon, &args, addr_args, sizeof(args), 0);
-	nvkm_falcon_start(falcon);
-	return 0;
-}
-
 void
 gm20b_pmu_acr_bld_patch(struct nvkm_acr *acr, u32 bld, s64 adjust)
 {
@@ -125,7 +115,6 @@ gm20b_pmu_acr = {
 	.bld_size = sizeof(struct loader_config),
 	.bld_write = gm20b_pmu_acr_bld_write,
 	.bld_patch = gm20b_pmu_acr_bld_patch,
-	.boot = gm20b_pmu_acr_boot,
 	.bootstrap_falcons = BIT_ULL(NVKM_ACR_LSF_PMU) |
 			     BIT_ULL(NVKM_ACR_LSF_FECS) |
 			     BIT_ULL(NVKM_ACR_LSF_GPCCS),
@@ -166,7 +155,7 @@ gm20b_pmu_acr_init_wpr(struct nvkm_pmu *pmu)
 				     gm20b_pmu_acr_init_wpr_callback, pmu, 0);
 }
 
-int
+static int
 gm20b_pmu_initmsg(struct nvkm_pmu *pmu)
 {
 	struct nv_pmu_init_msg msg;
@@ -192,14 +181,13 @@ gm20b_pmu_initmsg(struct nvkm_pmu *pmu)
 	return gm20b_pmu_acr_init_wpr(pmu);
 }
 
-void
+static void
 gm20b_pmu_recv(struct nvkm_pmu *pmu)
 {
 	if (!pmu->initmsg_received) {
 		int ret = pmu->func->initmsg(pmu);
 		if (ret) {
-			nvkm_error(&pmu->subdev,
-				   "error parsing init message: %d\n", ret);
+			nvkm_error(&pmu->subdev, "error parsing init message: %d\n", ret);
 			return;
 		}
 
@@ -209,10 +197,44 @@ gm20b_pmu_recv(struct nvkm_pmu *pmu)
 	nvkm_falcon_msgq_recv(pmu->msgq);
 }
 
-static const struct nvkm_pmu_func
+static void
+gm20b_pmu_fini(struct nvkm_pmu *pmu)
+{
+	/*TODO: shutdown RTOS. */
+
+	flush_work(&pmu->recv.work);
+	nvkm_falcon_cmdq_fini(pmu->lpq);
+	nvkm_falcon_cmdq_fini(pmu->hpq);
+
+	reinit_completion(&pmu->wpr_ready);
+
+	nvkm_falcon_put(&pmu->falcon, &pmu->subdev);
+}
+
+static int
+gm20b_pmu_init(struct nvkm_pmu *pmu)
+{
+	struct nvkm_falcon *falcon = &pmu->falcon;
+	struct nv_pmu_args args = { .secure_mode = true };
+	u32 addr_args = falcon->data.limit - sizeof(args);
+	int ret;
+
+	ret = nvkm_falcon_get(&pmu->falcon, &pmu->subdev);
+	if (ret)
+		return ret;
+
+	pmu->initmsg_received = false;
+
+	nvkm_falcon_load_dmem(falcon, &args, addr_args, sizeof(args), 0);
+	nvkm_falcon_start(falcon);
+	return 0;
+}
+
+const struct nvkm_pmu_func
 gm20b_pmu = {
 	.flcn = &gm200_pmu_flcn,
-	.enabled = gf100_pmu_enabled,
+	.init = gm20b_pmu_init,
+	.fini = gm20b_pmu_fini,
 	.intr = gt215_pmu_intr,
 	.recv = gm20b_pmu_recv,
 	.initmsg = gm20b_pmu_initmsg,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c
index 1a6f9c3af5ec..cd3148360996 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c
@@ -23,25 +23,25 @@
  */
 #include "priv.h"
 
-void
-gp102_pmu_reset(struct nvkm_pmu *pmu)
-{
-	struct nvkm_device *device = pmu->subdev.device;
-	nvkm_mask(device, 0x10a3c0, 0x00000001, 0x00000001);
-	nvkm_mask(device, 0x10a3c0, 0x00000001, 0x00000000);
-}
-
-static bool
-gp102_pmu_enabled(struct nvkm_pmu *pmu)
-{
-	return !(nvkm_rd32(pmu->subdev.device, 0x10a3c0) & 0x00000001);
-}
+static const struct nvkm_falcon_func
+gp102_pmu_flcn = {
+	.disable = gm200_flcn_disable,
+	.enable = gm200_flcn_enable,
+	.reset_eng = gp102_flcn_reset_eng,
+	.reset_wait_mem_scrubbing = gm200_flcn_reset_wait_mem_scrubbing,
+	.debug = 0xc08,
+	.bind_inst = gm200_pmu_flcn_bind_inst,
+	.bind_stat = gm200_flcn_bind_stat,
+	.imem_pio = &gm200_flcn_imem_pio,
+	.dmem_pio = &gm200_flcn_dmem_pio,
+	.start = nvkm_falcon_v1_start,
+	.cmdq = { 0x4a0, 0x4b0, 4 },
+	.msgq = { 0x4c8, 0x4cc, 0 },
+};
 
 static const struct nvkm_pmu_func
 gp102_pmu = {
-	.flcn = &gm200_pmu_flcn,
-	.enabled = gp102_pmu_enabled,
-	.reset = gp102_pmu_reset,
+	.flcn = &gp102_pmu_flcn,
 };
 
 static const struct nvkm_pmu_fwif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c
index 94cfb1791af6..a6f410ba60bc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp10b.c
@@ -68,7 +68,6 @@ gp10b_pmu_acr = {
 	.bld_size = sizeof(struct loader_config),
 	.bld_write = gm20b_pmu_acr_bld_write,
 	.bld_patch = gm20b_pmu_acr_bld_patch,
-	.boot = gm20b_pmu_acr_boot,
 	.bootstrap_falcons = BIT_ULL(NVKM_ACR_LSF_PMU) |
 			     BIT_ULL(NVKM_ACR_LSF_FECS) |
 			     BIT_ULL(NVKM_ACR_LSF_GPCCS),
@@ -76,16 +75,6 @@ gp10b_pmu_acr = {
 	.bootstrap_multiple_falcons = gp10b_pmu_acr_bootstrap_multiple_falcons,
 };
 
-static const struct nvkm_pmu_func
-gp10b_pmu = {
-	.flcn = &gm200_pmu_flcn,
-	.enabled = gf100_pmu_enabled,
-	.intr = gt215_pmu_intr,
-	.recv = gm20b_pmu_recv,
-	.initmsg = gm20b_pmu_initmsg,
-	.reset = gp102_pmu_reset,
-};
-
 #if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC)
 MODULE_FIRMWARE("nvidia/gp10b/pmu/desc.bin");
 MODULE_FIRMWARE("nvidia/gp10b/pmu/image.bin");
@@ -94,8 +83,8 @@ MODULE_FIRMWARE("nvidia/gp10b/pmu/sig.bin");
 
 static const struct nvkm_pmu_fwif
 gp10b_pmu_fwif[] = {
-	{  0, gm20b_pmu_load, &gp10b_pmu, &gp10b_pmu_acr },
-	{ -1, gm200_pmu_nofw, &gp10b_pmu },
+	{  0, gm20b_pmu_load, &gm20b_pmu, &gp10b_pmu_acr },
+	{ -1, gm200_pmu_nofw, &gm20b_pmu },
 	{}
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gt215.c
index b0407b86bc10..32cee21ed858 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gt215.c
@@ -178,12 +178,14 @@ void
 gt215_pmu_fini(struct nvkm_pmu *pmu)
 {
 	nvkm_wr32(pmu->subdev.device, 0x10a014, 0x00000060);
+	flush_work(&pmu->recv.work);
 }
 
 static void
 gt215_pmu_reset(struct nvkm_pmu *pmu)
 {
 	struct nvkm_device *device = pmu->subdev.device;
+
 	nvkm_mask(device, 0x022210, 0x00000001, 0x00000000);
 	nvkm_mask(device, 0x022210, 0x00000001, 0x00000001);
 	nvkm_rd32(device, 0x022210);
@@ -201,6 +203,23 @@ gt215_pmu_init(struct nvkm_pmu *pmu)
 	struct nvkm_device *device = pmu->subdev.device;
 	int i;
 
+	/* Inhibit interrupts, and wait for idle. */
+	if (pmu->func->enabled(pmu)) {
+		nvkm_wr32(device, 0x10a014, 0x0000ffff);
+		nvkm_msec(device, 2000,
+			if (!nvkm_rd32(device, 0x10a04c))
+				break;
+		);
+	}
+
+	pmu->func->reset(pmu);
+
+	/* Wait for IMEM/DMEM scrubbing to be complete. */
+	nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, 0x10a10c) & 0x00000006))
+			break;
+	);
+
 	/* upload data segment */
 	nvkm_wr32(device, 0x10a1c0, 0x01000000);
 	for (i = 0; i < pmu->func->data.size / 4; i++)
@@ -243,20 +262,6 @@ gt215_pmu_init(struct nvkm_pmu *pmu)
 
 const struct nvkm_falcon_func
 gt215_pmu_flcn = {
-	.debug = 0xc08,
-	.fbif = 0xe00,
-	.load_imem = nvkm_falcon_v1_load_imem,
-	.load_dmem = nvkm_falcon_v1_load_dmem,
-	.read_dmem = nvkm_falcon_v1_read_dmem,
-	.bind_context = nvkm_falcon_v1_bind_context,
-	.wait_for_halt = nvkm_falcon_v1_wait_for_halt,
-	.clear_interrupt = nvkm_falcon_v1_clear_interrupt,
-	.set_start_addr = nvkm_falcon_v1_set_start_addr,
-	.start = nvkm_falcon_v1_start,
-	.enable = nvkm_falcon_v1_enable,
-	.disable = nvkm_falcon_v1_disable,
-	.cmdq = { 0x4a0, 0x4b0, 4 },
-	.msgq = { 0x4c8, 0x4cc, 0 },
 };
 
 static const struct nvkm_pmu_func
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
index 21abf31f4442..2d0a8fa6f196 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
@@ -46,13 +46,12 @@ void gp102_pmu_reset(struct nvkm_pmu *pmu);
 void gk110_pmu_pgob(struct nvkm_pmu *, bool);
 
 extern const struct nvkm_falcon_func gm200_pmu_flcn;
+void gm200_pmu_flcn_bind_inst(struct nvkm_falcon *, int, u64);
 
+extern const struct nvkm_pmu_func gm20b_pmu;
 void gm20b_pmu_acr_bld_patch(struct nvkm_acr *, u32, s64);
 void gm20b_pmu_acr_bld_write(struct nvkm_acr *, u32, struct nvkm_acr_lsfw *);
-int gm20b_pmu_acr_boot(struct nvkm_falcon *);
 int gm20b_pmu_acr_bootstrap_falcon(struct nvkm_falcon *, enum nvkm_acr_lsf_id);
-void gm20b_pmu_recv(struct nvkm_pmu *);
-int gm20b_pmu_initmsg(struct nvkm_pmu *);
 
 struct nvkm_pmu_fwif {
 	int version;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
index 28d0789f50fe..eb348dfc1d7a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
@@ -117,11 +117,15 @@ nvkm_top_fault(struct nvkm_device *device, int fault)
 	return NULL;
 }
 
-static int
-nvkm_top_oneinit(struct nvkm_subdev *subdev)
+int
+nvkm_top_parse(struct nvkm_device *device)
 {
-	struct nvkm_top *top = nvkm_top(subdev);
-	return top->func->oneinit(top);
+	struct nvkm_top *top = device->top;
+
+	if (!top || !list_empty(&top->device))
+		return 0;
+
+	return top->func->parse(top);
 }
 
 static void *
@@ -141,7 +145,6 @@ nvkm_top_dtor(struct nvkm_subdev *subdev)
 static const struct nvkm_subdev_func
 nvkm_top = {
 	.dtor = nvkm_top_dtor,
-	.oneinit = nvkm_top_oneinit,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/ga100.c
index c982d834c8d9..84790cf52b90 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/ga100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/ga100.c
@@ -22,7 +22,7 @@
 #include "priv.h"
 
 static int
-ga100_top_oneinit(struct nvkm_top *top)
+ga100_top_parse(struct nvkm_top *top)
 {
 	struct nvkm_subdev *subdev = &top->subdev;
 	struct nvkm_device *device = subdev->device;
@@ -97,7 +97,7 @@ ga100_top_oneinit(struct nvkm_top *top)
 
 static const struct nvkm_top_func
 ga100_top = {
-	.oneinit = ga100_top_oneinit,
+	.parse = ga100_top_parse,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
index 4dcad97bd505..2bbba8244cbf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
@@ -24,7 +24,7 @@
 #include "priv.h"
 
 static int
-gk104_top_oneinit(struct nvkm_top *top)
+gk104_top_parse(struct nvkm_top *top)
 {
 	struct nvkm_subdev *subdev = &top->subdev;
 	struct nvkm_device *device = subdev->device;
@@ -108,7 +108,7 @@ gk104_top_oneinit(struct nvkm_top *top)
 
 static const struct nvkm_top_func
 gk104_top = {
-	.oneinit = gk104_top_oneinit,
+	.parse = gk104_top_parse,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/top/priv.h
index 8e103a836705..532be91d8fd9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/priv.h
@@ -5,7 +5,7 @@
 #include <subdev/top.h>
 
 struct nvkm_top_func {
-	int (*oneinit)(struct nvkm_top *);
+	int (*parse)(struct nvkm_top *);
 };
 
 int nvkm_top_new_(const struct nvkm_top_func *, struct nvkm_device *, enum nvkm_subdev_type, int,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/Kbuild
new file mode 100644
index 000000000000..23cd21b40a25
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/Kbuild
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: MIT
+nvkm-y += nvkm/subdev/vfn/base.o
+nvkm-y += nvkm/subdev/vfn/uvfn.o
+nvkm-y += nvkm/subdev/vfn/gv100.o
+nvkm-y += nvkm/subdev/vfn/tu102.o
+nvkm-y += nvkm/subdev/vfn/ga100.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv04.c b/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/base.c
index 9acaec5c271e..62e81d551f44 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv04.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/base.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2012 Red Hat Inc.
+ * Copyright 2021 Red Hat Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -18,45 +18,43 @@
  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
- *
- * Authors: Ben Skeggs
  */
 #include "priv.h"
-#include "head.h"
 
-#include <core/client.h>
+static void *
+nvkm_vfn_dtor(struct nvkm_subdev *subdev)
+{
+	return nvkm_vfn(subdev);
+}
 
-#include <nvif/cl0046.h>
-#include <nvif/unpack.h>
+static const struct nvkm_subdev_func
+nvkm_vfn = {
+	.dtor = nvkm_vfn_dtor,
+};
 
 int
-nv04_disp_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
+nvkm_vfn_new_(const struct nvkm_vfn_func *func, struct nvkm_device *device,
+	      enum nvkm_subdev_type type, int inst, u32 addr, struct nvkm_vfn **pvfn)
 {
-	struct nvkm_disp *disp = nvkm_disp(object->engine);
-	union {
-		struct nv04_disp_mthd_v0 v0;
-	} *args = data;
-	struct nvkm_head *head;
-	int id, ret = -ENOSYS;
+	struct nvkm_vfn *vfn;
+	int ret;
 
-	nvif_ioctl(object, "disp mthd size %d\n", size);
-	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) {
-		nvif_ioctl(object, "disp mthd vers %d mthd %02x head %d\n",
-			   args->v0.version, args->v0.method, args->v0.head);
-		mthd = args->v0.method;
-		id   = args->v0.head;
-	} else
-		return ret;
+	if (!(vfn = *pvfn = kzalloc(sizeof(*vfn), GFP_KERNEL)))
+		return -ENOMEM;
 
-	if (!(head = nvkm_head_find(disp, id)))
-		return -ENXIO;
+	nvkm_subdev_ctor(&nvkm_vfn, device, type, inst, &vfn->subdev);
+	vfn->func = func;
+	vfn->addr.priv = addr;
+	vfn->addr.user = vfn->addr.priv + func->user.addr;
 
-	switch (mthd) {
-	case NV04_DISP_SCANOUTPOS:
-		return nvkm_head_mthd_scanoutpos(object, head, data, size);
-	default:
-		break;
+	if (vfn->func->intr) {
+		ret = nvkm_intr_add(vfn->func->intr, vfn->func->intrs,
+				    &vfn->subdev, 8, &vfn->intr);
+		if (ret)
+			return ret;
 	}
 
-	return -EINVAL;
+	vfn->user.ctor = nvkm_uvfn_new;
+	vfn->user.base = func->user.base;
+	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/ga100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/ga100.c
new file mode 100644
index 000000000000..fd5c6931322d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/ga100.c
@@ -0,0 +1,47 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <nvif/class.h>
+
+static const struct nvkm_intr_data
+ga100_vfn_intrs[] = {
+	{ NVKM_ENGINE_DISP    , 0, 4, 0x04000000, true },
+	{ NVKM_SUBDEV_GPIO    , 0, 4, 0x00200000, true },
+	{ NVKM_SUBDEV_I2C     , 0, 4, 0x00200000, true },
+	{ NVKM_SUBDEV_PRIVRING, 0, 4, 0x40000000, true },
+	{}
+};
+
+static const struct nvkm_vfn_func
+ga100_vfn = {
+	.intr = &tu102_vfn_intr,
+	.intrs = ga100_vfn_intrs,
+	.user = { 0x030000, 0x010000, { -1, -1, AMPERE_USERMODE_A } },
+};
+
+int
+ga100_vfn_new(struct nvkm_device *device,
+	      enum nvkm_subdev_type type, int inst, struct nvkm_vfn **pvfn)
+{
+	return nvkm_vfn_new_(&ga100_vfn, device, type, inst, 0xb80000, pvfn);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/gv100.c
index 217268f8ccad..ddd39d714c4a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/gv100.c
@@ -1,5 +1,5 @@
 /*
- * Copyright 2018 Red Hat Inc.
+ * Copyright 2021 Red Hat Inc.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -19,27 +19,18 @@
  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  * OTHER DEALINGS IN THE SOFTWARE.
  */
-#include "user.h"
+#include "priv.h"
 
-static int
-tu102_fifo_user_map(struct nvkm_object *object, void *argv, u32 argc,
-		    enum nvkm_object_map *type, u64 *addr, u64 *size)
-{
-	struct nvkm_device *device = object->engine->subdev.device;
-	*addr = 0xbb0000 + device->func->resource_addr(device, 0);
-	*size = 0x010000;
-	*type = NVKM_OBJECT_MAP_IO;
-	return 0;
-}
+#include <nvif/class.h>
 
-static const struct nvkm_object_func
-tu102_fifo_user = {
-	.map = tu102_fifo_user_map,
+static const struct nvkm_vfn_func
+gv100_vfn = {
+	.user = { 0x810000, 0x010000, { -1, -1, VOLTA_USERMODE_A } },
 };
 
 int
-tu102_fifo_user_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
-		    struct nvkm_object **pobject)
+gv100_vfn_new(struct nvkm_device *device,
+	      enum nvkm_subdev_type type, int inst, struct nvkm_vfn **pvfn)
 {
-	return nvkm_object_new_(&tu102_fifo_user, oclass, argv, argc, pobject);
+	return nvkm_vfn_new_(&gv100_vfn, device, type, inst, 0, pvfn);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/priv.h
new file mode 100644
index 000000000000..96d53c02041b
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/priv.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef __NVKM_VFN_PRIV_H__
+#define __NVKM_VFN_PRIV_H__
+#define nvkm_vfn(p) container_of((p), struct nvkm_vfn, subdev)
+#include <subdev/vfn.h>
+
+struct nvkm_vfn_func {
+	const struct nvkm_intr_func *intr;
+	const struct nvkm_intr_data *intrs;
+
+	struct {
+		u32 addr;
+		u32 size;
+		const struct nvkm_sclass base;
+	} user;
+};
+
+int nvkm_vfn_new_(const struct nvkm_vfn_func *, struct nvkm_device *, enum nvkm_subdev_type, int,
+		  u32 addr, struct nvkm_vfn **);
+
+extern const struct nvkm_intr_func tu102_vfn_intr;
+
+int nvkm_uvfn_new(struct nvkm_device *, const struct nvkm_oclass *, void *, u32,
+		  struct nvkm_object **);
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/tu102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/tu102.c
new file mode 100644
index 000000000000..3d063fb5e136
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/tu102.c
@@ -0,0 +1,108 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <nvif/class.h>
+
+static void
+tu102_vfn_intr_reset(struct nvkm_intr *intr, int leaf, u32 mask)
+{
+	struct nvkm_vfn *vfn = container_of(intr, typeof(*vfn), intr);
+
+	nvkm_wr32(vfn->subdev.device, vfn->addr.priv + 0x1000 + (leaf * 4), mask);
+}
+
+static void
+tu102_vfn_intr_allow(struct nvkm_intr *intr, int leaf, u32 mask)
+{
+	struct nvkm_vfn *vfn = container_of(intr, typeof(*vfn), intr);
+
+	nvkm_wr32(vfn->subdev.device, vfn->addr.priv + 0x1200 + (leaf * 4), mask);
+}
+
+static void
+tu102_vfn_intr_block(struct nvkm_intr *intr, int leaf, u32 mask)
+{
+	struct nvkm_vfn *vfn = container_of(intr, typeof(*vfn), intr);
+
+	nvkm_wr32(vfn->subdev.device, vfn->addr.priv + 0x1400 + (leaf * 4), mask);
+}
+
+static void
+tu102_vfn_intr_rearm(struct nvkm_intr *intr)
+{
+	struct nvkm_vfn *vfn = container_of(intr, typeof(*vfn), intr);
+
+	nvkm_wr32(vfn->subdev.device, vfn->addr.priv + 0x1608, 0x0000000f);
+}
+
+static void
+tu102_vfn_intr_unarm(struct nvkm_intr *intr)
+{
+	struct nvkm_vfn *vfn = container_of(intr, typeof(*vfn), intr);
+
+	nvkm_wr32(vfn->subdev.device, vfn->addr.priv + 0x1610, 0x0000000f);
+}
+
+static bool
+tu102_vfn_intr_pending(struct nvkm_intr *intr)
+{
+	struct nvkm_vfn *vfn = container_of(intr, typeof(*vfn), intr);
+	struct nvkm_device *device = vfn->subdev.device;
+	u32 intr_top = nvkm_rd32(device, vfn->addr.priv + 0x1600);
+	int pending = 0, leaf;
+
+	for (leaf = 0; leaf < 8; leaf++) {
+		if (intr_top & BIT(leaf / 2)) {
+			intr->stat[leaf] = nvkm_rd32(device, vfn->addr.priv + 0x1000 + (leaf * 4));
+			if (intr->stat[leaf])
+				pending++;
+		} else {
+			intr->stat[leaf] = 0;
+		}
+	}
+
+	return pending != 0;
+}
+
+const struct nvkm_intr_func
+tu102_vfn_intr = {
+	.pending = tu102_vfn_intr_pending,
+	.unarm = tu102_vfn_intr_unarm,
+	.rearm = tu102_vfn_intr_rearm,
+	.block = tu102_vfn_intr_block,
+	.allow = tu102_vfn_intr_allow,
+	.reset = tu102_vfn_intr_reset,
+};
+
+static const struct nvkm_vfn_func
+tu102_vfn = {
+	.intr = &tu102_vfn_intr,
+	.user = { 0x030000, 0x010000, { -1, -1, TURING_USERMODE_A } },
+};
+
+int
+tu102_vfn_new(struct nvkm_device *device,
+	      enum nvkm_subdev_type type, int inst, struct nvkm_vfn **pvfn)
+{
+	return nvkm_vfn_new_(&tu102_vfn, device, type, inst, 0xb80000, pvfn);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/uvfn.c b/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/uvfn.c
new file mode 100644
index 000000000000..c5460a14c541
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/vfn/uvfn.c
@@ -0,0 +1,67 @@
+/*
+ * Copyright 2021 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#define nvkm_uvfn(p) container_of((p), struct nvkm_uvfn, object)
+#include "priv.h"
+
+#include <core/object.h>
+
+struct nvkm_uvfn {
+	struct nvkm_object object;
+	struct nvkm_vfn *vfn;
+};
+
+static int
+nvkm_uvfn_map(struct nvkm_object *object, void *argv, u32 argc,
+	      enum nvkm_object_map *type, u64 *addr, u64 *size)
+{
+	struct nvkm_vfn *vfn = nvkm_uvfn(object)->vfn;
+	struct nvkm_device *device = vfn->subdev.device;
+
+	*addr = device->func->resource_addr(device, 0) + vfn->addr.user;
+	*size = vfn->func->user.size;
+	*type = NVKM_OBJECT_MAP_IO;
+	return 0;
+}
+
+static const struct nvkm_object_func
+nvkm_uvfn = {
+	.map = nvkm_uvfn_map,
+};
+
+int
+nvkm_uvfn_new(struct nvkm_device *device, const struct nvkm_oclass *oclass,
+	      void *argv, u32 argc, struct nvkm_object **pobject)
+{
+	struct nvkm_uvfn *uvfn;
+
+	if (argc != 0)
+		return -ENOSYS;
+
+	if (!(uvfn = kzalloc(sizeof(*uvfn), GFP_KERNEL)))
+		return -ENOMEM;
+
+	nvkm_object_ctor(&nvkm_uvfn, oclass, &uvfn->object);
+	uvfn->vfn = device->vfn;
+
+	*pobject = &uvfn->object;
+	return 0;
+}
diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.c b/drivers/gpu/drm/omapdrm/omap_fbdev.c
index ed67dd25794c..98d8758048fc 100644
--- a/drivers/gpu/drm/omapdrm/omap_fbdev.c
+++ b/drivers/gpu/drm/omapdrm/omap_fbdev.c
@@ -38,7 +38,7 @@ static struct drm_fb_helper *get_fb(struct fb_info *fbi);
 static void pan_worker(struct work_struct *work)
 {
 	struct omap_fbdev *fbdev = container_of(work, struct omap_fbdev, work);
-	struct fb_info *fbi = fbdev->base.fbdev;
+	struct fb_info *fbi = fbdev->base.info;
 	int npages;
 
 	/* DMM roll shifts in 4K pages: */
@@ -161,7 +161,7 @@ static int omap_fbdev_create(struct drm_fb_helper *helper,
 		goto fail;
 	}
 
-	fbi = drm_fb_helper_alloc_fbi(helper);
+	fbi = drm_fb_helper_alloc_info(helper);
 	if (IS_ERR(fbi)) {
 		dev_err(dev->dev, "failed to allocate fb info\n");
 		ret = PTR_ERR(fbi);
@@ -272,7 +272,7 @@ void omap_fbdev_fini(struct drm_device *dev)
 	if (!helper)
 		return;
 
-	drm_fb_helper_unregister_fbi(helper);
+	drm_fb_helper_unregister_info(helper);
 
 	drm_fb_helper_fini(helper);
 
diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
index 8e194dbc9506..3abc47521b2c 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
@@ -66,6 +66,8 @@ static int omap_gem_dmabuf_mmap(struct dma_buf *buffer,
 	struct drm_gem_object *obj = buffer->priv;
 	int ret = 0;
 
+	dma_resv_assert_held(buffer->resv);
+
 	ret = drm_gem_mmap_obj(obj, omap_gem_mmap_size(obj), vma);
 	if (ret < 0)
 		return ret;
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index a582ddd583c2..737edcdf9eef 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -203,6 +203,16 @@ config DRM_PANEL_INNOLUX_P079ZCA
 	  24 bit RGB per pixel. It provides a MIPI DSI interface to
 	  the host and has a built-in LED backlight.
 
+config DRM_PANEL_JADARD_JD9365DA_H3
+	tristate "Jadard JD9365DA-H3 WXGA DSI panel"
+	depends on OF
+	depends on DRM_MIPI_DSI
+	depends on BACKLIGHT_CLASS_DEVICE
+	help
+	  Say Y here if you want to enable support for Jadard JD9365DA-H3
+	  WXGA MIPI DSI panel. The panel support TFT dot matrix LCD with
+	  800RGBx1280 dots at maximum.
+
 config DRM_PANEL_JDI_LT070ME05000
 	tristate "JDI LT070ME05000 WUXGA DSI panel"
 	depends on OF
@@ -296,6 +306,15 @@ config DRM_PANEL_NEC_NL8048HL11
 	  panel (found on the Zoom2/3/3630 SDP boards). To compile this driver
 	  as a module, choose M here.
 
+config DRM_PANEL_NEWVISION_NV3051D
+	tristate "NewVision NV3051D DSI panel"
+	depends on OF
+	depends on DRM_MIPI_DSI
+	depends on BACKLIGHT_CLASS_DEVICE
+	help
+	  This driver supports the NV3051D based panel found on the Anbernic
+	  RG353P and RG353V.
+
 config DRM_PANEL_NEWVISION_NV3052C
 	tristate "NewVision NV3052C RGB/SPI panel"
 	depends on OF && SPI
diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile
index 34e717382dbb..f8f9d9f6a307 100644
--- a/drivers/gpu/drm/panel/Makefile
+++ b/drivers/gpu/drm/panel/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9341) += panel-ilitek-ili9341.o
 obj-$(CONFIG_DRM_PANEL_ILITEK_ILI9881C) += panel-ilitek-ili9881c.o
 obj-$(CONFIG_DRM_PANEL_INNOLUX_EJ030NA) += panel-innolux-ej030na.o
 obj-$(CONFIG_DRM_PANEL_INNOLUX_P079ZCA) += panel-innolux-p079zca.o
+obj-$(CONFIG_DRM_PANEL_JADARD_JD9365DA_H3) += panel-jadard-jd9365da-h3.o
 obj-$(CONFIG_DRM_PANEL_JDI_LT070ME05000) += panel-jdi-lt070me05000.o
 obj-$(CONFIG_DRM_PANEL_JDI_R63452) += panel-jdi-fhd-r63452.o
 obj-$(CONFIG_DRM_PANEL_KHADAS_TS050) += panel-khadas-ts050.o
@@ -27,6 +28,7 @@ obj-$(CONFIG_DRM_PANEL_LEADTEK_LTK500HD1829) += panel-leadtek-ltk500hd1829.o
 obj-$(CONFIG_DRM_PANEL_LG_LB035Q02) += panel-lg-lb035q02.o
 obj-$(CONFIG_DRM_PANEL_LG_LG4573) += panel-lg-lg4573.o
 obj-$(CONFIG_DRM_PANEL_NEC_NL8048HL11) += panel-nec-nl8048hl11.o
+obj-$(CONFIG_DRM_PANEL_NEWVISION_NV3051D) += panel-newvision-nv3051d.o
 obj-$(CONFIG_DRM_PANEL_NEWVISION_NV3052C) += panel-newvision-nv3052c.o
 obj-$(CONFIG_DRM_PANEL_NOVATEK_NT35510) += panel-novatek-nt35510.o
 obj-$(CONFIG_DRM_PANEL_NOVATEK_NT35560) += panel-novatek-nt35560.o
diff --git a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c
index 39dc40cf681f..384a724f2822 100644
--- a/drivers/gpu/drm/panel/panel-ilitek-ili9341.c
+++ b/drivers/gpu/drm/panel/panel-ilitek-ili9341.c
@@ -18,6 +18,7 @@
  * Copyright 2018 David Lechner <david@lechnology.com>
  */
 
+#include <linux/backlight.h>
 #include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/gpio/consumer.h>
@@ -30,7 +31,7 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_atomic_helper.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
diff --git a/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c
new file mode 100644
index 000000000000..48c1702a863b
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-jadard-jd9365da-h3.c
@@ -0,0 +1,473 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (c) 2019 Radxa Limited
+ * Copyright (c) 2022 Edgeble AI Technologies Pvt. Ltd.
+ *
+ * Author:
+ * - Jagan Teki <jagan@amarulasolutions.com>
+ * - Stephen Chen <stephen@radxa.com>
+ */
+
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_print.h>
+
+#include <linux/gpio/consumer.h>
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/regulator/consumer.h>
+
+#define JD9365DA_INIT_CMD_LEN		2
+
+struct jadard_init_cmd {
+	u8 data[JD9365DA_INIT_CMD_LEN];
+};
+
+struct jadard_panel_desc {
+	const struct drm_display_mode mode;
+	unsigned int lanes;
+	enum mipi_dsi_pixel_format format;
+	const struct jadard_init_cmd *init_cmds;
+	u32 num_init_cmds;
+};
+
+struct jadard {
+	struct drm_panel panel;
+	struct mipi_dsi_device *dsi;
+	const struct jadard_panel_desc *desc;
+
+	struct regulator *vdd;
+	struct regulator *vccio;
+	struct gpio_desc *reset;
+};
+
+static inline struct jadard *panel_to_jadard(struct drm_panel *panel)
+{
+	return container_of(panel, struct jadard, panel);
+}
+
+static int jadard_enable(struct drm_panel *panel)
+{
+	struct device *dev = panel->dev;
+	struct jadard *jadard = panel_to_jadard(panel);
+	const struct jadard_panel_desc *desc = jadard->desc;
+	struct mipi_dsi_device *dsi = jadard->dsi;
+	unsigned int i;
+	int err;
+
+	msleep(10);
+
+	for (i = 0; i < desc->num_init_cmds; i++) {
+		const struct jadard_init_cmd *cmd = &desc->init_cmds[i];
+
+		err = mipi_dsi_dcs_write_buffer(dsi, cmd->data, JD9365DA_INIT_CMD_LEN);
+		if (err < 0)
+			return err;
+	}
+
+	msleep(120);
+
+	err = mipi_dsi_dcs_exit_sleep_mode(dsi);
+	if (err < 0)
+		DRM_DEV_ERROR(dev, "failed to exit sleep mode ret = %d\n", err);
+
+	err =  mipi_dsi_dcs_set_display_on(dsi);
+	if (err < 0)
+		DRM_DEV_ERROR(dev, "failed to set display on ret = %d\n", err);
+
+	return 0;
+}
+
+static int jadard_disable(struct drm_panel *panel)
+{
+	struct device *dev = panel->dev;
+	struct jadard *jadard = panel_to_jadard(panel);
+	int ret;
+
+	ret = mipi_dsi_dcs_set_display_off(jadard->dsi);
+	if (ret < 0)
+		DRM_DEV_ERROR(dev, "failed to set display off: %d\n", ret);
+
+	ret = mipi_dsi_dcs_enter_sleep_mode(jadard->dsi);
+	if (ret < 0)
+		DRM_DEV_ERROR(dev, "failed to enter sleep mode: %d\n", ret);
+
+	return 0;
+}
+
+static int jadard_prepare(struct drm_panel *panel)
+{
+	struct jadard *jadard = panel_to_jadard(panel);
+	int ret;
+
+	ret = regulator_enable(jadard->vccio);
+	if (ret)
+		return ret;
+
+	ret = regulator_enable(jadard->vdd);
+	if (ret)
+		return ret;
+
+	gpiod_set_value(jadard->reset, 1);
+	msleep(5);
+
+	gpiod_set_value(jadard->reset, 0);
+	msleep(10);
+
+	gpiod_set_value(jadard->reset, 1);
+	msleep(120);
+
+	return 0;
+}
+
+static int jadard_unprepare(struct drm_panel *panel)
+{
+	struct jadard *jadard = panel_to_jadard(panel);
+
+	gpiod_set_value(jadard->reset, 1);
+	msleep(120);
+
+	regulator_disable(jadard->vdd);
+	regulator_disable(jadard->vccio);
+
+	return 0;
+}
+
+static int jadard_get_modes(struct drm_panel *panel,
+			    struct drm_connector *connector)
+{
+	struct jadard *jadard = panel_to_jadard(panel);
+	const struct drm_display_mode *desc_mode = &jadard->desc->mode;
+	struct drm_display_mode *mode;
+
+	mode = drm_mode_duplicate(connector->dev, desc_mode);
+	if (!mode) {
+		DRM_DEV_ERROR(&jadard->dsi->dev, "failed to add mode %ux%ux@%u\n",
+			      desc_mode->hdisplay, desc_mode->vdisplay,
+			      drm_mode_vrefresh(desc_mode));
+		return -ENOMEM;
+	}
+
+	drm_mode_set_name(mode);
+	drm_mode_probed_add(connector, mode);
+
+	connector->display_info.width_mm = mode->width_mm;
+	connector->display_info.height_mm = mode->height_mm;
+
+	return 1;
+}
+
+static const struct drm_panel_funcs jadard_funcs = {
+	.disable = jadard_disable,
+	.unprepare = jadard_unprepare,
+	.prepare = jadard_prepare,
+	.enable = jadard_enable,
+	.get_modes = jadard_get_modes,
+};
+
+static const struct jadard_init_cmd cz101b4001_init_cmds[] = {
+	{ .data = { 0xE0, 0x00 } },
+	{ .data = { 0xE1, 0x93 } },
+	{ .data = { 0xE2, 0x65 } },
+	{ .data = { 0xE3, 0xF8 } },
+	{ .data = { 0x80, 0x03 } },
+	{ .data = { 0xE0, 0x01 } },
+	{ .data = { 0x00, 0x00 } },
+	{ .data = { 0x01, 0x3B } },
+	{ .data = { 0x0C, 0x74 } },
+	{ .data = { 0x17, 0x00 } },
+	{ .data = { 0x18, 0xAF } },
+	{ .data = { 0x19, 0x00 } },
+	{ .data = { 0x1A, 0x00 } },
+	{ .data = { 0x1B, 0xAF } },
+	{ .data = { 0x1C, 0x00 } },
+	{ .data = { 0x35, 0x26 } },
+	{ .data = { 0x37, 0x09 } },
+	{ .data = { 0x38, 0x04 } },
+	{ .data = { 0x39, 0x00 } },
+	{ .data = { 0x3A, 0x01 } },
+	{ .data = { 0x3C, 0x78 } },
+	{ .data = { 0x3D, 0xFF } },
+	{ .data = { 0x3E, 0xFF } },
+	{ .data = { 0x3F, 0x7F } },
+	{ .data = { 0x40, 0x06 } },
+	{ .data = { 0x41, 0xA0 } },
+	{ .data = { 0x42, 0x81 } },
+	{ .data = { 0x43, 0x14 } },
+	{ .data = { 0x44, 0x23 } },
+	{ .data = { 0x45, 0x28 } },
+	{ .data = { 0x55, 0x02 } },
+	{ .data = { 0x57, 0x69 } },
+	{ .data = { 0x59, 0x0A } },
+	{ .data = { 0x5A, 0x2A } },
+	{ .data = { 0x5B, 0x17 } },
+	{ .data = { 0x5D, 0x7F } },
+	{ .data = { 0x5E, 0x6B } },
+	{ .data = { 0x5F, 0x5C } },
+	{ .data = { 0x60, 0x4F } },
+	{ .data = { 0x61, 0x4D } },
+	{ .data = { 0x62, 0x3F } },
+	{ .data = { 0x63, 0x42 } },
+	{ .data = { 0x64, 0x2B } },
+	{ .data = { 0x65, 0x44 } },
+	{ .data = { 0x66, 0x43 } },
+	{ .data = { 0x67, 0x43 } },
+	{ .data = { 0x68, 0x63 } },
+	{ .data = { 0x69, 0x52 } },
+	{ .data = { 0x6A, 0x5A } },
+	{ .data = { 0x6B, 0x4F } },
+	{ .data = { 0x6C, 0x4E } },
+	{ .data = { 0x6D, 0x20 } },
+	{ .data = { 0x6E, 0x0F } },
+	{ .data = { 0x6F, 0x00 } },
+	{ .data = { 0x70, 0x7F } },
+	{ .data = { 0x71, 0x6B } },
+	{ .data = { 0x72, 0x5C } },
+	{ .data = { 0x73, 0x4F } },
+	{ .data = { 0x74, 0x4D } },
+	{ .data = { 0x75, 0x3F } },
+	{ .data = { 0x76, 0x42 } },
+	{ .data = { 0x77, 0x2B } },
+	{ .data = { 0x78, 0x44 } },
+	{ .data = { 0x79, 0x43 } },
+	{ .data = { 0x7A, 0x43 } },
+	{ .data = { 0x7B, 0x63 } },
+	{ .data = { 0x7C, 0x52 } },
+	{ .data = { 0x7D, 0x5A } },
+	{ .data = { 0x7E, 0x4F } },
+	{ .data = { 0x7F, 0x4E } },
+	{ .data = { 0x80, 0x20 } },
+	{ .data = { 0x81, 0x0F } },
+	{ .data = { 0x82, 0x00 } },
+	{ .data = { 0xE0, 0x02 } },
+	{ .data = { 0x00, 0x02 } },
+	{ .data = { 0x01, 0x02 } },
+	{ .data = { 0x02, 0x00 } },
+	{ .data = { 0x03, 0x00 } },
+	{ .data = { 0x04, 0x1E } },
+	{ .data = { 0x05, 0x1E } },
+	{ .data = { 0x06, 0x1F } },
+	{ .data = { 0x07, 0x1F } },
+	{ .data = { 0x08, 0x1F } },
+	{ .data = { 0x09, 0x17 } },
+	{ .data = { 0x0A, 0x17 } },
+	{ .data = { 0x0B, 0x37 } },
+	{ .data = { 0x0C, 0x37 } },
+	{ .data = { 0x0D, 0x47 } },
+	{ .data = { 0x0E, 0x47 } },
+	{ .data = { 0x0F, 0x45 } },
+	{ .data = { 0x10, 0x45 } },
+	{ .data = { 0x11, 0x4B } },
+	{ .data = { 0x12, 0x4B } },
+	{ .data = { 0x13, 0x49 } },
+	{ .data = { 0x14, 0x49 } },
+	{ .data = { 0x15, 0x1F } },
+	{ .data = { 0x16, 0x01 } },
+	{ .data = { 0x17, 0x01 } },
+	{ .data = { 0x18, 0x00 } },
+	{ .data = { 0x19, 0x00 } },
+	{ .data = { 0x1A, 0x1E } },
+	{ .data = { 0x1B, 0x1E } },
+	{ .data = { 0x1C, 0x1F } },
+	{ .data = { 0x1D, 0x1F } },
+	{ .data = { 0x1E, 0x1F } },
+	{ .data = { 0x1F, 0x17 } },
+	{ .data = { 0x20, 0x17 } },
+	{ .data = { 0x21, 0x37 } },
+	{ .data = { 0x22, 0x37 } },
+	{ .data = { 0x23, 0x46 } },
+	{ .data = { 0x24, 0x46 } },
+	{ .data = { 0x25, 0x44 } },
+	{ .data = { 0x26, 0x44 } },
+	{ .data = { 0x27, 0x4A } },
+	{ .data = { 0x28, 0x4A } },
+	{ .data = { 0x29, 0x48 } },
+	{ .data = { 0x2A, 0x48 } },
+	{ .data = { 0x2B, 0x1F } },
+	{ .data = { 0x2C, 0x01 } },
+	{ .data = { 0x2D, 0x01 } },
+	{ .data = { 0x2E, 0x00 } },
+	{ .data = { 0x2F, 0x00 } },
+	{ .data = { 0x30, 0x1F } },
+	{ .data = { 0x31, 0x1F } },
+	{ .data = { 0x32, 0x1E } },
+	{ .data = { 0x33, 0x1E } },
+	{ .data = { 0x34, 0x1F } },
+	{ .data = { 0x35, 0x17 } },
+	{ .data = { 0x36, 0x17 } },
+	{ .data = { 0x37, 0x37 } },
+	{ .data = { 0x38, 0x37 } },
+	{ .data = { 0x39, 0x08 } },
+	{ .data = { 0x3A, 0x08 } },
+	{ .data = { 0x3B, 0x0A } },
+	{ .data = { 0x3C, 0x0A } },
+	{ .data = { 0x3D, 0x04 } },
+	{ .data = { 0x3E, 0x04 } },
+	{ .data = { 0x3F, 0x06 } },
+	{ .data = { 0x40, 0x06 } },
+	{ .data = { 0x41, 0x1F } },
+	{ .data = { 0x42, 0x02 } },
+	{ .data = { 0x43, 0x02 } },
+	{ .data = { 0x44, 0x00 } },
+	{ .data = { 0x45, 0x00 } },
+	{ .data = { 0x46, 0x1F } },
+	{ .data = { 0x47, 0x1F } },
+	{ .data = { 0x48, 0x1E } },
+	{ .data = { 0x49, 0x1E } },
+	{ .data = { 0x4A, 0x1F } },
+	{ .data = { 0x4B, 0x17 } },
+	{ .data = { 0x4C, 0x17 } },
+	{ .data = { 0x4D, 0x37 } },
+	{ .data = { 0x4E, 0x37 } },
+	{ .data = { 0x4F, 0x09 } },
+	{ .data = { 0x50, 0x09 } },
+	{ .data = { 0x51, 0x0B } },
+	{ .data = { 0x52, 0x0B } },
+	{ .data = { 0x53, 0x05 } },
+	{ .data = { 0x54, 0x05 } },
+	{ .data = { 0x55, 0x07 } },
+	{ .data = { 0x56, 0x07 } },
+	{ .data = { 0x57, 0x1F } },
+	{ .data = { 0x58, 0x40 } },
+	{ .data = { 0x5B, 0x30 } },
+	{ .data = { 0x5C, 0x16 } },
+	{ .data = { 0x5D, 0x34 } },
+	{ .data = { 0x5E, 0x05 } },
+	{ .data = { 0x5F, 0x02 } },
+	{ .data = { 0x63, 0x00 } },
+	{ .data = { 0x64, 0x6A } },
+	{ .data = { 0x67, 0x73 } },
+	{ .data = { 0x68, 0x1D } },
+	{ .data = { 0x69, 0x08 } },
+	{ .data = { 0x6A, 0x6A } },
+	{ .data = { 0x6B, 0x08 } },
+	{ .data = { 0x6C, 0x00 } },
+	{ .data = { 0x6D, 0x00 } },
+	{ .data = { 0x6E, 0x00 } },
+	{ .data = { 0x6F, 0x88 } },
+	{ .data = { 0x75, 0xFF } },
+	{ .data = { 0x77, 0xDD } },
+	{ .data = { 0x78, 0x3F } },
+	{ .data = { 0x79, 0x15 } },
+	{ .data = { 0x7A, 0x17 } },
+	{ .data = { 0x7D, 0x14 } },
+	{ .data = { 0x7E, 0x82 } },
+	{ .data = { 0xE0, 0x04 } },
+	{ .data = { 0x00, 0x0E } },
+	{ .data = { 0x02, 0xB3 } },
+	{ .data = { 0x09, 0x61 } },
+	{ .data = { 0x0E, 0x48 } },
+	{ .data = { 0xE0, 0x00 } },
+	{ .data = { 0xE6, 0x02 } },
+	{ .data = { 0xE7, 0x0C } },
+};
+
+static const struct jadard_panel_desc cz101b4001_desc = {
+	.mode = {
+		.clock		= 70000,
+
+		.hdisplay	= 800,
+		.hsync_start	= 800 + 40,
+		.hsync_end	= 800 + 40 + 18,
+		.htotal		= 800 + 40 + 18 + 20,
+
+		.vdisplay	= 1280,
+		.vsync_start	= 1280 + 20,
+		.vsync_end	= 1280 + 20 + 4,
+		.vtotal		= 1280 + 20 + 4 + 20,
+
+		.width_mm	= 62,
+		.height_mm	= 110,
+		.type		= DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED,
+	},
+	.lanes = 4,
+	.format = MIPI_DSI_FMT_RGB888,
+	.init_cmds = cz101b4001_init_cmds,
+	.num_init_cmds = ARRAY_SIZE(cz101b4001_init_cmds),
+};
+
+static int jadard_dsi_probe(struct mipi_dsi_device *dsi)
+{
+	struct device *dev = &dsi->dev;
+	const struct jadard_panel_desc *desc;
+	struct jadard *jadard;
+	int ret;
+
+	jadard = devm_kzalloc(&dsi->dev, sizeof(*jadard), GFP_KERNEL);
+	if (!jadard)
+		return -ENOMEM;
+
+	desc = of_device_get_match_data(dev);
+	dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
+			  MIPI_DSI_MODE_NO_EOT_PACKET;
+	dsi->format = desc->format;
+	dsi->lanes = desc->lanes;
+
+	jadard->reset = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(jadard->reset)) {
+		DRM_DEV_ERROR(&dsi->dev, "failed to get our reset GPIO\n");
+		return PTR_ERR(jadard->reset);
+	}
+
+	jadard->vdd = devm_regulator_get(dev, "vdd");
+	if (IS_ERR(jadard->vdd)) {
+		DRM_DEV_ERROR(&dsi->dev, "failed to get vdd regulator\n");
+		return PTR_ERR(jadard->vdd);
+	}
+
+	jadard->vccio = devm_regulator_get(dev, "vccio");
+	if (IS_ERR(jadard->vccio)) {
+		DRM_DEV_ERROR(&dsi->dev, "failed to get vccio regulator\n");
+		return PTR_ERR(jadard->vccio);
+	}
+
+	drm_panel_init(&jadard->panel, dev, &jadard_funcs,
+		       DRM_MODE_CONNECTOR_DSI);
+
+	ret = drm_panel_of_backlight(&jadard->panel);
+	if (ret)
+		return ret;
+
+	drm_panel_add(&jadard->panel);
+
+	mipi_dsi_set_drvdata(dsi, jadard);
+	jadard->dsi = dsi;
+	jadard->desc = desc;
+
+	ret = mipi_dsi_attach(dsi);
+	if (ret < 0)
+		drm_panel_remove(&jadard->panel);
+
+	return ret;
+}
+
+static void jadard_dsi_remove(struct mipi_dsi_device *dsi)
+{
+	struct jadard *jadard = mipi_dsi_get_drvdata(dsi);
+
+	mipi_dsi_detach(dsi);
+	drm_panel_remove(&jadard->panel);
+}
+
+static const struct of_device_id jadard_of_match[] = {
+	{ .compatible = "chongzhou,cz101b4001", .data = &cz101b4001_desc },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, jadard_of_match);
+
+static struct mipi_dsi_driver jadard_driver = {
+	.probe = jadard_dsi_probe,
+	.remove = jadard_dsi_remove,
+	.driver = {
+		.name = "jadard-jd9365da",
+		.of_match_table = jadard_of_match,
+	},
+};
+module_mipi_dsi_driver(jadard_driver);
+
+MODULE_AUTHOR("Jagan Teki <jagan@edgeble.ai>");
+MODULE_AUTHOR("Stephen Chen <stephen@radxa.com>");
+MODULE_DESCRIPTION("Jadard JD9365DA-H3 WXGA DSI panel");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/panel/panel-newvision-nv3051d.c b/drivers/gpu/drm/panel/panel-newvision-nv3051d.c
new file mode 100644
index 000000000000..a07958038ffd
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-newvision-nv3051d.c
@@ -0,0 +1,504 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NV3051D MIPI-DSI panel driver for Anbernic RG353x
+ * Copyright (C) 2022 Chris Morgan
+ *
+ * based on
+ *
+ * Elida kd35t133 3.5" MIPI-DSI panel driver
+ * Copyright (C) Theobroma Systems 2020
+ */
+
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/media-bus-format.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/regulator/consumer.h>
+
+#include <video/display_timing.h>
+#include <video/mipi_display.h>
+
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_panel.h>
+
+struct nv3051d_panel_info {
+	const struct drm_display_mode *display_modes;
+	unsigned int num_modes;
+	u16 width_mm, height_mm;
+	u32 bus_flags;
+};
+
+struct panel_nv3051d {
+	struct device *dev;
+	struct drm_panel panel;
+	struct gpio_desc *reset_gpio;
+	const struct nv3051d_panel_info *panel_info;
+	struct regulator *vdd;
+};
+
+static inline struct panel_nv3051d *panel_to_panelnv3051d(struct drm_panel *panel)
+{
+	return container_of(panel, struct panel_nv3051d, panel);
+}
+
+static int panel_nv3051d_init_sequence(struct panel_nv3051d *ctx)
+{
+	struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+
+	/*
+	 * Init sequence was supplied by device vendor with no
+	 * documentation.
+	 */
+
+	mipi_dsi_dcs_write_seq(dsi, 0xFF, 0x30);
+	mipi_dsi_dcs_write_seq(dsi, 0xFF, 0x52);
+	mipi_dsi_dcs_write_seq(dsi, 0xFF, 0x01);
+	mipi_dsi_dcs_write_seq(dsi, 0xE3, 0x00);
+	mipi_dsi_dcs_write_seq(dsi, 0x03, 0x40);
+	mipi_dsi_dcs_write_seq(dsi, 0x04, 0x00);
+	mipi_dsi_dcs_write_seq(dsi, 0x05, 0x03);
+	mipi_dsi_dcs_write_seq(dsi, 0x24, 0x12);
+	mipi_dsi_dcs_write_seq(dsi, 0x25, 0x1E);
+	mipi_dsi_dcs_write_seq(dsi, 0x26, 0x28);
+	mipi_dsi_dcs_write_seq(dsi, 0x27, 0x52);
+	mipi_dsi_dcs_write_seq(dsi, 0x28, 0x57);
+	mipi_dsi_dcs_write_seq(dsi, 0x29, 0x01);
+	mipi_dsi_dcs_write_seq(dsi, 0x2A, 0xDF);
+	mipi_dsi_dcs_write_seq(dsi, 0x38, 0x9C);
+	mipi_dsi_dcs_write_seq(dsi, 0x39, 0xA7);
+	mipi_dsi_dcs_write_seq(dsi, 0x3A, 0x53);
+	mipi_dsi_dcs_write_seq(dsi, 0x44, 0x00);
+	mipi_dsi_dcs_write_seq(dsi, 0x49, 0x3C);
+	mipi_dsi_dcs_write_seq(dsi, 0x59, 0xFE);
+	mipi_dsi_dcs_write_seq(dsi, 0x5C, 0x00);
+	mipi_dsi_dcs_write_seq(dsi, 0x91, 0x77);
+	mipi_dsi_dcs_write_seq(dsi, 0x92, 0x77);
+	mipi_dsi_dcs_write_seq(dsi, 0xA0, 0x55);
+	mipi_dsi_dcs_write_seq(dsi, 0xA1, 0x50);
+	mipi_dsi_dcs_write_seq(dsi, 0xA4, 0x9C);
+	mipi_dsi_dcs_write_seq(dsi, 0xA7, 0x02);
+	mipi_dsi_dcs_write_seq(dsi, 0xA8, 0x01);
+	mipi_dsi_dcs_write_seq(dsi, 0xA9, 0x01);
+	mipi_dsi_dcs_write_seq(dsi, 0xAA, 0xFC);
+	mipi_dsi_dcs_write_seq(dsi, 0xAB, 0x28);
+	mipi_dsi_dcs_write_seq(dsi, 0xAC, 0x06);
+	mipi_dsi_dcs_write_seq(dsi, 0xAD, 0x06);
+	mipi_dsi_dcs_write_seq(dsi, 0xAE, 0x06);
+	mipi_dsi_dcs_write_seq(dsi, 0xAF, 0x03);
+	mipi_dsi_dcs_write_seq(dsi, 0xB0, 0x08);
+	mipi_dsi_dcs_write_seq(dsi, 0xB1, 0x26);
+	mipi_dsi_dcs_write_seq(dsi, 0xB2, 0x28);
+	mipi_dsi_dcs_write_seq(dsi, 0xB3, 0x28);
+	mipi_dsi_dcs_write_seq(dsi, 0xB4, 0x33);
+	mipi_dsi_dcs_write_seq(dsi, 0xB5, 0x08);
+	mipi_dsi_dcs_write_seq(dsi, 0xB6, 0x26);
+	mipi_dsi_dcs_write_seq(dsi, 0xB7, 0x08);
+	mipi_dsi_dcs_write_seq(dsi, 0xB8, 0x26);
+	mipi_dsi_dcs_write_seq(dsi, 0xFF, 0x30);
+	mipi_dsi_dcs_write_seq(dsi, 0xFF, 0x52);
+	mipi_dsi_dcs_write_seq(dsi, 0xFF, 0x02);
+	mipi_dsi_dcs_write_seq(dsi, 0xB1, 0x0E);
+	mipi_dsi_dcs_write_seq(dsi, 0xD1, 0x0E);
+	mipi_dsi_dcs_write_seq(dsi, 0xB4, 0x29);
+	mipi_dsi_dcs_write_seq(dsi, 0xD4, 0x2B);
+	mipi_dsi_dcs_write_seq(dsi, 0xB2, 0x0C);
+	mipi_dsi_dcs_write_seq(dsi, 0xD2, 0x0A);
+	mipi_dsi_dcs_write_seq(dsi, 0xB3, 0x28);
+	mipi_dsi_dcs_write_seq(dsi, 0xD3, 0x28);
+	mipi_dsi_dcs_write_seq(dsi, 0xB6, 0x11);
+	mipi_dsi_dcs_write_seq(dsi, 0xD6, 0x0D);
+	mipi_dsi_dcs_write_seq(dsi, 0xB7, 0x32);
+	mipi_dsi_dcs_write_seq(dsi, 0xD7, 0x30);
+	mipi_dsi_dcs_write_seq(dsi, 0xC1, 0x04);
+	mipi_dsi_dcs_write_seq(dsi, 0xE1, 0x06);
+	mipi_dsi_dcs_write_seq(dsi, 0xB8, 0x0A);
+	mipi_dsi_dcs_write_seq(dsi, 0xD8, 0x0A);
+	mipi_dsi_dcs_write_seq(dsi, 0xB9, 0x01);
+	mipi_dsi_dcs_write_seq(dsi, 0xD9, 0x01);
+	mipi_dsi_dcs_write_seq(dsi, 0xBD, 0x13);
+	mipi_dsi_dcs_write_seq(dsi, 0xDD, 0x13);
+	mipi_dsi_dcs_write_seq(dsi, 0xBC, 0x11);
+	mipi_dsi_dcs_write_seq(dsi, 0xDC, 0x11);
+	mipi_dsi_dcs_write_seq(dsi, 0xBB, 0x0F);
+	mipi_dsi_dcs_write_seq(dsi, 0xDB, 0x0F);
+	mipi_dsi_dcs_write_seq(dsi, 0xBA, 0x0F);
+	mipi_dsi_dcs_write_seq(dsi, 0xDA, 0x0F);
+	mipi_dsi_dcs_write_seq(dsi, 0xBE, 0x18);
+	mipi_dsi_dcs_write_seq(dsi, 0xDE, 0x18);
+	mipi_dsi_dcs_write_seq(dsi, 0xBF, 0x0F);
+	mipi_dsi_dcs_write_seq(dsi, 0xDF, 0x0F);
+	mipi_dsi_dcs_write_seq(dsi, 0xC0, 0x17);
+	mipi_dsi_dcs_write_seq(dsi, 0xE0, 0x17);
+	mipi_dsi_dcs_write_seq(dsi, 0xB5, 0x3B);
+	mipi_dsi_dcs_write_seq(dsi, 0xD5, 0x3C);
+	mipi_dsi_dcs_write_seq(dsi, 0xB0, 0x0B);
+	mipi_dsi_dcs_write_seq(dsi, 0xD0, 0x0C);
+	mipi_dsi_dcs_write_seq(dsi, 0xFF, 0x30);
+	mipi_dsi_dcs_write_seq(dsi, 0xFF, 0x52);
+	mipi_dsi_dcs_write_seq(dsi, 0xFF, 0x03);
+	mipi_dsi_dcs_write_seq(dsi, 0x00, 0x2A);
+	mipi_dsi_dcs_write_seq(dsi, 0x01, 0x2A);
+	mipi_dsi_dcs_write_seq(dsi, 0x02, 0x2A);
+	mipi_dsi_dcs_write_seq(dsi, 0x03, 0x2A);
+	mipi_dsi_dcs_write_seq(dsi, 0x04, 0x61);
+	mipi_dsi_dcs_write_seq(dsi, 0x05, 0x80);
+	mipi_dsi_dcs_write_seq(dsi, 0x06, 0xC7);
+	mipi_dsi_dcs_write_seq(dsi, 0x07, 0x01);
+	mipi_dsi_dcs_write_seq(dsi, 0x08, 0x82);
+	mipi_dsi_dcs_write_seq(dsi, 0x09, 0x83);
+	mipi_dsi_dcs_write_seq(dsi, 0x30, 0x2A);
+	mipi_dsi_dcs_write_seq(dsi, 0x31, 0x2A);
+	mipi_dsi_dcs_write_seq(dsi, 0x32, 0x2A);
+	mipi_dsi_dcs_write_seq(dsi, 0x33, 0x2A);
+	mipi_dsi_dcs_write_seq(dsi, 0x34, 0x61);
+	mipi_dsi_dcs_write_seq(dsi, 0x35, 0xC5);
+	mipi_dsi_dcs_write_seq(dsi, 0x36, 0x80);
+	mipi_dsi_dcs_write_seq(dsi, 0x37, 0x23);
+	mipi_dsi_dcs_write_seq(dsi, 0x40, 0x82);
+	mipi_dsi_dcs_write_seq(dsi, 0x41, 0x83);
+	mipi_dsi_dcs_write_seq(dsi, 0x42, 0x80);
+	mipi_dsi_dcs_write_seq(dsi, 0x43, 0x81);
+	mipi_dsi_dcs_write_seq(dsi, 0x44, 0x11);
+	mipi_dsi_dcs_write_seq(dsi, 0x45, 0xF2);
+	mipi_dsi_dcs_write_seq(dsi, 0x46, 0xF1);
+	mipi_dsi_dcs_write_seq(dsi, 0x47, 0x11);
+	mipi_dsi_dcs_write_seq(dsi, 0x48, 0xF4);
+	mipi_dsi_dcs_write_seq(dsi, 0x49, 0xF3);
+	mipi_dsi_dcs_write_seq(dsi, 0x50, 0x02);
+	mipi_dsi_dcs_write_seq(dsi, 0x51, 0x01);
+	mipi_dsi_dcs_write_seq(dsi, 0x52, 0x04);
+	mipi_dsi_dcs_write_seq(dsi, 0x53, 0x03);
+	mipi_dsi_dcs_write_seq(dsi, 0x54, 0x11);
+	mipi_dsi_dcs_write_seq(dsi, 0x55, 0xF6);
+	mipi_dsi_dcs_write_seq(dsi, 0x56, 0xF5);
+	mipi_dsi_dcs_write_seq(dsi, 0x57, 0x11);
+	mipi_dsi_dcs_write_seq(dsi, 0x58, 0xF8);
+	mipi_dsi_dcs_write_seq(dsi, 0x59, 0xF7);
+	mipi_dsi_dcs_write_seq(dsi, 0x7E, 0x02);
+	mipi_dsi_dcs_write_seq(dsi, 0x7F, 0x80);
+	mipi_dsi_dcs_write_seq(dsi, 0xE0, 0x5A);
+	mipi_dsi_dcs_write_seq(dsi, 0xB1, 0x00);
+	mipi_dsi_dcs_write_seq(dsi, 0xB4, 0x0E);
+	mipi_dsi_dcs_write_seq(dsi, 0xB5, 0x0F);
+	mipi_dsi_dcs_write_seq(dsi, 0xB6, 0x04);
+	mipi_dsi_dcs_write_seq(dsi, 0xB7, 0x07);
+	mipi_dsi_dcs_write_seq(dsi, 0xB8, 0x06);
+	mipi_dsi_dcs_write_seq(dsi, 0xB9, 0x05);
+	mipi_dsi_dcs_write_seq(dsi, 0xBA, 0x0F);
+	mipi_dsi_dcs_write_seq(dsi, 0xC7, 0x00);
+	mipi_dsi_dcs_write_seq(dsi, 0xCA, 0x0E);
+	mipi_dsi_dcs_write_seq(dsi, 0xCB, 0x0F);
+	mipi_dsi_dcs_write_seq(dsi, 0xCC, 0x04);
+	mipi_dsi_dcs_write_seq(dsi, 0xCD, 0x07);
+	mipi_dsi_dcs_write_seq(dsi, 0xCE, 0x06);
+	mipi_dsi_dcs_write_seq(dsi, 0xCF, 0x05);
+	mipi_dsi_dcs_write_seq(dsi, 0xD0, 0x0F);
+	mipi_dsi_dcs_write_seq(dsi, 0x81, 0x0F);
+	mipi_dsi_dcs_write_seq(dsi, 0x84, 0x0E);
+	mipi_dsi_dcs_write_seq(dsi, 0x85, 0x0F);
+	mipi_dsi_dcs_write_seq(dsi, 0x86, 0x07);
+	mipi_dsi_dcs_write_seq(dsi, 0x87, 0x04);
+	mipi_dsi_dcs_write_seq(dsi, 0x88, 0x05);
+	mipi_dsi_dcs_write_seq(dsi, 0x89, 0x06);
+	mipi_dsi_dcs_write_seq(dsi, 0x8A, 0x00);
+	mipi_dsi_dcs_write_seq(dsi, 0x97, 0x0F);
+	mipi_dsi_dcs_write_seq(dsi, 0x9A, 0x0E);
+	mipi_dsi_dcs_write_seq(dsi, 0x9B, 0x0F);
+	mipi_dsi_dcs_write_seq(dsi, 0x9C, 0x07);
+	mipi_dsi_dcs_write_seq(dsi, 0x9D, 0x04);
+	mipi_dsi_dcs_write_seq(dsi, 0x9E, 0x05);
+	mipi_dsi_dcs_write_seq(dsi, 0x9F, 0x06);
+	mipi_dsi_dcs_write_seq(dsi, 0xA0, 0x00);
+	mipi_dsi_dcs_write_seq(dsi, 0xFF, 0x30);
+	mipi_dsi_dcs_write_seq(dsi, 0xFF, 0x52);
+	mipi_dsi_dcs_write_seq(dsi, 0xFF, 0x02);
+	mipi_dsi_dcs_write_seq(dsi, 0x01, 0x01);
+	mipi_dsi_dcs_write_seq(dsi, 0x02, 0xDA);
+	mipi_dsi_dcs_write_seq(dsi, 0x03, 0xBA);
+	mipi_dsi_dcs_write_seq(dsi, 0x04, 0xA8);
+	mipi_dsi_dcs_write_seq(dsi, 0x05, 0x9A);
+	mipi_dsi_dcs_write_seq(dsi, 0x06, 0x70);
+	mipi_dsi_dcs_write_seq(dsi, 0x07, 0xFF);
+	mipi_dsi_dcs_write_seq(dsi, 0x08, 0x91);
+	mipi_dsi_dcs_write_seq(dsi, 0x09, 0x90);
+	mipi_dsi_dcs_write_seq(dsi, 0x0A, 0xFF);
+	mipi_dsi_dcs_write_seq(dsi, 0x0B, 0x8F);
+	mipi_dsi_dcs_write_seq(dsi, 0x0C, 0x60);
+	mipi_dsi_dcs_write_seq(dsi, 0x0D, 0x58);
+	mipi_dsi_dcs_write_seq(dsi, 0x0E, 0x48);
+	mipi_dsi_dcs_write_seq(dsi, 0x0F, 0x38);
+	mipi_dsi_dcs_write_seq(dsi, 0x10, 0x2B);
+	mipi_dsi_dcs_write_seq(dsi, 0xFF, 0x30);
+	mipi_dsi_dcs_write_seq(dsi, 0xFF, 0x52);
+	mipi_dsi_dcs_write_seq(dsi, 0xFF, 0x00);
+	mipi_dsi_dcs_write_seq(dsi, 0x36, 0x02);
+	mipi_dsi_dcs_write_seq(dsi, 0x3A, 0x70);
+
+	dev_dbg(ctx->dev, "Panel init sequence done\n");
+
+	return 0;
+}
+
+static int panel_nv3051d_unprepare(struct drm_panel *panel)
+{
+	struct panel_nv3051d *ctx = panel_to_panelnv3051d(panel);
+	struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+	int ret;
+
+	ret = mipi_dsi_dcs_set_display_off(dsi);
+	if (ret < 0)
+		dev_err(ctx->dev, "failed to set display off: %d\n", ret);
+
+	msleep(20);
+
+	ret = mipi_dsi_dcs_enter_sleep_mode(dsi);
+	if (ret < 0) {
+		dev_err(ctx->dev, "failed to enter sleep mode: %d\n", ret);
+		return ret;
+	}
+
+	usleep_range(10000, 15000);
+
+	regulator_disable(ctx->vdd);
+
+	return 0;
+}
+
+static int panel_nv3051d_prepare(struct drm_panel *panel)
+{
+	struct panel_nv3051d *ctx = panel_to_panelnv3051d(panel);
+	struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+	int ret;
+
+	dev_dbg(ctx->dev, "Resetting the panel\n");
+	ret = regulator_enable(ctx->vdd);
+	if (ret < 0) {
+		dev_err(ctx->dev, "Failed to enable vdd supply: %d\n", ret);
+		return ret;
+	}
+
+	usleep_range(2000, 3000);
+	gpiod_set_value_cansleep(ctx->reset_gpio, 1);
+	msleep(150);
+	gpiod_set_value_cansleep(ctx->reset_gpio, 0);
+	msleep(20);
+
+	ret = panel_nv3051d_init_sequence(ctx);
+	if (ret < 0) {
+		dev_err(ctx->dev, "Panel init sequence failed: %d\n", ret);
+		goto disable_vdd;
+	}
+
+	ret = mipi_dsi_dcs_exit_sleep_mode(dsi);
+	if (ret < 0) {
+		dev_err(ctx->dev, "Failed to exit sleep mode: %d\n", ret);
+		goto disable_vdd;
+	}
+
+	msleep(200);
+
+	ret = mipi_dsi_dcs_set_display_on(dsi);
+	if (ret < 0) {
+		dev_err(ctx->dev, "Failed to set display on: %d\n", ret);
+		goto disable_vdd;
+	}
+
+	usleep_range(10000, 15000);
+
+	return 0;
+
+disable_vdd:
+	regulator_disable(ctx->vdd);
+	return ret;
+}
+
+static int panel_nv3051d_get_modes(struct drm_panel *panel,
+				   struct drm_connector *connector)
+{
+	struct panel_nv3051d *ctx = panel_to_panelnv3051d(panel);
+	const struct nv3051d_panel_info *panel_info = ctx->panel_info;
+	struct drm_display_mode *mode;
+	unsigned int i;
+
+	for (i = 0; i < panel_info->num_modes; i++) {
+		mode = drm_mode_duplicate(connector->dev,
+					  &panel_info->display_modes[i]);
+		if (!mode)
+			return -ENOMEM;
+
+		drm_mode_set_name(mode);
+
+		mode->type = DRM_MODE_TYPE_DRIVER;
+		if (panel_info->num_modes == 1)
+			mode->type |= DRM_MODE_TYPE_PREFERRED;
+
+		drm_mode_probed_add(connector, mode);
+	}
+
+	connector->display_info.bpc = 8;
+	connector->display_info.width_mm = panel_info->width_mm;
+	connector->display_info.height_mm = panel_info->height_mm;
+	connector->display_info.bus_flags = panel_info->bus_flags;
+
+	return panel_info->num_modes;
+}
+
+static const struct drm_panel_funcs panel_nv3051d_funcs = {
+	.unprepare	= panel_nv3051d_unprepare,
+	.prepare	= panel_nv3051d_prepare,
+	.get_modes	= panel_nv3051d_get_modes,
+};
+
+static int panel_nv3051d_probe(struct mipi_dsi_device *dsi)
+{
+	struct device *dev = &dsi->dev;
+	struct panel_nv3051d *ctx;
+	int ret;
+
+	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->dev = dev;
+
+	ctx->panel_info = of_device_get_match_data(dev);
+	if (!ctx->panel_info)
+		return -EINVAL;
+
+	ctx->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(ctx->reset_gpio)) {
+		dev_err(dev, "cannot get reset gpio\n");
+		return PTR_ERR(ctx->reset_gpio);
+	}
+
+	ctx->vdd = devm_regulator_get(dev, "vdd");
+	if (IS_ERR(ctx->vdd)) {
+		ret = PTR_ERR(ctx->vdd);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to request vdd regulator: %d\n", ret);
+		return ret;
+	}
+
+	mipi_dsi_set_drvdata(dsi, ctx);
+
+	dsi->lanes = 4;
+	dsi->format = MIPI_DSI_FMT_RGB888;
+	dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
+			  MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_NO_EOT_PACKET;
+
+	drm_panel_init(&ctx->panel, &dsi->dev, &panel_nv3051d_funcs,
+		       DRM_MODE_CONNECTOR_DSI);
+
+	ret = drm_panel_of_backlight(&ctx->panel);
+	if (ret)
+		return ret;
+
+	drm_panel_add(&ctx->panel);
+
+	ret = mipi_dsi_attach(dsi);
+	if (ret < 0) {
+		dev_err(dev, "mipi_dsi_attach failed: %d\n", ret);
+		drm_panel_remove(&ctx->panel);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void panel_nv3051d_shutdown(struct mipi_dsi_device *dsi)
+{
+	struct panel_nv3051d *ctx = mipi_dsi_get_drvdata(dsi);
+	int ret;
+
+	ret = drm_panel_unprepare(&ctx->panel);
+	if (ret < 0)
+		dev_err(&dsi->dev, "Failed to unprepare panel: %d\n", ret);
+
+	ret = drm_panel_disable(&ctx->panel);
+	if (ret < 0)
+		dev_err(&dsi->dev, "Failed to disable panel: %d\n", ret);
+}
+
+static void panel_nv3051d_remove(struct mipi_dsi_device *dsi)
+{
+	struct panel_nv3051d *ctx = mipi_dsi_get_drvdata(dsi);
+	int ret;
+
+	panel_nv3051d_shutdown(dsi);
+
+	ret = mipi_dsi_detach(dsi);
+	if (ret < 0)
+		dev_err(&dsi->dev, "Failed to detach from DSI host: %d\n", ret);
+
+	drm_panel_remove(&ctx->panel);
+}
+
+static const struct drm_display_mode nv3051d_rgxx3_modes[] = {
+	{ /* 120hz */
+		.hdisplay	= 640,
+		.hsync_start	= 640 + 40,
+		.hsync_end	= 640 + 40 + 2,
+		.htotal		= 640 + 40 + 2 + 80,
+		.vdisplay	= 480,
+		.vsync_start	= 480 + 18,
+		.vsync_end	= 480 + 18 + 2,
+		.vtotal		= 480 + 18 + 2 + 28,
+		.clock		= 48300,
+		.flags		= DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+	},
+	{ /* 100hz */
+		.hdisplay       = 640,
+		.hsync_start    = 640 + 40,
+		.hsync_end      = 640 + 40 + 2,
+		.htotal         = 640 + 40 + 2 + 80,
+		.vdisplay       = 480,
+		.vsync_start    = 480 + 18,
+		.vsync_end      = 480 + 18 + 2,
+		.vtotal         = 480 + 18 + 2 + 28,
+		.clock          = 40250,
+		.flags		= DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+	},
+	{ /* 60hz */
+		.hdisplay	= 640,
+		.hsync_start	= 640 + 40,
+		.hsync_end	= 640 + 40 + 2,
+		.htotal		= 640 + 40 + 2 + 80,
+		.vdisplay	= 480,
+		.vsync_start	= 480 + 18,
+		.vsync_end	= 480 + 18 + 2,
+		.vtotal		= 480 + 18 + 2 + 28,
+		.clock		= 24150,
+		.flags		= DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC,
+	},
+};
+
+static const struct nv3051d_panel_info nv3051d_rgxx3_info = {
+	.display_modes = nv3051d_rgxx3_modes,
+	.num_modes = ARRAY_SIZE(nv3051d_rgxx3_modes),
+	.width_mm = 70,
+	.height_mm = 57,
+	.bus_flags = DRM_BUS_FLAG_DE_LOW | DRM_BUS_FLAG_PIXDATA_DRIVE_NEGEDGE,
+};
+
+static const struct of_device_id newvision_nv3051d_of_match[] = {
+	{ .compatible = "newvision,nv3051d", .data = &nv3051d_rgxx3_info },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, newvision_nv3051d_of_match);
+
+static struct mipi_dsi_driver newvision_nv3051d_driver = {
+	.driver = {
+		.name = "panel-newvision-nv3051d",
+		.of_match_table = newvision_nv3051d_of_match,
+	},
+	.probe	= panel_nv3051d_probe,
+	.remove = panel_nv3051d_remove,
+	.shutdown = panel_nv3051d_shutdown,
+};
+module_mipi_dsi_driver(newvision_nv3051d_driver);
+
+MODULE_AUTHOR("Chris Morgan <macromorgan@hotmail.com>");
+MODULE_DESCRIPTION("DRM driver for Newvision NV3051D based MIPI DSI panels");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 2944228a8e2c..8a3b685c2fcc 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -2500,6 +2500,7 @@ static const struct display_timing logictechno_lt161010_2nh_timing = {
 static const struct panel_desc logictechno_lt161010_2nh = {
 	.timings = &logictechno_lt161010_2nh_timing,
 	.num_timings = 1,
+	.bpc = 6,
 	.size = {
 		.width = 154,
 		.height = 86,
@@ -2529,6 +2530,7 @@ static const struct display_timing logictechno_lt170410_2whc_timing = {
 static const struct panel_desc logictechno_lt170410_2whc = {
 	.timings = &logictechno_lt170410_2whc_timing,
 	.num_timings = 1,
+	.bpc = 8,
 	.size = {
 		.width = 217,
 		.height = 136,
diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c
index e246d914e7f6..4e83a1891f3e 100644
--- a/drivers/gpu/drm/panfrost/panfrost_mmu.c
+++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c
@@ -250,13 +250,22 @@ void panfrost_mmu_reset(struct panfrost_device *pfdev)
 
 static size_t get_pgsize(u64 addr, size_t size, size_t *count)
 {
+	/*
+	 * io-pgtable only operates on multiple pages within a single table
+	 * entry, so we need to split at boundaries of the table size, i.e.
+	 * the next block size up. The distance from address A to the next
+	 * boundary of block size B is logically B - A % B, but in unsigned
+	 * two's complement where B is a power of two we get the equivalence
+	 * B - A % B == (B - A) % B == (n * B - A) % B, and choose n = 0 :)
+	 */
 	size_t blk_offset = -addr % SZ_2M;
 
 	if (blk_offset || size < SZ_2M) {
 		*count = min_not_zero(blk_offset, size) / SZ_4K;
 		return SZ_4K;
 	}
-	*count = size / SZ_2M;
+	blk_offset = -addr % SZ_1G ?: SZ_1G;
+	*count = min(blk_offset, size) / SZ_2M;
 	return SZ_2M;
 }
 
diff --git a/drivers/gpu/drm/pl111/pl111_drv.c b/drivers/gpu/drm/pl111/pl111_drv.c
index eb25eedb5ee0..00deba0b7271 100644
--- a/drivers/gpu/drm/pl111/pl111_drv.c
+++ b/drivers/gpu/drm/pl111/pl111_drv.c
@@ -48,7 +48,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_bridge.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c
index 3044ca948ce2..a3b83f89e061 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.c
+++ b/drivers/gpu/drm/qxl/qxl_drv.c
@@ -37,6 +37,7 @@
 #include <drm/drm_aperture.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_file.h>
 #include <drm/drm_gem_ttm_helper.h>
 #include <drm/drm_module.h>
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 432758ad39a3..76f060810f63 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -38,7 +38,6 @@
 
 #include <drm/drm_crtc.h>
 #include <drm/drm_encoder.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_gem_ttm_helper.h>
 #include <drm/drm_ioctl.h>
 #include <drm/drm_gem.h>
diff --git a/drivers/gpu/drm/radeon/Kconfig b/drivers/gpu/drm/radeon/Kconfig
index 52819e7f1fca..97a277f9a25e 100644
--- a/drivers/gpu/drm/radeon/Kconfig
+++ b/drivers/gpu/drm/radeon/Kconfig
@@ -1,4 +1,34 @@
 # SPDX-License-Identifier: MIT
+
+config DRM_RADEON
+	tristate "ATI Radeon"
+	depends on DRM && PCI && MMU
+	depends on AGP || !AGP
+	select FW_LOADER
+	select DRM_DISPLAY_DP_HELPER
+	select DRM_DISPLAY_HELPER
+        select DRM_KMS_HELPER
+        select DRM_TTM
+	select DRM_TTM_HELPER
+	select SND_HDA_COMPONENT if SND_HDA_CORE
+	select POWER_SUPPLY
+	select HWMON
+	select BACKLIGHT_CLASS_DEVICE
+	select INTERVAL_TREE
+	# radeon depends on ACPI_VIDEO when ACPI is enabled, for select to work
+	# ACPI_VIDEO's dependencies must also be selected.
+	select INPUT if ACPI
+	select ACPI_VIDEO if ACPI
+	# On x86 ACPI_VIDEO also needs ACPI_WMI
+	select X86_PLATFORM_DEVICES if ACPI && X86
+	select ACPI_WMI if ACPI && X86
+	help
+	  Choose this option if you have an ATI Radeon graphics card.  There
+	  are both PCI and AGP versions.  You don't need to choose this to
+	  run the Radeon in plain VGA mode.
+
+	  If M is selected, the module will be called radeon.
+
 config DRM_RADEON_USERPTR
 	bool "Always enable userptr support"
 	depends on DRM_RADEON
diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h
index da35a970fcc0..235e59b547a1 100644
--- a/drivers/gpu/drm/radeon/atombios.h
+++ b/drivers/gpu/drm/radeon/atombios.h
@@ -3615,7 +3615,7 @@ typedef struct _ATOM_FAKE_EDID_PATCH_RECORD
 {
   UCHAR ucRecordType;
   UCHAR ucFakeEDIDLength;
-  UCHAR ucFakeEDIDString[1];    // This actually has ucFakeEdidLength elements.
+  UCHAR ucFakeEDIDString[];    // This actually has ucFakeEdidLength elements.
 } ATOM_FAKE_EDID_PATCH_RECORD;
 
 typedef struct  _ATOM_PANEL_RESOLUTION_PATCH_RECORD
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 166c18d62f6d..2e7161acd443 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -79,6 +79,7 @@
 #include <drm/ttm/ttm_execbuf_util.h>
 
 #include <drm/drm_gem.h>
+#include <drm/drm_audio_component.h>
 
 #include "radeon_family.h"
 #include "radeon_mode.h"
@@ -1796,6 +1797,9 @@ struct r600_audio {
 	struct radeon_audio_funcs *hdmi_funcs;
 	struct radeon_audio_funcs *dp_funcs;
 	struct radeon_audio_basic_funcs *funcs;
+	struct drm_audio_component *component;
+	bool component_registered;
+	struct mutex component_mutex;
 };
 
 /*
@@ -2994,6 +2998,10 @@ void radeon_irq_kms_set_irq_n_enabled(struct radeon_device *rdev,
 				      bool enable, const char *name,
 				      unsigned n);
 
+/* Audio component binding */
+void radeon_audio_component_init(struct radeon_device *rdev);
+void radeon_audio_component_fini(struct radeon_device *rdev);
+
 #include "radeon_object.h"
 
 #endif
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index 204127bad89c..4ad5a328d920 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -1727,8 +1727,11 @@ struct radeon_encoder_atom_dig *radeon_atombios_get_lvds_info(struct
 						}
 					}
 					record += fake_edid_record->ucFakeEDIDLength ?
-						fake_edid_record->ucFakeEDIDLength + 2 :
-						sizeof(ATOM_FAKE_EDID_PATCH_RECORD);
+						  struct_size(fake_edid_record,
+							      ucFakeEDIDString,
+							      fake_edid_record->ucFakeEDIDLength) :
+						  /* empty fake edid record must be 3 bytes long */
+						  sizeof(ATOM_FAKE_EDID_PATCH_RECORD) + 1;
 					break;
 				case LCD_PANEL_RESOLUTION_RECORD_TYPE:
 					panel_res_record = (ATOM_PANEL_RESOLUTION_PATCH_RECORD *)record;
diff --git a/drivers/gpu/drm/radeon/radeon_audio.c b/drivers/gpu/drm/radeon/radeon_audio.c
index 7c5e80d03fc9..d6ccaf24ee0c 100644
--- a/drivers/gpu/drm/radeon/radeon_audio.c
+++ b/drivers/gpu/drm/radeon/radeon_audio.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/gcd.h>
+#include <linux/component.h>
 
 #include <drm/drm_crtc.h>
 #include "dce6_afmt.h"
@@ -180,6 +181,8 @@ static struct radeon_audio_funcs dce6_dp_funcs = {
 	.dpms = evergreen_dp_enable,
 };
 
+static void radeon_audio_component_notify(struct radeon_device *rdev, int port);
+
 static void radeon_audio_enable(struct radeon_device *rdev,
 				struct r600_audio_pin *pin, u8 enable_mask)
 {
@@ -207,6 +210,8 @@ static void radeon_audio_enable(struct radeon_device *rdev,
 
 	if (rdev->audio.funcs->enable)
 		rdev->audio.funcs->enable(rdev, pin, enable_mask);
+
+	radeon_audio_component_notify(rdev, pin->id);
 }
 
 static void radeon_audio_interface_init(struct radeon_device *rdev)
@@ -721,3 +726,115 @@ unsigned int radeon_audio_decode_dfs_div(unsigned int div)
 	else
 		return 0;
 }
+
+/*
+ * Audio component support
+ */
+static void radeon_audio_component_notify(struct radeon_device *rdev, int port)
+{
+	struct drm_audio_component *acomp;
+
+	mutex_lock(&rdev->audio.component_mutex);
+	acomp = rdev->audio.component;
+	if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify)
+		acomp->audio_ops->pin_eld_notify(acomp->audio_ops->audio_ptr,
+						 port, -1);
+	mutex_unlock(&rdev->audio.component_mutex);
+}
+
+static int radeon_audio_component_get_eld(struct device *kdev, int port,
+					  int pipe, bool *enabled,
+					  unsigned char *buf, int max_bytes)
+{
+	struct drm_device *dev = dev_get_drvdata(kdev);
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_encoder *encoder;
+	struct radeon_encoder *radeon_encoder;
+	struct radeon_encoder_atom_dig *dig;
+	struct drm_connector *connector;
+	int ret = 0;
+
+	*enabled = false;
+	if (!rdev->audio.enabled || !rdev->mode_info.mode_config_initialized)
+		return 0;
+
+	list_for_each_entry(encoder, &rdev->ddev->mode_config.encoder_list, head) {
+		if (!radeon_encoder_is_digital(encoder))
+			continue;
+		radeon_encoder = to_radeon_encoder(encoder);
+		dig = radeon_encoder->enc_priv;
+		if (!dig->pin || dig->pin->id != port)
+			continue;
+		connector = radeon_get_connector_for_encoder(encoder);
+		if (!connector)
+			continue;
+		*enabled = true;
+		ret = drm_eld_size(connector->eld);
+		memcpy(buf, connector->eld, min(max_bytes, ret));
+		break;
+	}
+
+	return ret;
+}
+
+static const struct drm_audio_component_ops radeon_audio_component_ops = {
+	.get_eld = radeon_audio_component_get_eld,
+};
+
+static int radeon_audio_component_bind(struct device *kdev,
+				       struct device *hda_kdev, void *data)
+{
+	struct drm_device *dev = dev_get_drvdata(kdev);
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_audio_component *acomp = data;
+
+	if (WARN_ON(!device_link_add(hda_kdev, kdev, DL_FLAG_STATELESS)))
+		return -ENOMEM;
+
+	mutex_lock(&rdev->audio.component_mutex);
+	acomp->ops = &radeon_audio_component_ops;
+	acomp->dev = kdev;
+	rdev->audio.component = acomp;
+	mutex_unlock(&rdev->audio.component_mutex);
+
+	return 0;
+}
+
+static void radeon_audio_component_unbind(struct device *kdev,
+					  struct device *hda_kdev, void *data)
+{
+	struct drm_device *dev = dev_get_drvdata(kdev);
+	struct radeon_device *rdev = dev->dev_private;
+	struct drm_audio_component *acomp = data;
+
+	device_link_remove(hda_kdev, kdev);
+
+	mutex_lock(&rdev->audio.component_mutex);
+	rdev->audio.component = NULL;
+	acomp->ops = NULL;
+	acomp->dev = NULL;
+	mutex_unlock(&rdev->audio.component_mutex);
+}
+
+static const struct component_ops radeon_audio_component_bind_ops = {
+	.bind	= radeon_audio_component_bind,
+	.unbind	= radeon_audio_component_unbind,
+};
+
+void radeon_audio_component_init(struct radeon_device *rdev)
+{
+	if (rdev->audio.component_registered ||
+	    !radeon_audio || !radeon_audio_chipset_supported(rdev))
+		return;
+
+	if (!component_add(rdev->dev, &radeon_audio_component_bind_ops))
+		rdev->audio.component_registered = true;
+}
+
+void radeon_audio_component_fini(struct radeon_device *rdev)
+{
+	if (rdev->audio.component_registered) {
+		component_del(rdev->dev, &radeon_audio_component_bind_ops);
+		rdev->audio.component_registered = false;
+	}
+}
diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c
index 33121655d50b..63bdc9f6fc24 100644
--- a/drivers/gpu/drm/radeon/radeon_bios.c
+++ b/drivers/gpu/drm/radeon/radeon_bios.c
@@ -227,6 +227,7 @@ static bool radeon_atrm_get_bios(struct radeon_device *rdev)
 
 	if (!found)
 		return false;
+	pci_dev_put(pdev);
 
 	rdev->bios = kmalloc(size, GFP_KERNEL);
 	if (!rdev->bios) {
@@ -612,13 +613,14 @@ static bool radeon_acpi_vfct_bios(struct radeon_device *rdev)
 	acpi_size tbl_size;
 	UEFI_ACPI_VFCT *vfct;
 	unsigned offset;
+	bool r = false;
 
 	if (!ACPI_SUCCESS(acpi_get_table("VFCT", 1, &hdr)))
 		return false;
 	tbl_size = hdr->length;
 	if (tbl_size < sizeof(UEFI_ACPI_VFCT)) {
 		DRM_ERROR("ACPI VFCT table present but broken (too short #1)\n");
-		return false;
+		goto out;
 	}
 
 	vfct = (UEFI_ACPI_VFCT *)hdr;
@@ -631,13 +633,13 @@ static bool radeon_acpi_vfct_bios(struct radeon_device *rdev)
 		offset += sizeof(VFCT_IMAGE_HEADER);
 		if (offset > tbl_size) {
 			DRM_ERROR("ACPI VFCT image header truncated\n");
-			return false;
+			goto out;
 		}
 
 		offset += vhdr->ImageLength;
 		if (offset > tbl_size) {
 			DRM_ERROR("ACPI VFCT image truncated\n");
-			return false;
+			goto out;
 		}
 
 		if (vhdr->ImageLength &&
@@ -649,15 +651,18 @@ static bool radeon_acpi_vfct_bios(struct radeon_device *rdev)
 			rdev->bios = kmemdup(&vbios->VbiosContent,
 					     vhdr->ImageLength,
 					     GFP_KERNEL);
+			if (rdev->bios)
+				r = true;
 
-			if (!rdev->bios)
-				return false;
-			return true;
+			goto out;
 		}
 	}
 
 	DRM_ERROR("ACPI VFCT table present but broken (too short #2)\n");
-	return false;
+
+out:
+	acpi_put_table(hdr);
+	return r;
 }
 #else
 static inline bool radeon_acpi_vfct_bios(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index a556b6be1137..6344454a7721 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -1207,7 +1207,7 @@ static void radeon_check_arguments(struct radeon_device *rdev)
  * @pdev: pci dev pointer
  * @state: vga_switcheroo state
  *
- * Callback for the switcheroo driver.  Suspends or resumes the
+ * Callback for the switcheroo driver.  Suspends or resumes
  * the asics before or after it is powered up using ACPI methods.
  */
 static void radeon_switcheroo_set_state(struct pci_dev *pdev, enum vga_switcheroo_state state)
@@ -1312,6 +1312,7 @@ int radeon_device_init(struct radeon_device *rdev,
 	mutex_init(&rdev->pm.mutex);
 	mutex_init(&rdev->gpu_clock_mutex);
 	mutex_init(&rdev->srbm_mutex);
+	mutex_init(&rdev->audio.component_mutex);
 	init_rwsem(&rdev->pm.mclk_lock);
 	init_rwsem(&rdev->exclusive_lock);
 	init_waitqueue_head(&rdev->irq.vblank_queue);
@@ -1451,6 +1452,8 @@ int radeon_device_init(struct radeon_device *rdev,
 			goto failed;
 	}
 
+	radeon_audio_component_init(rdev);
+
 	r = radeon_ib_ring_tests(rdev);
 	if (r)
 		DRM_ERROR("ib ring test failed (%d).\n", r);
@@ -1513,6 +1516,7 @@ void radeon_device_fini(struct radeon_device *rdev)
 	rdev->shutdown = true;
 	/* evict vram memory */
 	radeon_bo_evict_vram(rdev);
+	radeon_audio_component_fini(rdev);
 	radeon_fini(rdev);
 	if (!pci_is_thunderbolt_attached(rdev->pdev))
 		vga_switcheroo_unregister_client(rdev->pdev);
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index cc6754d88b81..c1710ed1cab8 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -80,6 +80,8 @@ static const struct fb_ops radeonfb_ops = {
 	DRM_FB_HELPER_DEFAULT_OPS,
 	.fb_open = radeonfb_open,
 	.fb_release = radeonfb_release,
+	.fb_read = drm_fb_helper_cfb_read,
+	.fb_write = drm_fb_helper_cfb_write,
 	.fb_fillrect = drm_fb_helper_cfb_fillrect,
 	.fb_copyarea = drm_fb_helper_cfb_copyarea,
 	.fb_imageblit = drm_fb_helper_cfb_imageblit,
@@ -243,7 +245,7 @@ static int radeonfb_create(struct drm_fb_helper *helper,
 	rbo = gem_to_radeon_bo(gobj);
 
 	/* okay we have an object now allocate the framebuffer */
-	info = drm_fb_helper_alloc_fbi(helper);
+	info = drm_fb_helper_alloc_info(helper);
 	if (IS_ERR(info)) {
 		ret = PTR_ERR(info);
 		goto out;
@@ -309,7 +311,7 @@ static int radeon_fbdev_destroy(struct drm_device *dev, struct radeon_fbdev *rfb
 {
 	struct drm_framebuffer *fb = &rfbdev->fb;
 
-	drm_fb_helper_unregister_fbi(&rfbdev->helper);
+	drm_fb_helper_unregister_info(&rfbdev->helper);
 
 	if (fb->obj[0]) {
 		radeonfb_destroy_pinned_object(fb->obj[0]);
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index fff48306c05f..30402b5ce4c5 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -869,11 +869,11 @@ static ssize_t radeon_ttm_gtt_read(struct file *f, char __user *buf,
 
 		page = rdev->gart.pages[p];
 		if (page) {
-			ptr = kmap(page);
+			ptr = kmap_local_page(page);
 			ptr += off;
 
 			r = copy_to_user(buf, ptr, cur_size);
-			kunmap(rdev->gart.pages[p]);
+			kunmap_local(ptr);
 		} else
 			r = clear_user(buf, cur_size);
 
diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig
index c959e8c6be7d..b2bddbeca878 100644
--- a/drivers/gpu/drm/rcar-du/Kconfig
+++ b/drivers/gpu/drm/rcar-du/Kconfig
@@ -41,15 +41,26 @@ config DRM_RCAR_LVDS
 	depends on DRM_RCAR_USE_LVDS
 	select DRM_KMS_HELPER
 	select DRM_PANEL
-	select OF_FLATTREE
-	select OF_OVERLAY
+
+config DRM_RCAR_USE_MIPI_DSI
+	bool "R-Car DU MIPI DSI Encoder Support"
+	depends on DRM_BRIDGE && OF
+	default DRM_RCAR_DU
+	help
+	  Enable support for the R-Car Display Unit embedded MIPI DSI encoders.
 
 config DRM_RCAR_MIPI_DSI
-	tristate "R-Car DU MIPI DSI Encoder Support"
+	def_tristate DRM_RCAR_DU
+	depends on DRM_RCAR_USE_MIPI_DSI
+	select DRM_MIPI_DSI
+
+config DRM_RZG2L_MIPI_DSI
+	tristate "RZ/G2L MIPI DSI Encoder Support"
 	depends on DRM && DRM_BRIDGE && OF
+	depends on ARCH_RENESAS || COMPILE_TEST
 	select DRM_MIPI_DSI
 	help
-	  Enable support for the R-Car Display Unit embedded MIPI DSI encoders.
+	  Enable support for the RZ/G2L Display Unit embedded MIPI DSI encoders.
 
 config DRM_RCAR_VSP
 	bool "R-Car DU VSP Compositor Support" if ARM
diff --git a/drivers/gpu/drm/rcar-du/Makefile b/drivers/gpu/drm/rcar-du/Makefile
index 6f132325c8b7..b8f2c82651d9 100644
--- a/drivers/gpu/drm/rcar-du/Makefile
+++ b/drivers/gpu/drm/rcar-du/Makefile
@@ -14,3 +14,5 @@ obj-$(CONFIG_DRM_RCAR_DU)		+= rcar-du-drm.o
 obj-$(CONFIG_DRM_RCAR_DW_HDMI)		+= rcar_dw_hdmi.o
 obj-$(CONFIG_DRM_RCAR_LVDS)		+= rcar_lvds.o
 obj-$(CONFIG_DRM_RCAR_MIPI_DSI)		+= rcar_mipi_dsi.o
+
+obj-$(CONFIG_DRM_RZG2L_MIPI_DSI)	+= rzg2l_mipi_dsi.o
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_drv.c b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
index a2776f1d6f2c..d003e8d9e7a2 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_drv.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_drv.c
@@ -20,7 +20,7 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_managed.h>
 #include <drm/drm_probe_helper.h>
diff --git a/drivers/gpu/drm/rcar-du/rzg2l_mipi_dsi.c b/drivers/gpu/drm/rcar-du/rzg2l_mipi_dsi.c
new file mode 100644
index 000000000000..aa95b85a2964
--- /dev/null
+++ b/drivers/gpu/drm/rcar-du/rzg2l_mipi_dsi.c
@@ -0,0 +1,816 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RZ/G2L MIPI DSI Encoder Driver
+ *
+ * Copyright (C) 2022 Renesas Electronics Corporation
+ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_graph.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_bridge.h>
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_of.h>
+#include <drm/drm_panel.h>
+#include <drm/drm_probe_helper.h>
+
+#include "rzg2l_mipi_dsi_regs.h"
+
+struct rzg2l_mipi_dsi {
+	struct device *dev;
+	void __iomem *mmio;
+
+	struct reset_control *rstc;
+	struct reset_control *arstc;
+	struct reset_control *prstc;
+
+	struct mipi_dsi_host host;
+	struct drm_bridge bridge;
+	struct drm_bridge *next_bridge;
+
+	struct clk *vclk;
+
+	enum mipi_dsi_pixel_format format;
+	unsigned int num_data_lanes;
+	unsigned int lanes;
+	unsigned long mode_flags;
+};
+
+static inline struct rzg2l_mipi_dsi *
+bridge_to_rzg2l_mipi_dsi(struct drm_bridge *bridge)
+{
+	return container_of(bridge, struct rzg2l_mipi_dsi, bridge);
+}
+
+static inline struct rzg2l_mipi_dsi *
+host_to_rzg2l_mipi_dsi(struct mipi_dsi_host *host)
+{
+	return container_of(host, struct rzg2l_mipi_dsi, host);
+}
+
+struct rzg2l_mipi_dsi_timings {
+	unsigned long hsfreq_max;
+	u32 t_init;
+	u32 tclk_prepare;
+	u32 ths_prepare;
+	u32 tclk_zero;
+	u32 tclk_pre;
+	u32 tclk_post;
+	u32 tclk_trail;
+	u32 ths_zero;
+	u32 ths_trail;
+	u32 ths_exit;
+	u32 tlpx;
+};
+
+static const struct rzg2l_mipi_dsi_timings rzg2l_mipi_dsi_global_timings[] = {
+	{
+		.hsfreq_max = 80000,
+		.t_init = 79801,
+		.tclk_prepare = 8,
+		.ths_prepare = 13,
+		.tclk_zero = 33,
+		.tclk_pre = 24,
+		.tclk_post = 94,
+		.tclk_trail = 10,
+		.ths_zero = 23,
+		.ths_trail = 17,
+		.ths_exit = 13,
+		.tlpx = 6,
+	},
+	{
+		.hsfreq_max = 125000,
+		.t_init = 79801,
+		.tclk_prepare = 8,
+		.ths_prepare = 12,
+		.tclk_zero = 33,
+		.tclk_pre = 15,
+		.tclk_post = 94,
+		.tclk_trail = 10,
+		.ths_zero = 23,
+		.ths_trail = 17,
+		.ths_exit = 13,
+		.tlpx = 6,
+	},
+	{
+		.hsfreq_max = 250000,
+		.t_init = 79801,
+		.tclk_prepare = 8,
+		.ths_prepare = 12,
+		.tclk_zero = 33,
+		.tclk_pre = 13,
+		.tclk_post = 94,
+		.tclk_trail = 10,
+		.ths_zero = 23,
+		.ths_trail = 16,
+		.ths_exit = 13,
+		.tlpx = 6,
+	},
+	{
+		.hsfreq_max = 360000,
+		.t_init = 79801,
+		.tclk_prepare = 8,
+		.ths_prepare = 10,
+		.tclk_zero = 33,
+		.tclk_pre = 4,
+		.tclk_post = 35,
+		.tclk_trail = 7,
+		.ths_zero = 16,
+		.ths_trail = 9,
+		.ths_exit = 13,
+		.tlpx = 6,
+	},
+	{
+		.hsfreq_max = 720000,
+		.t_init = 79801,
+		.tclk_prepare = 8,
+		.ths_prepare = 9,
+		.tclk_zero = 33,
+		.tclk_pre = 4,
+		.tclk_post = 35,
+		.tclk_trail = 7,
+		.ths_zero = 16,
+		.ths_trail = 9,
+		.ths_exit = 13,
+		.tlpx = 6,
+	},
+	{
+		.hsfreq_max = 1500000,
+		.t_init = 79801,
+		.tclk_prepare = 8,
+		.ths_prepare = 9,
+		.tclk_zero = 33,
+		.tclk_pre = 4,
+		.tclk_post = 35,
+		.tclk_trail = 7,
+		.ths_zero = 16,
+		.ths_trail = 9,
+		.ths_exit = 13,
+		.tlpx = 6,
+	},
+};
+
+static void rzg2l_mipi_dsi_phy_write(struct rzg2l_mipi_dsi *dsi, u32 reg, u32 data)
+{
+	iowrite32(data, dsi->mmio + reg);
+}
+
+static void rzg2l_mipi_dsi_link_write(struct rzg2l_mipi_dsi *dsi, u32 reg, u32 data)
+{
+	iowrite32(data, dsi->mmio + LINK_REG_OFFSET + reg);
+}
+
+static u32 rzg2l_mipi_dsi_phy_read(struct rzg2l_mipi_dsi *dsi, u32 reg)
+{
+	return ioread32(dsi->mmio + reg);
+}
+
+static u32 rzg2l_mipi_dsi_link_read(struct rzg2l_mipi_dsi *dsi, u32 reg)
+{
+	return ioread32(dsi->mmio + LINK_REG_OFFSET + reg);
+}
+
+/* -----------------------------------------------------------------------------
+ * Hardware Setup
+ */
+
+static int rzg2l_mipi_dsi_dphy_init(struct rzg2l_mipi_dsi *dsi,
+				    unsigned long hsfreq)
+{
+	const struct rzg2l_mipi_dsi_timings *dphy_timings;
+	unsigned int i;
+	u32 dphyctrl0;
+	u32 dphytim0;
+	u32 dphytim1;
+	u32 dphytim2;
+	u32 dphytim3;
+	int ret;
+
+	/* All DSI global operation timings are set with recommended setting */
+	for (i = 0; i < ARRAY_SIZE(rzg2l_mipi_dsi_global_timings); ++i) {
+		dphy_timings = &rzg2l_mipi_dsi_global_timings[i];
+		if (hsfreq <= dphy_timings->hsfreq_max)
+			break;
+	}
+
+	/* Initializing DPHY before accessing LINK */
+	dphyctrl0 = DSIDPHYCTRL0_CAL_EN_HSRX_OFS | DSIDPHYCTRL0_CMN_MASTER_EN |
+		    DSIDPHYCTRL0_RE_VDD_DETVCCQLV18 | DSIDPHYCTRL0_EN_BGR;
+
+	rzg2l_mipi_dsi_phy_write(dsi, DSIDPHYCTRL0, dphyctrl0);
+	usleep_range(20, 30);
+
+	dphyctrl0 |= DSIDPHYCTRL0_EN_LDO1200;
+	rzg2l_mipi_dsi_phy_write(dsi, DSIDPHYCTRL0, dphyctrl0);
+	usleep_range(10, 20);
+
+	dphytim0 = DSIDPHYTIM0_TCLK_MISS(0) |
+		   DSIDPHYTIM0_T_INIT(dphy_timings->t_init);
+	dphytim1 = DSIDPHYTIM1_THS_PREPARE(dphy_timings->ths_prepare) |
+		   DSIDPHYTIM1_TCLK_PREPARE(dphy_timings->tclk_prepare) |
+		   DSIDPHYTIM1_THS_SETTLE(0) |
+		   DSIDPHYTIM1_TCLK_SETTLE(0);
+	dphytim2 = DSIDPHYTIM2_TCLK_TRAIL(dphy_timings->tclk_trail) |
+		   DSIDPHYTIM2_TCLK_POST(dphy_timings->tclk_post) |
+		   DSIDPHYTIM2_TCLK_PRE(dphy_timings->tclk_pre) |
+		   DSIDPHYTIM2_TCLK_ZERO(dphy_timings->tclk_zero);
+	dphytim3 = DSIDPHYTIM3_TLPX(dphy_timings->tlpx) |
+		   DSIDPHYTIM3_THS_EXIT(dphy_timings->ths_exit) |
+		   DSIDPHYTIM3_THS_TRAIL(dphy_timings->ths_trail) |
+		   DSIDPHYTIM3_THS_ZERO(dphy_timings->ths_zero);
+
+	rzg2l_mipi_dsi_phy_write(dsi, DSIDPHYTIM0, dphytim0);
+	rzg2l_mipi_dsi_phy_write(dsi, DSIDPHYTIM1, dphytim1);
+	rzg2l_mipi_dsi_phy_write(dsi, DSIDPHYTIM2, dphytim2);
+	rzg2l_mipi_dsi_phy_write(dsi, DSIDPHYTIM3, dphytim3);
+
+	ret = reset_control_deassert(dsi->rstc);
+	if (ret < 0)
+		return ret;
+
+	udelay(1);
+
+	return 0;
+}
+
+static void rzg2l_mipi_dsi_dphy_exit(struct rzg2l_mipi_dsi *dsi)
+{
+	u32 dphyctrl0;
+
+	dphyctrl0 = rzg2l_mipi_dsi_phy_read(dsi, DSIDPHYCTRL0);
+
+	dphyctrl0 &= ~(DSIDPHYCTRL0_EN_LDO1200 | DSIDPHYCTRL0_EN_BGR);
+	rzg2l_mipi_dsi_phy_write(dsi, DSIDPHYCTRL0, dphyctrl0);
+
+	reset_control_assert(dsi->rstc);
+}
+
+static int rzg2l_mipi_dsi_startup(struct rzg2l_mipi_dsi *dsi,
+				  const struct drm_display_mode *mode)
+{
+	unsigned long hsfreq;
+	unsigned int bpp;
+	u32 txsetr;
+	u32 clstptsetr;
+	u32 lptrnstsetr;
+	u32 clkkpt;
+	u32 clkbfht;
+	u32 clkstpt;
+	u32 golpbkt;
+	int ret;
+
+	/*
+	 * Relationship between hsclk and vclk must follow
+	 * vclk * bpp = hsclk * 8 * lanes
+	 * where vclk: video clock (Hz)
+	 *       bpp: video pixel bit depth
+	 *       hsclk: DSI HS Byte clock frequency (Hz)
+	 *       lanes: number of data lanes
+	 *
+	 * hsclk(bit) = hsclk(byte) * 8
+	 */
+	bpp = mipi_dsi_pixel_format_to_bpp(dsi->format);
+	hsfreq = (mode->clock * bpp * 8) / (8 * dsi->lanes);
+
+	ret = pm_runtime_resume_and_get(dsi->dev);
+	if (ret < 0)
+		return ret;
+
+	clk_set_rate(dsi->vclk, mode->clock * 1000);
+
+	ret = rzg2l_mipi_dsi_dphy_init(dsi, hsfreq);
+	if (ret < 0)
+		goto err_phy;
+
+	/* Enable Data lanes and Clock lanes */
+	txsetr = TXSETR_DLEN | TXSETR_NUMLANEUSE(dsi->lanes - 1) | TXSETR_CLEN;
+	rzg2l_mipi_dsi_link_write(dsi, TXSETR, txsetr);
+
+	/*
+	 * Global timings characteristic depends on high speed Clock Frequency
+	 * Currently MIPI DSI-IF just supports maximum FHD@60 with:
+	 * - videoclock = 148.5 (MHz)
+	 * - bpp: maximum 24bpp
+	 * - data lanes: maximum 4 lanes
+	 * Therefore maximum hsclk will be 891 Mbps.
+	 */
+	if (hsfreq > 445500) {
+		clkkpt = 12;
+		clkbfht = 15;
+		clkstpt = 48;
+		golpbkt = 75;
+	} else if (hsfreq > 250000) {
+		clkkpt = 7;
+		clkbfht = 8;
+		clkstpt = 27;
+		golpbkt = 40;
+	} else {
+		clkkpt = 8;
+		clkbfht = 6;
+		clkstpt = 24;
+		golpbkt = 29;
+	}
+
+	clstptsetr = CLSTPTSETR_CLKKPT(clkkpt) | CLSTPTSETR_CLKBFHT(clkbfht) |
+		     CLSTPTSETR_CLKSTPT(clkstpt);
+	rzg2l_mipi_dsi_link_write(dsi, CLSTPTSETR, clstptsetr);
+
+	lptrnstsetr = LPTRNSTSETR_GOLPBKT(golpbkt);
+	rzg2l_mipi_dsi_link_write(dsi, LPTRNSTSETR, lptrnstsetr);
+
+	return 0;
+
+err_phy:
+	rzg2l_mipi_dsi_dphy_exit(dsi);
+	pm_runtime_put(dsi->dev);
+
+	return ret;
+}
+
+static void rzg2l_mipi_dsi_stop(struct rzg2l_mipi_dsi *dsi)
+{
+	rzg2l_mipi_dsi_dphy_exit(dsi);
+	pm_runtime_put(dsi->dev);
+}
+
+static void rzg2l_mipi_dsi_set_display_timing(struct rzg2l_mipi_dsi *dsi,
+					      const struct drm_display_mode *mode)
+{
+	u32 vich1ppsetr;
+	u32 vich1vssetr;
+	u32 vich1vpsetr;
+	u32 vich1hssetr;
+	u32 vich1hpsetr;
+	int dsi_format;
+	u32 delay[2];
+	u8 index;
+
+	/* Configuration for Pixel Packet */
+	dsi_format = mipi_dsi_pixel_format_to_bpp(dsi->format);
+	switch (dsi_format) {
+	case 24:
+		vich1ppsetr = VICH1PPSETR_DT_RGB24;
+		break;
+	case 18:
+		vich1ppsetr = VICH1PPSETR_DT_RGB18;
+		break;
+	}
+
+	if ((dsi->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) &&
+	    !(dsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST))
+		vich1ppsetr |= VICH1PPSETR_TXESYNC_PULSE;
+
+	rzg2l_mipi_dsi_link_write(dsi, VICH1PPSETR, vich1ppsetr);
+
+	/* Configuration for Video Parameters */
+	vich1vssetr = VICH1VSSETR_VACTIVE(mode->vdisplay) |
+		      VICH1VSSETR_VSA(mode->vsync_end - mode->vsync_start);
+	vich1vssetr |= (mode->flags & DRM_MODE_FLAG_PVSYNC) ?
+			VICH1VSSETR_VSPOL_HIGH : VICH1VSSETR_VSPOL_LOW;
+
+	vich1vpsetr = VICH1VPSETR_VFP(mode->vsync_start - mode->vdisplay) |
+		      VICH1VPSETR_VBP(mode->vtotal - mode->vsync_end);
+
+	vich1hssetr = VICH1HSSETR_HACTIVE(mode->hdisplay) |
+		      VICH1HSSETR_HSA(mode->hsync_end - mode->hsync_start);
+	vich1hssetr |= (mode->flags & DRM_MODE_FLAG_PHSYNC) ?
+			VICH1HSSETR_HSPOL_HIGH : VICH1HSSETR_HSPOL_LOW;
+
+	vich1hpsetr = VICH1HPSETR_HFP(mode->hsync_start - mode->hdisplay) |
+		      VICH1HPSETR_HBP(mode->htotal - mode->hsync_end);
+
+	rzg2l_mipi_dsi_link_write(dsi, VICH1VSSETR, vich1vssetr);
+	rzg2l_mipi_dsi_link_write(dsi, VICH1VPSETR, vich1vpsetr);
+	rzg2l_mipi_dsi_link_write(dsi, VICH1HSSETR, vich1hssetr);
+	rzg2l_mipi_dsi_link_write(dsi, VICH1HPSETR, vich1hpsetr);
+
+	/*
+	 * Configuration for Delay Value
+	 * Delay value based on 2 ranges of video clock.
+	 * 74.25MHz is videoclock of HD@60p or FHD@30p
+	 */
+	if (mode->clock > 74250) {
+		delay[0] = 231;
+		delay[1] = 216;
+	} else {
+		delay[0] = 220;
+		delay[1] = 212;
+	}
+
+	if (dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS)
+		index = 0;
+	else
+		index = 1;
+
+	rzg2l_mipi_dsi_link_write(dsi, VICH1SET1R,
+				  VICH1SET1R_DLY(delay[index]));
+}
+
+static int rzg2l_mipi_dsi_start_hs_clock(struct rzg2l_mipi_dsi *dsi)
+{
+	bool is_clk_cont;
+	u32 hsclksetr;
+	u32 status;
+	int ret;
+
+	is_clk_cont = !(dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS);
+
+	/* Start HS clock */
+	hsclksetr = HSCLKSETR_HSCLKRUN_HS | (is_clk_cont ?
+					     HSCLKSETR_HSCLKMODE_CONT :
+					     HSCLKSETR_HSCLKMODE_NON_CONT);
+	rzg2l_mipi_dsi_link_write(dsi, HSCLKSETR, hsclksetr);
+
+	if (is_clk_cont) {
+		ret = read_poll_timeout(rzg2l_mipi_dsi_link_read, status,
+					status & PLSR_CLLP2HS,
+					2000, 20000, false, dsi, PLSR);
+		if (ret < 0) {
+			dev_err(dsi->dev, "failed to start HS clock\n");
+			return ret;
+		}
+	}
+
+	dev_dbg(dsi->dev, "Start High Speed Clock with %s clock mode",
+		is_clk_cont ? "continuous" : "non-continuous");
+
+	return 0;
+}
+
+static int rzg2l_mipi_dsi_stop_hs_clock(struct rzg2l_mipi_dsi *dsi)
+{
+	bool is_clk_cont;
+	u32 status;
+	int ret;
+
+	is_clk_cont = !(dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS);
+
+	/* Stop HS clock */
+	rzg2l_mipi_dsi_link_write(dsi, HSCLKSETR,
+				  is_clk_cont ? HSCLKSETR_HSCLKMODE_CONT :
+				  HSCLKSETR_HSCLKMODE_NON_CONT);
+
+	if (is_clk_cont) {
+		ret = read_poll_timeout(rzg2l_mipi_dsi_link_read, status,
+					status & PLSR_CLHS2LP,
+					2000, 20000, false, dsi, PLSR);
+		if (ret < 0) {
+			dev_err(dsi->dev, "failed to stop HS clock\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int rzg2l_mipi_dsi_start_video(struct rzg2l_mipi_dsi *dsi)
+{
+	u32 vich1set0r;
+	u32 status;
+	int ret;
+
+	/* Configuration for Blanking sequence and start video input*/
+	vich1set0r = VICH1SET0R_HFPNOLP | VICH1SET0R_HBPNOLP |
+		     VICH1SET0R_HSANOLP | VICH1SET0R_VSTART;
+	rzg2l_mipi_dsi_link_write(dsi, VICH1SET0R, vich1set0r);
+
+	ret = read_poll_timeout(rzg2l_mipi_dsi_link_read, status,
+				status & VICH1SR_VIRDY,
+				2000, 20000, false, dsi, VICH1SR);
+	if (ret < 0)
+		dev_err(dsi->dev, "Failed to start video signal input\n");
+
+	return ret;
+}
+
+static int rzg2l_mipi_dsi_stop_video(struct rzg2l_mipi_dsi *dsi)
+{
+	u32 status;
+	int ret;
+
+	rzg2l_mipi_dsi_link_write(dsi, VICH1SET0R, VICH1SET0R_VSTPAFT);
+	ret = read_poll_timeout(rzg2l_mipi_dsi_link_read, status,
+				(status & VICH1SR_STOP) && (!(status & VICH1SR_RUNNING)),
+				2000, 20000, false, dsi, VICH1SR);
+	if (ret < 0)
+		goto err;
+
+	ret = read_poll_timeout(rzg2l_mipi_dsi_link_read, status,
+				!(status & LINKSR_HSBUSY),
+				2000, 20000, false, dsi, LINKSR);
+	if (ret < 0)
+		goto err;
+
+	return 0;
+
+err:
+	dev_err(dsi->dev, "Failed to stop video signal input\n");
+	return ret;
+}
+
+/* -----------------------------------------------------------------------------
+ * Bridge
+ */
+
+static int rzg2l_mipi_dsi_attach(struct drm_bridge *bridge,
+				 enum drm_bridge_attach_flags flags)
+{
+	struct rzg2l_mipi_dsi *dsi = bridge_to_rzg2l_mipi_dsi(bridge);
+
+	return drm_bridge_attach(bridge->encoder, dsi->next_bridge, bridge,
+				 flags);
+}
+
+static void rzg2l_mipi_dsi_atomic_enable(struct drm_bridge *bridge,
+					 struct drm_bridge_state *old_bridge_state)
+{
+	struct drm_atomic_state *state = old_bridge_state->base.state;
+	struct rzg2l_mipi_dsi *dsi = bridge_to_rzg2l_mipi_dsi(bridge);
+	const struct drm_display_mode *mode;
+	struct drm_connector *connector;
+	struct drm_crtc *crtc;
+	int ret;
+
+	connector = drm_atomic_get_new_connector_for_encoder(state, bridge->encoder);
+	crtc = drm_atomic_get_new_connector_state(state, connector)->crtc;
+	mode = &drm_atomic_get_new_crtc_state(state, crtc)->adjusted_mode;
+
+	ret = rzg2l_mipi_dsi_startup(dsi, mode);
+	if (ret < 0)
+		return;
+
+	rzg2l_mipi_dsi_set_display_timing(dsi, mode);
+
+	ret = rzg2l_mipi_dsi_start_hs_clock(dsi);
+	if (ret < 0)
+		goto err_stop;
+
+	ret = rzg2l_mipi_dsi_start_video(dsi);
+	if (ret < 0)
+		goto err_stop_clock;
+
+	return;
+
+err_stop_clock:
+	rzg2l_mipi_dsi_stop_hs_clock(dsi);
+err_stop:
+	rzg2l_mipi_dsi_stop(dsi);
+}
+
+static void rzg2l_mipi_dsi_atomic_disable(struct drm_bridge *bridge,
+					  struct drm_bridge_state *old_bridge_state)
+{
+	struct rzg2l_mipi_dsi *dsi = bridge_to_rzg2l_mipi_dsi(bridge);
+
+	rzg2l_mipi_dsi_stop_video(dsi);
+	rzg2l_mipi_dsi_stop_hs_clock(dsi);
+	rzg2l_mipi_dsi_stop(dsi);
+}
+
+static enum drm_mode_status
+rzg2l_mipi_dsi_bridge_mode_valid(struct drm_bridge *bridge,
+				 const struct drm_display_info *info,
+				 const struct drm_display_mode *mode)
+{
+	if (mode->clock > 148500)
+		return MODE_CLOCK_HIGH;
+
+	return MODE_OK;
+}
+
+static const struct drm_bridge_funcs rzg2l_mipi_dsi_bridge_ops = {
+	.attach = rzg2l_mipi_dsi_attach,
+	.atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_bridge_destroy_state,
+	.atomic_reset = drm_atomic_helper_bridge_reset,
+	.atomic_enable = rzg2l_mipi_dsi_atomic_enable,
+	.atomic_disable = rzg2l_mipi_dsi_atomic_disable,
+	.mode_valid = rzg2l_mipi_dsi_bridge_mode_valid,
+};
+
+/* -----------------------------------------------------------------------------
+ * Host setting
+ */
+
+static int rzg2l_mipi_dsi_host_attach(struct mipi_dsi_host *host,
+				      struct mipi_dsi_device *device)
+{
+	struct rzg2l_mipi_dsi *dsi = host_to_rzg2l_mipi_dsi(host);
+	int ret;
+
+	if (device->lanes > dsi->num_data_lanes) {
+		dev_err(dsi->dev,
+			"Number of lines of device (%u) exceeds host (%u)\n",
+			device->lanes, dsi->num_data_lanes);
+		return -EINVAL;
+	}
+
+	switch (mipi_dsi_pixel_format_to_bpp(device->format)) {
+	case 24:
+	case 18:
+		break;
+	default:
+		dev_err(dsi->dev, "Unsupported format 0x%04x\n", device->format);
+		return -EINVAL;
+	}
+
+	dsi->lanes = device->lanes;
+	dsi->format = device->format;
+	dsi->mode_flags = device->mode_flags;
+
+	dsi->next_bridge = devm_drm_of_get_bridge(dsi->dev, dsi->dev->of_node,
+						  1, 0);
+	if (IS_ERR(dsi->next_bridge)) {
+		ret = PTR_ERR(dsi->next_bridge);
+		dev_err(dsi->dev, "failed to get next bridge: %d\n", ret);
+		return ret;
+	}
+
+	drm_bridge_add(&dsi->bridge);
+
+	return 0;
+}
+
+static int rzg2l_mipi_dsi_host_detach(struct mipi_dsi_host *host,
+				      struct mipi_dsi_device *device)
+{
+	struct rzg2l_mipi_dsi *dsi = host_to_rzg2l_mipi_dsi(host);
+
+	drm_bridge_remove(&dsi->bridge);
+
+	return 0;
+}
+
+static const struct mipi_dsi_host_ops rzg2l_mipi_dsi_host_ops = {
+	.attach = rzg2l_mipi_dsi_host_attach,
+	.detach = rzg2l_mipi_dsi_host_detach,
+};
+
+/* -----------------------------------------------------------------------------
+ * Power Management
+ */
+
+static int __maybe_unused rzg2l_mipi_pm_runtime_suspend(struct device *dev)
+{
+	struct rzg2l_mipi_dsi *dsi = dev_get_drvdata(dev);
+
+	reset_control_assert(dsi->prstc);
+	reset_control_assert(dsi->arstc);
+
+	return 0;
+}
+
+static int __maybe_unused rzg2l_mipi_pm_runtime_resume(struct device *dev)
+{
+	struct rzg2l_mipi_dsi *dsi = dev_get_drvdata(dev);
+	int ret;
+
+	ret = reset_control_deassert(dsi->arstc);
+	if (ret < 0)
+		return ret;
+
+	ret = reset_control_deassert(dsi->prstc);
+	if (ret < 0)
+		reset_control_assert(dsi->arstc);
+
+	return ret;
+}
+
+static const struct dev_pm_ops rzg2l_mipi_pm_ops = {
+	SET_RUNTIME_PM_OPS(rzg2l_mipi_pm_runtime_suspend, rzg2l_mipi_pm_runtime_resume, NULL)
+};
+
+/* -----------------------------------------------------------------------------
+ * Probe & Remove
+ */
+
+static int rzg2l_mipi_dsi_probe(struct platform_device *pdev)
+{
+	unsigned int num_data_lanes;
+	struct rzg2l_mipi_dsi *dsi;
+	u32 txsetr;
+	int ret;
+
+	dsi = devm_kzalloc(&pdev->dev, sizeof(*dsi), GFP_KERNEL);
+	if (!dsi)
+		return -ENOMEM;
+
+	platform_set_drvdata(pdev, dsi);
+	dsi->dev = &pdev->dev;
+
+	ret = drm_of_get_data_lanes_count_ep(dsi->dev->of_node, 1, 0, 1, 4);
+	if (ret < 0)
+		return dev_err_probe(dsi->dev, ret,
+				     "missing or invalid data-lanes property\n");
+
+	num_data_lanes = ret;
+
+	dsi->mmio = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(dsi->mmio))
+		return PTR_ERR(dsi->mmio);
+
+	dsi->vclk = devm_clk_get(dsi->dev, "vclk");
+	if (IS_ERR(dsi->vclk))
+		return PTR_ERR(dsi->vclk);
+
+	dsi->rstc = devm_reset_control_get_exclusive(dsi->dev, "rst");
+	if (IS_ERR(dsi->rstc))
+		return dev_err_probe(dsi->dev, PTR_ERR(dsi->rstc),
+				     "failed to get rst\n");
+
+	dsi->arstc = devm_reset_control_get_exclusive(dsi->dev, "arst");
+	if (IS_ERR(dsi->arstc))
+		return dev_err_probe(&pdev->dev, PTR_ERR(dsi->arstc),
+				     "failed to get arst\n");
+
+	dsi->prstc = devm_reset_control_get_exclusive(dsi->dev, "prst");
+	if (IS_ERR(dsi->prstc))
+		return dev_err_probe(dsi->dev, PTR_ERR(dsi->prstc),
+				     "failed to get prst\n");
+
+	platform_set_drvdata(pdev, dsi);
+
+	pm_runtime_enable(dsi->dev);
+
+	ret = pm_runtime_resume_and_get(dsi->dev);
+	if (ret < 0)
+		goto err_pm_disable;
+
+	/*
+	 * TXSETR register can be read only after DPHY init. But during probe
+	 * mode->clock and format are not available. So initialize DPHY with
+	 * timing parameters for 80Mbps.
+	 */
+	ret = rzg2l_mipi_dsi_dphy_init(dsi, 80000);
+	if (ret < 0)
+		goto err_phy;
+
+	txsetr = rzg2l_mipi_dsi_link_read(dsi, TXSETR);
+	dsi->num_data_lanes = min(((txsetr >> 16) & 3) + 1, num_data_lanes);
+	rzg2l_mipi_dsi_dphy_exit(dsi);
+	pm_runtime_put(dsi->dev);
+
+	/* Initialize the DRM bridge. */
+	dsi->bridge.funcs = &rzg2l_mipi_dsi_bridge_ops;
+	dsi->bridge.of_node = dsi->dev->of_node;
+
+	/* Init host device */
+	dsi->host.dev = dsi->dev;
+	dsi->host.ops = &rzg2l_mipi_dsi_host_ops;
+	ret = mipi_dsi_host_register(&dsi->host);
+	if (ret < 0)
+		goto err_pm_disable;
+
+	return 0;
+
+err_phy:
+	rzg2l_mipi_dsi_dphy_exit(dsi);
+	pm_runtime_put(dsi->dev);
+err_pm_disable:
+	pm_runtime_disable(dsi->dev);
+	return ret;
+}
+
+static int rzg2l_mipi_dsi_remove(struct platform_device *pdev)
+{
+	struct rzg2l_mipi_dsi *dsi = platform_get_drvdata(pdev);
+
+	mipi_dsi_host_unregister(&dsi->host);
+	pm_runtime_disable(&pdev->dev);
+
+	return 0;
+}
+
+static const struct of_device_id rzg2l_mipi_dsi_of_table[] = {
+	{ .compatible = "renesas,rzg2l-mipi-dsi" },
+	{ /* sentinel */ }
+};
+
+MODULE_DEVICE_TABLE(of, rzg2l_mipi_dsi_of_table);
+
+static struct platform_driver rzg2l_mipi_dsi_platform_driver = {
+	.probe	= rzg2l_mipi_dsi_probe,
+	.remove	= rzg2l_mipi_dsi_remove,
+	.driver	= {
+		.name = "rzg2l-mipi-dsi",
+		.pm = &rzg2l_mipi_pm_ops,
+		.of_match_table = rzg2l_mipi_dsi_of_table,
+	},
+};
+
+module_platform_driver(rzg2l_mipi_dsi_platform_driver);
+
+MODULE_AUTHOR("Biju Das <biju.das.jz@bp.renesas.com>");
+MODULE_DESCRIPTION("Renesas RZ/G2L MIPI DSI Encoder Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/rcar-du/rzg2l_mipi_dsi_regs.h b/drivers/gpu/drm/rcar-du/rzg2l_mipi_dsi_regs.h
new file mode 100644
index 000000000000..1dbc16ec64a4
--- /dev/null
+++ b/drivers/gpu/drm/rcar-du/rzg2l_mipi_dsi_regs.h
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * RZ/G2L MIPI DSI Interface Registers Definitions
+ *
+ * Copyright (C) 2022 Renesas Electronics Corporation
+ */
+
+#ifndef __RZG2L_MIPI_DSI_REGS_H__
+#define __RZG2L_MIPI_DSI_REGS_H__
+
+#include <linux/bits.h>
+
+/* DPHY Registers */
+#define DSIDPHYCTRL0			0x00
+#define DSIDPHYCTRL0_CAL_EN_HSRX_OFS	BIT(16)
+#define DSIDPHYCTRL0_CMN_MASTER_EN	BIT(8)
+#define DSIDPHYCTRL0_RE_VDD_DETVCCQLV18	BIT(2)
+#define DSIDPHYCTRL0_EN_LDO1200		BIT(1)
+#define DSIDPHYCTRL0_EN_BGR		BIT(0)
+
+#define DSIDPHYTIM0			0x04
+#define DSIDPHYTIM0_TCLK_MISS(x)	((x) << 24)
+#define DSIDPHYTIM0_T_INIT(x)		((x) << 0)
+
+#define DSIDPHYTIM1			0x08
+#define DSIDPHYTIM1_THS_PREPARE(x)	((x) << 24)
+#define DSIDPHYTIM1_TCLK_PREPARE(x)	((x) << 16)
+#define DSIDPHYTIM1_THS_SETTLE(x)	((x) << 8)
+#define DSIDPHYTIM1_TCLK_SETTLE(x)	((x) << 0)
+
+#define DSIDPHYTIM2			0x0c
+#define DSIDPHYTIM2_TCLK_TRAIL(x)	((x) << 24)
+#define DSIDPHYTIM2_TCLK_POST(x)	((x) << 16)
+#define DSIDPHYTIM2_TCLK_PRE(x)		((x) << 8)
+#define DSIDPHYTIM2_TCLK_ZERO(x)	((x) << 0)
+
+#define DSIDPHYTIM3			0x10
+#define DSIDPHYTIM3_TLPX(x)		((x) << 24)
+#define DSIDPHYTIM3_THS_EXIT(x)		((x) << 16)
+#define DSIDPHYTIM3_THS_TRAIL(x)	((x) << 8)
+#define DSIDPHYTIM3_THS_ZERO(x)		((x) << 0)
+
+/* --------------------------------------------------------*/
+/* Link Registers */
+#define LINK_REG_OFFSET			0x10000
+
+/* Link Status Register */
+#define LINKSR				0x10
+#define LINKSR_LPBUSY			BIT(13)
+#define LINKSR_HSBUSY			BIT(12)
+#define LINKSR_VICHRUN1			BIT(8)
+#define LINKSR_SQCHRUN1			BIT(4)
+#define LINKSR_SQCHRUN0			BIT(0)
+
+/* Tx Set Register */
+#define TXSETR				0x100
+#define TXSETR_NUMLANECAP		(0x3 << 16)
+#define TXSETR_DLEN			(1 << 9)
+#define TXSETR_CLEN			(1 << 8)
+#define TXSETR_NUMLANEUSE(x)		(((x) & 0x3) << 0)
+
+/* HS Clock Set Register */
+#define HSCLKSETR			0x104
+#define HSCLKSETR_HSCLKMODE_CONT	(1 << 1)
+#define HSCLKSETR_HSCLKMODE_NON_CONT	(0 << 1)
+#define HSCLKSETR_HSCLKRUN_HS		(1 << 0)
+#define HSCLKSETR_HSCLKRUN_LP		(0 << 0)
+
+/* Reset Control Register */
+#define RSTCR				0x110
+#define RSTCR_SWRST			BIT(0)
+#define RSTCR_FCETXSTP			BIT(16)
+
+/* Reset Status Register */
+#define RSTSR				0x114
+#define RSTSR_DL0DIR			(1 << 15)
+#define RSTSR_DLSTPST			(0xf << 8)
+#define RSTSR_SWRSTV1			(1 << 4)
+#define RSTSR_SWRSTIB			(1 << 3)
+#define RSTSR_SWRSTAPB			(1 << 2)
+#define RSTSR_SWRSTLP			(1 << 1)
+#define RSTSR_SWRSTHS			(1 << 0)
+
+/* Clock Lane Stop Time Set Register */
+#define CLSTPTSETR			0x314
+#define CLSTPTSETR_CLKKPT(x)		((x) << 24)
+#define CLSTPTSETR_CLKBFHT(x)		((x) << 16)
+#define CLSTPTSETR_CLKSTPT(x)		((x) << 2)
+
+/* LP Transition Time Set Register */
+#define LPTRNSTSETR			0x318
+#define LPTRNSTSETR_GOLPBKT(x)		((x) << 0)
+
+/* Physical Lane Status Register */
+#define PLSR				0x320
+#define PLSR_CLHS2LP			BIT(27)
+#define PLSR_CLLP2HS			BIT(26)
+
+/* Video-Input Channel 1 Set 0 Register */
+#define VICH1SET0R			0x400
+#define VICH1SET0R_VSEN			BIT(12)
+#define VICH1SET0R_HFPNOLP		BIT(10)
+#define VICH1SET0R_HBPNOLP		BIT(9)
+#define VICH1SET0R_HSANOLP		BIT(8)
+#define VICH1SET0R_VSTPAFT		BIT(1)
+#define VICH1SET0R_VSTART		BIT(0)
+
+/* Video-Input Channel 1 Set 1 Register */
+#define VICH1SET1R			0x404
+#define VICH1SET1R_DLY(x)		(((x) & 0xfff) << 2)
+
+/* Video-Input Channel 1 Status Register */
+#define VICH1SR				0x410
+#define VICH1SR_VIRDY			BIT(3)
+#define VICH1SR_RUNNING			BIT(2)
+#define VICH1SR_STOP			BIT(1)
+#define VICH1SR_START			BIT(0)
+
+/* Video-Input Channel 1 Pixel Packet Set Register */
+#define VICH1PPSETR			0x420
+#define VICH1PPSETR_DT_RGB18		(0x1e << 16)
+#define VICH1PPSETR_DT_RGB18_LS		(0x2e << 16)
+#define VICH1PPSETR_DT_RGB24		(0x3e << 16)
+#define VICH1PPSETR_TXESYNC_PULSE	(1 << 15)
+#define VICH1PPSETR_VC(x)		((x) << 22)
+
+/* Video-Input Channel 1 Vertical Size Set Register */
+#define VICH1VSSETR			0x428
+#define VICH1VSSETR_VACTIVE(x)		(((x) & 0x7fff) << 16)
+#define VICH1VSSETR_VSPOL_LOW		(1 << 15)
+#define VICH1VSSETR_VSPOL_HIGH		(0 << 15)
+#define VICH1VSSETR_VSA(x)		(((x) & 0xfff) << 0)
+
+/* Video-Input Channel 1 Vertical Porch Set Register */
+#define VICH1VPSETR			0x42c
+#define VICH1VPSETR_VFP(x)		(((x) & 0x1fff) << 16)
+#define VICH1VPSETR_VBP(x)		(((x) & 0x1fff) << 0)
+
+/* Video-Input Channel 1 Horizontal Size Set Register */
+#define VICH1HSSETR			0x430
+#define VICH1HSSETR_HACTIVE(x)		(((x) & 0x7fff) << 16)
+#define VICH1HSSETR_HSPOL_LOW		(1 << 15)
+#define VICH1HSSETR_HSPOL_HIGH		(0 << 15)
+#define VICH1HSSETR_HSA(x)		(((x) & 0xfff) << 0)
+
+/* Video-Input Channel 1 Horizontal Porch Set Register */
+#define VICH1HPSETR			0x434
+#define VICH1HPSETR_HFP(x)		(((x) & 0x1fff) << 16)
+#define VICH1HPSETR_HBP(x)		(((x) & 0x1fff) << 0)
+
+#endif /* __RZG2L_MIPI_DSI_REGS_H__ */
diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c
index 518ee13b1d6f..8526dda91931 100644
--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c
+++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c
@@ -571,7 +571,7 @@ static void cdn_dp_encoder_mode_set(struct drm_encoder *encoder,
 	video->v_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NVSYNC);
 	video->h_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NHSYNC);
 
-	memcpy(&dp->mode, adjusted, sizeof(*mode));
+	drm_mode_copy(&dp->mode, adjusted);
 }
 
 static bool cdn_dp_check_link_status(struct cdn_dp_device *dp)
diff --git a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
index 92b599b089f9..7901c3babc8c 100644
--- a/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw-mipi-dsi-rockchip.c
@@ -746,7 +746,7 @@ static void dw_mipi_dsi_rockchip_config(struct dw_mipi_dsi_rockchip *dsi)
 static void dw_mipi_dsi_rockchip_set_lcdsel(struct dw_mipi_dsi_rockchip *dsi,
 					    int mux)
 {
-	if (dsi->cdata->lcdsel_grf_reg < 0)
+	if (dsi->cdata->lcdsel_grf_reg)
 		regmap_write(dsi->grf_regmap, dsi->cdata->lcdsel_grf_reg,
 			mux ? dsi->cdata->lcdsel_lit : dsi->cdata->lcdsel_big);
 }
@@ -1045,23 +1045,31 @@ static int dw_mipi_dsi_rockchip_host_attach(void *priv_data,
 	if (ret) {
 		DRM_DEV_ERROR(dsi->dev, "Failed to register component: %d\n",
 					ret);
-		return ret;
+		goto out;
 	}
 
 	second = dw_mipi_dsi_rockchip_find_second(dsi);
-	if (IS_ERR(second))
-		return PTR_ERR(second);
+	if (IS_ERR(second)) {
+		ret = PTR_ERR(second);
+		goto out;
+	}
 	if (second) {
 		ret = component_add(second, &dw_mipi_dsi_rockchip_ops);
 		if (ret) {
 			DRM_DEV_ERROR(second,
 				      "Failed to register component: %d\n",
 				      ret);
-			return ret;
+			goto out;
 		}
 	}
 
 	return 0;
+
+out:
+	mutex_lock(&dsi->usage_mutex);
+	dsi->usage_mode = DW_DSI_USAGE_IDLE;
+	mutex_unlock(&dsi->usage_mutex);
+	return ret;
 }
 
 static int dw_mipi_dsi_rockchip_host_detach(void *priv_data,
@@ -1629,7 +1637,6 @@ static const struct rockchip_dw_dsi_chip_data rk3399_chip_data[] = {
 static const struct rockchip_dw_dsi_chip_data rk3568_chip_data[] = {
 	{
 		.reg = 0xfe060000,
-		.lcdsel_grf_reg = -1,
 		.lanecfg1_grf_reg = RK3568_GRF_VO_CON2,
 		.lanecfg1 = HIWORD_UPDATE(0, RK3568_DSI0_SKEWCALHS |
 					  RK3568_DSI0_FORCETXSTOPMODE |
@@ -1639,7 +1646,6 @@ static const struct rockchip_dw_dsi_chip_data rk3568_chip_data[] = {
 	},
 	{
 		.reg = 0xfe070000,
-		.lcdsel_grf_reg = -1,
 		.lanecfg1_grf_reg = RK3568_GRF_VO_CON3,
 		.lanecfg1 = HIWORD_UPDATE(0, RK3568_DSI1_SKEWCALHS |
 					  RK3568_DSI1_FORCETXSTOPMODE |
@@ -1675,5 +1681,11 @@ struct platform_driver dw_mipi_dsi_rockchip_driver = {
 		.of_match_table = dw_mipi_dsi_rockchip_dt_ids,
 		.pm	= &dw_mipi_dsi_rockchip_pm_ops,
 		.name	= "dw-mipi-dsi-rockchip",
+		/*
+		 * For dual-DSI display, one DSI pokes at the other DSI's
+		 * drvdata in dw_mipi_dsi_rockchip_find_second(). This is not
+		 * safe for asynchronous probe.
+		 */
+		.probe_type = PROBE_FORCE_SYNCHRONOUS,
 	},
 };
diff --git a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
index c14f88893868..2f4b8f64cbad 100644
--- a/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
+++ b/drivers/gpu/drm/rockchip/dw_hdmi-rockchip.c
@@ -565,7 +565,8 @@ static int dw_hdmi_rockchip_bind(struct device *dev, struct device *master,
 
 	ret = rockchip_hdmi_parse_dt(hdmi);
 	if (ret) {
-		DRM_DEV_ERROR(hdmi->dev, "Unable to parse OF data\n");
+		if (ret != -EPROBE_DEFER)
+			DRM_DEV_ERROR(hdmi->dev, "Unable to parse OF data\n");
 		return ret;
 	}
 
diff --git a/drivers/gpu/drm/rockchip/inno_hdmi.c b/drivers/gpu/drm/rockchip/inno_hdmi.c
index 87b2243ea23e..f51774866f41 100644
--- a/drivers/gpu/drm/rockchip/inno_hdmi.c
+++ b/drivers/gpu/drm/rockchip/inno_hdmi.c
@@ -499,7 +499,7 @@ static void inno_hdmi_encoder_mode_set(struct drm_encoder *encoder,
 	inno_hdmi_setup(hdmi, adj_mode);
 
 	/* Store the display mode for plugin/DPMS poweron events */
-	memcpy(&hdmi->previous_mode, adj_mode, sizeof(hdmi->previous_mode));
+	drm_mode_copy(&hdmi->previous_mode, adj_mode);
 }
 
 static void inno_hdmi_encoder_enable(struct drm_encoder *encoder)
diff --git a/drivers/gpu/drm/rockchip/rk3066_hdmi.c b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
index cf2cf51091a3..90145ad96984 100644
--- a/drivers/gpu/drm/rockchip/rk3066_hdmi.c
+++ b/drivers/gpu/drm/rockchip/rk3066_hdmi.c
@@ -395,7 +395,7 @@ rk3066_hdmi_encoder_mode_set(struct drm_encoder *encoder,
 	struct rk3066_hdmi *hdmi = encoder_to_rk3066_hdmi(encoder);
 
 	/* Store the display mode for plugin/DPMS poweron events. */
-	memcpy(&hdmi->previous_mode, adj_mode, sizeof(hdmi->previous_mode));
+	drm_mode_copy(&hdmi->previous_mode, adj_mode);
 }
 
 static void rk3066_hdmi_encoder_enable(struct drm_encoder *encoder)
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index 813f9f8c8698..6e0788d14c10 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -17,7 +17,7 @@
 
 #include <drm/drm_aperture.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_of.h>
 #include <drm/drm_probe_helper.h>
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
index 1641440837af..aeb03a57240f 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
@@ -9,10 +9,10 @@
 #ifndef _ROCKCHIP_DRM_DRV_H
 #define _ROCKCHIP_DRM_DRV_H
 
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_gem.h>
 
+#include <linux/i2c.h>
 #include <linux/module.h>
 #include <linux/component.h>
 
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
index 409eaa1bf092..cfe8b793d344 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
@@ -9,7 +9,6 @@
 #include <drm/drm.h>
 #include <drm/drm_atomic.h>
 #include <drm/drm_damage_helper.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_framebuffer_helper.h>
@@ -72,7 +71,6 @@ rockchip_fb_create(struct drm_device *dev, struct drm_file *file,
 
 static const struct drm_mode_config_funcs rockchip_drm_mode_config_funcs = {
 	.fb_create = rockchip_fb_create,
-	.output_poll_changed = drm_fb_helper_output_poll_changed,
 	.atomic_check = drm_atomic_helper_check,
 	.atomic_commit = drm_atomic_helper_commit,
 };
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
index 614e97aaac80..6edb7c52cb3d 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
@@ -9,6 +9,7 @@
 #include <linux/vmalloc.h>
 
 #include <drm/drm.h>
+#include <drm/drm_fb_helper.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_prime.h>
@@ -364,9 +365,12 @@ rockchip_gem_create_with_handle(struct drm_file *file_priv,
 {
 	struct rockchip_gem_object *rk_obj;
 	struct drm_gem_object *obj;
+	bool is_framebuffer;
 	int ret;
 
-	rk_obj = rockchip_gem_create_object(drm, size, false);
+	is_framebuffer = drm->fb_helper && file_priv == drm->fb_helper->client.file;
+
+	rk_obj = rockchip_gem_create_object(drm, size, is_framebuffer);
 	if (IS_ERR(rk_obj))
 		return ERR_CAST(rk_obj);
 
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
index ad87db2fcaf6..8cecf81a5ae0 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop2.c
@@ -877,10 +877,14 @@ static void vop2_crtc_atomic_disable(struct drm_crtc *crtc,
 {
 	struct vop2_video_port *vp = to_vop2_video_port(crtc);
 	struct vop2 *vop2 = vp->vop2;
+	struct drm_crtc_state *old_crtc_state;
 	int ret;
 
 	vop2_lock(vop2);
 
+	old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc);
+	drm_atomic_helper_disable_planes_on_crtc(old_crtc_state, false);
+
 	drm_crtc_vblank_off(crtc);
 
 	/*
@@ -996,13 +1000,15 @@ static int vop2_plane_atomic_check(struct drm_plane *plane,
 static void vop2_plane_atomic_disable(struct drm_plane *plane,
 				      struct drm_atomic_state *state)
 {
-	struct drm_plane_state *old_pstate = drm_atomic_get_old_plane_state(state, plane);
+	struct drm_plane_state *old_pstate = NULL;
 	struct vop2_win *win = to_vop2_win(plane);
 	struct vop2 *vop2 = win->vop2;
 
 	drm_dbg(vop2->drm, "%s disable\n", win->data->name);
 
-	if (!old_pstate->crtc)
+	if (state)
+		old_pstate = drm_atomic_get_old_plane_state(state, plane);
+	if (old_pstate && !old_pstate->crtc)
 		return;
 
 	vop2_win_disable(win);
diff --git a/drivers/gpu/drm/scheduler/sched_entity.c b/drivers/gpu/drm/scheduler/sched_entity.c
index c97bc1149663..fe09e5be79bd 100644
--- a/drivers/gpu/drm/scheduler/sched_entity.c
+++ b/drivers/gpu/drm/scheduler/sched_entity.c
@@ -140,6 +140,73 @@ bool drm_sched_entity_is_ready(struct drm_sched_entity *entity)
 	return true;
 }
 
+static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk)
+{
+	struct drm_sched_job *job = container_of(wrk, typeof(*job), work);
+
+	drm_sched_fence_finished(job->s_fence);
+	WARN_ON(job->s_fence->parent);
+	job->sched->ops->free_job(job);
+}
+
+/* Signal the scheduler finished fence when the entity in question is killed. */
+static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
+					  struct dma_fence_cb *cb)
+{
+	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
+						 finish_cb);
+	int r;
+
+	dma_fence_put(f);
+
+	/* Wait for all dependencies to avoid data corruptions */
+	while (!xa_empty(&job->dependencies)) {
+		f = xa_erase(&job->dependencies, job->last_dependency++);
+		r = dma_fence_add_callback(f, &job->finish_cb,
+					   drm_sched_entity_kill_jobs_cb);
+		if (!r)
+			return;
+
+		dma_fence_put(f);
+	}
+
+	INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work);
+	schedule_work(&job->work);
+}
+
+/* Remove the entity from the scheduler and kill all pending jobs */
+static void drm_sched_entity_kill(struct drm_sched_entity *entity)
+{
+	struct drm_sched_job *job;
+	struct dma_fence *prev;
+
+	if (!entity->rq)
+		return;
+
+	spin_lock(&entity->rq_lock);
+	entity->stopped = true;
+	drm_sched_rq_remove_entity(entity->rq, entity);
+	spin_unlock(&entity->rq_lock);
+
+	/* Make sure this entity is not used by the scheduler at the moment */
+	wait_for_completion(&entity->entity_idle);
+
+	prev = dma_fence_get(entity->last_scheduled);
+	while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
+		struct drm_sched_fence *s_fence = job->s_fence;
+
+		dma_fence_set_error(&s_fence->finished, -ESRCH);
+
+		dma_fence_get(&s_fence->finished);
+		if (!prev || dma_fence_add_callback(prev, &job->finish_cb,
+					   drm_sched_entity_kill_jobs_cb))
+			drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
+
+		prev = &s_fence->finished;
+	}
+	dma_fence_put(prev);
+}
+
 /**
  * drm_sched_entity_flush - Flush a context entity
  *
@@ -180,91 +247,13 @@ long drm_sched_entity_flush(struct drm_sched_entity *entity, long timeout)
 	/* For killed process disable any more IBs enqueue right now */
 	last_user = cmpxchg(&entity->last_user, current->group_leader, NULL);
 	if ((!last_user || last_user == current->group_leader) &&
-	    (current->flags & PF_EXITING) && (current->exit_code == SIGKILL)) {
-		spin_lock(&entity->rq_lock);
-		entity->stopped = true;
-		drm_sched_rq_remove_entity(entity->rq, entity);
-		spin_unlock(&entity->rq_lock);
-	}
+	    (current->flags & PF_EXITING) && (current->exit_code == SIGKILL))
+		drm_sched_entity_kill(entity);
 
 	return ret;
 }
 EXPORT_SYMBOL(drm_sched_entity_flush);
 
-static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk)
-{
-	struct drm_sched_job *job = container_of(wrk, typeof(*job), work);
-
-	drm_sched_fence_finished(job->s_fence);
-	WARN_ON(job->s_fence->parent);
-	job->sched->ops->free_job(job);
-}
-
-
-/* Signal the scheduler finished fence when the entity in question is killed. */
-static void drm_sched_entity_kill_jobs_cb(struct dma_fence *f,
-					  struct dma_fence_cb *cb)
-{
-	struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
-						 finish_cb);
-
-	dma_fence_put(f);
-	INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work);
-	schedule_work(&job->work);
-}
-
-static struct dma_fence *
-drm_sched_job_dependency(struct drm_sched_job *job,
-			 struct drm_sched_entity *entity)
-{
-	if (!xa_empty(&job->dependencies))
-		return xa_erase(&job->dependencies, job->last_dependency++);
-
-	if (job->sched->ops->dependency)
-		return job->sched->ops->dependency(job, entity);
-
-	return NULL;
-}
-
-static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity)
-{
-	struct drm_sched_job *job;
-	struct dma_fence *f;
-	int r;
-
-	while ((job = to_drm_sched_job(spsc_queue_pop(&entity->job_queue)))) {
-		struct drm_sched_fence *s_fence = job->s_fence;
-
-		/* Wait for all dependencies to avoid data corruptions */
-		while ((f = drm_sched_job_dependency(job, entity))) {
-			dma_fence_wait(f, false);
-			dma_fence_put(f);
-		}
-
-		drm_sched_fence_scheduled(s_fence);
-		dma_fence_set_error(&s_fence->finished, -ESRCH);
-
-		/*
-		 * When pipe is hanged by older entity, new entity might
-		 * not even have chance to submit it's first job to HW
-		 * and so entity->last_scheduled will remain NULL
-		 */
-		if (!entity->last_scheduled) {
-			drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
-			continue;
-		}
-
-		dma_fence_get(entity->last_scheduled);
-		r = dma_fence_add_callback(entity->last_scheduled,
-					   &job->finish_cb,
-					   drm_sched_entity_kill_jobs_cb);
-		if (r == -ENOENT)
-			drm_sched_entity_kill_jobs_cb(NULL, &job->finish_cb);
-		else if (r)
-			DRM_ERROR("fence add callback failed (%d)\n", r);
-	}
-}
-
 /**
  * drm_sched_entity_fini - Destroy a context entity
  *
@@ -278,33 +267,17 @@ static void drm_sched_entity_kill_jobs(struct drm_sched_entity *entity)
  */
 void drm_sched_entity_fini(struct drm_sched_entity *entity)
 {
-	struct drm_gpu_scheduler *sched = NULL;
-
-	if (entity->rq) {
-		sched = entity->rq->sched;
-		drm_sched_rq_remove_entity(entity->rq, entity);
-	}
-
-	/* Consumption of existing IBs wasn't completed. Forcefully
-	 * remove them here.
+	/*
+	 * If consumption of existing IBs wasn't completed. Forcefully remove
+	 * them here. Also makes sure that the scheduler won't touch this entity
+	 * any more.
 	 */
-	if (spsc_queue_count(&entity->job_queue)) {
-		if (sched) {
-			/*
-			 * Wait for thread to idle to make sure it isn't processing
-			 * this entity.
-			 */
-			wait_for_completion(&entity->entity_idle);
+	drm_sched_entity_kill(entity);
 
-		}
-		if (entity->dependency) {
-			dma_fence_remove_callback(entity->dependency,
-						  &entity->cb);
-			dma_fence_put(entity->dependency);
-			entity->dependency = NULL;
-		}
-
-		drm_sched_entity_kill_jobs(entity);
+	if (entity->dependency) {
+		dma_fence_remove_callback(entity->dependency, &entity->cb);
+		dma_fence_put(entity->dependency);
+		entity->dependency = NULL;
 	}
 
 	dma_fence_put(entity->last_scheduled);
@@ -417,6 +390,19 @@ static bool drm_sched_entity_add_dependency_cb(struct drm_sched_entity *entity)
 	return false;
 }
 
+static struct dma_fence *
+drm_sched_job_dependency(struct drm_sched_job *job,
+			 struct drm_sched_entity *entity)
+{
+	if (!xa_empty(&job->dependencies))
+		return xa_erase(&job->dependencies, job->last_dependency++);
+
+	if (job->sched->ops->prepare_job)
+		return job->sched->ops->prepare_job(job, entity);
+
+	return NULL;
+}
+
 struct drm_sched_job *drm_sched_entity_pop_job(struct drm_sched_entity *entity)
 {
 	struct drm_sched_job *sched_job;
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index d0ff9e11cb69..31f3a1267be4 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -286,32 +286,6 @@ static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb)
 }
 
 /**
- * drm_sched_dependency_optimized - test if the dependency can be optimized
- *
- * @fence: the dependency fence
- * @entity: the entity which depends on the above fence
- *
- * Returns true if the dependency can be optimized and false otherwise
- */
-bool drm_sched_dependency_optimized(struct dma_fence* fence,
-				    struct drm_sched_entity *entity)
-{
-	struct drm_gpu_scheduler *sched = entity->rq->sched;
-	struct drm_sched_fence *s_fence;
-
-	if (!fence || dma_fence_is_signaled(fence))
-		return false;
-	if (fence->context == entity->fence_context)
-		return true;
-	s_fence = to_drm_sched_fence(fence);
-	if (s_fence && s_fence->sched == sched)
-		return true;
-
-	return false;
-}
-EXPORT_SYMBOL(drm_sched_dependency_optimized);
-
-/**
  * drm_sched_start_timeout - start timeout for reset worker
  *
  * @sched: scheduler instance to start the worker for
@@ -443,27 +417,6 @@ static void drm_sched_job_timedout(struct work_struct *work)
 	}
 }
 
- /**
-  * drm_sched_increase_karma - Update sched_entity guilty flag
-  *
-  * @bad: The job guilty of time out
-  *
-  * Increment on every hang caused by the 'bad' job. If this exceeds the hang
-  * limit of the scheduler then the respective sched entity is marked guilty and
-  * jobs from it will not be scheduled further
-  */
-void drm_sched_increase_karma(struct drm_sched_job *bad)
-{
-	drm_sched_increase_karma_ext(bad, 1);
-}
-EXPORT_SYMBOL(drm_sched_increase_karma);
-
-void drm_sched_reset_karma(struct drm_sched_job *bad)
-{
-	drm_sched_increase_karma_ext(bad, 0);
-}
-EXPORT_SYMBOL(drm_sched_reset_karma);
-
 /**
  * drm_sched_stop - stop the scheduler
  *
@@ -605,31 +558,14 @@ EXPORT_SYMBOL(drm_sched_start);
  */
 void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
 {
-	drm_sched_resubmit_jobs_ext(sched, INT_MAX);
-}
-EXPORT_SYMBOL(drm_sched_resubmit_jobs);
-
-/**
- * drm_sched_resubmit_jobs_ext - helper to relunch certain number of jobs from mirror ring list
- *
- * @sched: scheduler instance
- * @max: job numbers to relaunch
- *
- */
-void drm_sched_resubmit_jobs_ext(struct drm_gpu_scheduler *sched, int max)
-{
 	struct drm_sched_job *s_job, *tmp;
 	uint64_t guilty_context;
 	bool found_guilty = false;
 	struct dma_fence *fence;
-	int i = 0;
 
 	list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
 		struct drm_sched_fence *s_fence = s_job->s_fence;
 
-		if (i >= max)
-			break;
-
 		if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
 			found_guilty = true;
 			guilty_context = s_job->s_fence->scheduled.context;
@@ -639,7 +575,6 @@ void drm_sched_resubmit_jobs_ext(struct drm_gpu_scheduler *sched, int max)
 			dma_fence_set_error(&s_fence->finished, -ECANCELED);
 
 		fence = sched->ops->run_job(s_job);
-		i++;
 
 		if (IS_ERR_OR_NULL(fence)) {
 			if (IS_ERR(fence))
@@ -655,7 +590,7 @@ void drm_sched_resubmit_jobs_ext(struct drm_gpu_scheduler *sched, int max)
 		}
 	}
 }
-EXPORT_SYMBOL(drm_sched_resubmit_jobs_ext);
+EXPORT_SYMBOL(drm_sched_resubmit_jobs);
 
 /**
  * drm_sched_job_init - init a scheduler job
@@ -773,32 +708,28 @@ int drm_sched_job_add_dependency(struct drm_sched_job *job,
 EXPORT_SYMBOL(drm_sched_job_add_dependency);
 
 /**
- * drm_sched_job_add_implicit_dependencies - adds implicit dependencies as job
- *   dependencies
+ * drm_sched_job_add_resv_dependencies - add all fences from the resv to the job
  * @job: scheduler job to add the dependencies to
- * @obj: the gem object to add new dependencies from.
- * @write: whether the job might write the object (so we need to depend on
- * shared fences in the reservation object).
+ * @resv: the dma_resv object to get the fences from
+ * @usage: the dma_resv_usage to use to filter the fences
  *
- * This should be called after drm_gem_lock_reservations() on your array of
- * GEM objects used in the job but before updating the reservations with your
- * own fences.
+ * This adds all fences matching the given usage from @resv to @job.
+ * Must be called with the @resv lock held.
  *
  * Returns:
  * 0 on success, or an error on failing to expand the array.
  */
-int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
-					    struct drm_gem_object *obj,
-					    bool write)
+int drm_sched_job_add_resv_dependencies(struct drm_sched_job *job,
+					struct dma_resv *resv,
+					enum dma_resv_usage usage)
 {
 	struct dma_resv_iter cursor;
 	struct dma_fence *fence;
 	int ret;
 
-	dma_resv_assert_held(obj->resv);
+	dma_resv_assert_held(resv);
 
-	dma_resv_for_each_fence(&cursor, obj->resv, dma_resv_usage_rw(write),
-				fence) {
+	dma_resv_for_each_fence(&cursor, resv, usage, fence) {
 		/* Make sure to grab an additional ref on the added fence */
 		dma_fence_get(fence);
 		ret = drm_sched_job_add_dependency(job, fence);
@@ -809,8 +740,31 @@ int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
 	}
 	return 0;
 }
-EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies);
+EXPORT_SYMBOL(drm_sched_job_add_resv_dependencies);
 
+/**
+ * drm_sched_job_add_implicit_dependencies - adds implicit dependencies as job
+ *   dependencies
+ * @job: scheduler job to add the dependencies to
+ * @obj: the gem object to add new dependencies from.
+ * @write: whether the job might write the object (so we need to depend on
+ * shared fences in the reservation object).
+ *
+ * This should be called after drm_gem_lock_reservations() on your array of
+ * GEM objects used in the job but before updating the reservations with your
+ * own fences.
+ *
+ * Returns:
+ * 0 on success, or an error on failing to expand the array.
+ */
+int drm_sched_job_add_implicit_dependencies(struct drm_sched_job *job,
+					    struct drm_gem_object *obj,
+					    bool write)
+{
+	return drm_sched_job_add_resv_dependencies(job, obj->resv,
+						   dma_resv_usage_rw(write));
+}
+EXPORT_SYMBOL(drm_sched_job_add_implicit_dependencies);
 
 /**
  * drm_sched_job_cleanup - clean up scheduler job resources
@@ -1172,13 +1126,15 @@ void drm_sched_fini(struct drm_gpu_scheduler *sched)
 EXPORT_SYMBOL(drm_sched_fini);
 
 /**
- * drm_sched_increase_karma_ext - Update sched_entity guilty flag
+ * drm_sched_increase_karma - Update sched_entity guilty flag
  *
  * @bad: The job guilty of time out
- * @type: type for increase/reset karma
  *
+ * Increment on every hang caused by the 'bad' job. If this exceeds the hang
+ * limit of the scheduler then the respective sched entity is marked guilty and
+ * jobs from it will not be scheduled further
  */
-void drm_sched_increase_karma_ext(struct drm_sched_job *bad, int type)
+void drm_sched_increase_karma(struct drm_sched_job *bad)
 {
 	int i;
 	struct drm_sched_entity *tmp;
@@ -1190,10 +1146,7 @@ void drm_sched_increase_karma_ext(struct drm_sched_job *bad, int type)
 	 * corrupt but keep in mind that kernel jobs always considered good.
 	 */
 	if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
-		if (type == 0)
-			atomic_set(&bad->karma, 0);
-		else if (type == 1)
-			atomic_inc(&bad->karma);
+		atomic_inc(&bad->karma);
 
 		for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
 		     i++) {
@@ -1204,7 +1157,7 @@ void drm_sched_increase_karma_ext(struct drm_sched_job *bad, int type)
 				if (bad->s_fence->scheduled.context ==
 				    entity->fence_context) {
 					if (entity->guilty)
-						atomic_set(entity->guilty, type);
+						atomic_set(entity->guilty, 1);
 					break;
 				}
 			}
@@ -1214,4 +1167,4 @@ void drm_sched_increase_karma_ext(struct drm_sched_job *bad, int type)
 		}
 	}
 }
-EXPORT_SYMBOL(drm_sched_increase_karma_ext);
+EXPORT_SYMBOL(drm_sched_increase_karma);
diff --git a/drivers/gpu/drm/solomon/ssd130x.c b/drivers/gpu/drm/solomon/ssd130x.c
index f2795f90ea69..53464afc2b9a 100644
--- a/drivers/gpu/drm/solomon/ssd130x.c
+++ b/drivers/gpu/drm/solomon/ssd130x.c
@@ -23,7 +23,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_damage_helper.h>
 #include <drm/drm_edid.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_format_helper.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_atomic_helper.h>
diff --git a/drivers/gpu/drm/sti/sti_drv.c b/drivers/gpu/drm/sti/sti_drv.c
index 7abf010a3293..ef6a4e63198f 100644
--- a/drivers/gpu/drm/sti/sti_drv.c
+++ b/drivers/gpu/drm/sti/sti_drv.c
@@ -14,7 +14,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_debugfs.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_of.h>
diff --git a/drivers/gpu/drm/sti/sti_dvo.c b/drivers/gpu/drm/sti/sti_dvo.c
index b6ee8a82e656..577c477b5f46 100644
--- a/drivers/gpu/drm/sti/sti_dvo.c
+++ b/drivers/gpu/drm/sti/sti_dvo.c
@@ -288,7 +288,7 @@ static void sti_dvo_set_mode(struct drm_bridge *bridge,
 
 	DRM_DEBUG_DRIVER("\n");
 
-	memcpy(&dvo->mode, mode, sizeof(struct drm_display_mode));
+	drm_mode_copy(&dvo->mode, mode);
 
 	/* According to the path used (main or aux), the dvo clocks should
 	 * have a different parent clock. */
@@ -346,8 +346,9 @@ static int sti_dvo_connector_get_modes(struct drm_connector *connector)
 
 #define CLK_TOLERANCE_HZ 50
 
-static int sti_dvo_connector_mode_valid(struct drm_connector *connector,
-					struct drm_display_mode *mode)
+static enum drm_mode_status
+sti_dvo_connector_mode_valid(struct drm_connector *connector,
+			     struct drm_display_mode *mode)
 {
 	int target = mode->clock * 1000;
 	int target_min = target - CLK_TOLERANCE_HZ;
diff --git a/drivers/gpu/drm/sti/sti_hda.c b/drivers/gpu/drm/sti/sti_hda.c
index 03cc401ed593..15097ac67931 100644
--- a/drivers/gpu/drm/sti/sti_hda.c
+++ b/drivers/gpu/drm/sti/sti_hda.c
@@ -524,7 +524,7 @@ static void sti_hda_set_mode(struct drm_bridge *bridge,
 
 	DRM_DEBUG_DRIVER("\n");
 
-	memcpy(&hda->mode, mode, sizeof(struct drm_display_mode));
+	drm_mode_copy(&hda->mode, mode);
 
 	if (!hda_get_mode_idx(hda->mode, &mode_idx)) {
 		DRM_ERROR("Undefined mode\n");
@@ -601,8 +601,9 @@ static int sti_hda_connector_get_modes(struct drm_connector *connector)
 
 #define CLK_TOLERANCE_HZ 50
 
-static int sti_hda_connector_mode_valid(struct drm_connector *connector,
-					struct drm_display_mode *mode)
+static enum drm_mode_status
+sti_hda_connector_mode_valid(struct drm_connector *connector,
+			     struct drm_display_mode *mode)
 {
 	int target = mode->clock * 1000;
 	int target_min = target - CLK_TOLERANCE_HZ;
diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c
index cb82622877d2..8539fe1fedc4 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.c
+++ b/drivers/gpu/drm/sti/sti_hdmi.c
@@ -941,7 +941,7 @@ static void sti_hdmi_set_mode(struct drm_bridge *bridge,
 	DRM_DEBUG_DRIVER("\n");
 
 	/* Copy the drm display mode in the connector local structure */
-	memcpy(&hdmi->mode, mode, sizeof(struct drm_display_mode));
+	drm_mode_copy(&hdmi->mode, mode);
 
 	/* Update clock framerate according to the selected mode */
 	ret = clk_set_rate(hdmi->clk_pix, mode->clock * 1000);
@@ -1004,8 +1004,9 @@ fail:
 
 #define CLK_TOLERANCE_HZ 50
 
-static int sti_hdmi_connector_mode_valid(struct drm_connector *connector,
-					struct drm_display_mode *mode)
+static enum drm_mode_status
+sti_hdmi_connector_mode_valid(struct drm_connector *connector,
+			      struct drm_display_mode *mode)
 {
 	int target = mode->clock * 1000;
 	int target_min = target - CLK_TOLERANCE_HZ;
diff --git a/drivers/gpu/drm/stm/drv.c b/drivers/gpu/drm/stm/drv.c
index d7914f5122df..50410bd99dfe 100644
--- a/drivers/gpu/drm/stm/drv.c
+++ b/drivers/gpu/drm/stm/drv.c
@@ -18,7 +18,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_module.h>
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index d06ffd99d86e..cc94efbbf2d4 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -17,7 +17,7 @@
 #include <drm/drm_aperture.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_module.h>
 #include <drm/drm_of.h>
diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
index 34234a144e87..760ff05eabf4 100644
--- a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
+++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.c
@@ -1101,12 +1101,16 @@ static const struct component_ops sun6i_dsi_ops = {
 
 static int sun6i_dsi_probe(struct platform_device *pdev)
 {
+	const struct sun6i_dsi_variant *variant;
 	struct device *dev = &pdev->dev;
-	const char *bus_clk_name = NULL;
 	struct sun6i_dsi *dsi;
 	void __iomem *base;
 	int ret;
 
+	variant = device_get_match_data(dev);
+	if (!variant)
+		return -EINVAL;
+
 	dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL);
 	if (!dsi)
 		return -ENOMEM;
@@ -1114,10 +1118,7 @@ static int sun6i_dsi_probe(struct platform_device *pdev)
 	dsi->dev = dev;
 	dsi->host.ops = &sun6i_dsi_host_ops;
 	dsi->host.dev = dev;
-
-	if (of_device_is_compatible(dev->of_node,
-				    "allwinner,sun6i-a31-mipi-dsi"))
-		bus_clk_name = "bus";
+	dsi->variant = variant;
 
 	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base)) {
@@ -1142,7 +1143,7 @@ static int sun6i_dsi_probe(struct platform_device *pdev)
 		return PTR_ERR(dsi->regs);
 	}
 
-	dsi->bus_clk = devm_clk_get(dev, bus_clk_name);
+	dsi->bus_clk = devm_clk_get(dev, variant->has_mod_clk ? "bus" : NULL);
 	if (IS_ERR(dsi->bus_clk))
 		return dev_err_probe(dev, PTR_ERR(dsi->bus_clk),
 				     "Couldn't get the DSI bus clock\n");
@@ -1151,21 +1152,21 @@ static int sun6i_dsi_probe(struct platform_device *pdev)
 	if (ret)
 		return ret;
 
-	if (of_device_is_compatible(dev->of_node,
-				    "allwinner,sun6i-a31-mipi-dsi")) {
+	if (variant->has_mod_clk) {
 		dsi->mod_clk = devm_clk_get(dev, "mod");
 		if (IS_ERR(dsi->mod_clk)) {
 			dev_err(dev, "Couldn't get the DSI mod clock\n");
 			ret = PTR_ERR(dsi->mod_clk);
 			goto err_attach_clk;
 		}
-	}
 
-	/*
-	 * In order to operate properly, that clock seems to be always
-	 * set to 297MHz.
-	 */
-	clk_set_rate_exclusive(dsi->mod_clk, 297000000);
+		/*
+		 * In order to operate properly, the module clock on the
+		 * A31 variant always seems to be set to 297MHz.
+		 */
+		if (variant->set_mod_clk)
+			clk_set_rate_exclusive(dsi->mod_clk, 297000000);
+	}
 
 	dsi->dphy = devm_phy_get(dev, "dphy");
 	if (IS_ERR(dsi->dphy)) {
@@ -1191,7 +1192,8 @@ static int sun6i_dsi_probe(struct platform_device *pdev)
 err_remove_dsi_host:
 	mipi_dsi_host_unregister(&dsi->host);
 err_unprotect_clk:
-	clk_rate_exclusive_put(dsi->mod_clk);
+	if (dsi->variant->has_mod_clk && dsi->variant->set_mod_clk)
+		clk_rate_exclusive_put(dsi->mod_clk);
 err_attach_clk:
 	regmap_mmio_detach_clk(dsi->regs);
 
@@ -1205,16 +1207,39 @@ static int sun6i_dsi_remove(struct platform_device *pdev)
 
 	component_del(&pdev->dev, &sun6i_dsi_ops);
 	mipi_dsi_host_unregister(&dsi->host);
-	clk_rate_exclusive_put(dsi->mod_clk);
+	if (dsi->variant->has_mod_clk && dsi->variant->set_mod_clk)
+		clk_rate_exclusive_put(dsi->mod_clk);
 
 	regmap_mmio_detach_clk(dsi->regs);
 
 	return 0;
 }
 
+static const struct sun6i_dsi_variant sun6i_a31_mipi_dsi_variant = {
+	.has_mod_clk	= true,
+	.set_mod_clk	= true,
+};
+
+static const struct sun6i_dsi_variant sun50i_a64_mipi_dsi_variant = {
+};
+
+static const struct sun6i_dsi_variant sun50i_a100_mipi_dsi_variant = {
+	.has_mod_clk	= true,
+};
+
 static const struct of_device_id sun6i_dsi_of_table[] = {
-	{ .compatible = "allwinner,sun6i-a31-mipi-dsi" },
-	{ .compatible = "allwinner,sun50i-a64-mipi-dsi" },
+	{
+		.compatible	= "allwinner,sun6i-a31-mipi-dsi",
+		.data		= &sun6i_a31_mipi_dsi_variant,
+	},
+	{
+		.compatible	= "allwinner,sun50i-a64-mipi-dsi",
+		.data		= &sun50i_a64_mipi_dsi_variant,
+	},
+	{
+		.compatible	= "allwinner,sun50i-a100-mipi-dsi",
+		.data		= &sun50i_a100_mipi_dsi_variant,
+	},
 	{ }
 };
 MODULE_DEVICE_TABLE(of, sun6i_dsi_of_table);
diff --git a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h
index c863900ae3b4..f1ddefe0f554 100644
--- a/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h
+++ b/drivers/gpu/drm/sun4i/sun6i_mipi_dsi.h
@@ -15,6 +15,11 @@
 
 #define SUN6I_DSI_TCON_DIV	4
 
+struct sun6i_dsi_variant {
+	bool			has_mod_clk;
+	bool			set_mod_clk;
+};
+
 struct sun6i_dsi {
 	struct drm_connector	connector;
 	struct drm_encoder	encoder;
@@ -31,6 +36,8 @@ struct sun6i_dsi {
 	struct mipi_dsi_device	*device;
 	struct drm_device	*drm;
 	struct drm_panel	*panel;
+
+	const struct sun6i_dsi_variant *variant;
 };
 
 static inline struct sun6i_dsi *host_to_sun6i_dsi(struct mipi_dsi_host *host)
diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
index df6cc986aeba..bb0d2c144b55 100644
--- a/drivers/gpu/drm/tegra/Makefile
+++ b/drivers/gpu/drm/tegra/Makefile
@@ -24,7 +24,8 @@ tegra-drm-y := \
 	gr3d.o \
 	falcon.o \
 	vic.o \
-	nvdec.o
+	nvdec.o \
+	riscv.o
 
 tegra-drm-y += trace.o
 
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index bd0f60704467..a67453cee883 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -3205,8 +3205,10 @@ static int tegra_dc_probe(struct platform_device *pdev)
 	usleep_range(2000, 4000);
 
 	err = reset_control_assert(dc->rst);
-	if (err < 0)
+	if (err < 0) {
+		clk_disable_unprepare(dc->clk);
 		return err;
+	}
 
 	usleep_range(2000, 4000);
 
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 6748ec1e0005..7bd2e65c2a16 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1093,6 +1093,10 @@ static bool host1x_drm_wants_iommu(struct host1x_device *dev)
 	struct host1x *host1x = dev_get_drvdata(dev->dev.parent);
 	struct iommu_domain *domain;
 
+	/* Our IOMMU usage policy doesn't currently play well with GART */
+	if (of_machine_is_compatible("nvidia,tegra20"))
+		return false;
+
 	/*
 	 * If the Tegra DRM clients are backed by an IOMMU, push buffers are
 	 * likely to be allocated beyond the 32-bit boundary if sufficient
@@ -1382,6 +1386,7 @@ static const struct of_device_id host1x_drm_subdevs[] = {
 	{ .compatible = "nvidia,tegra194-vic", },
 	{ .compatible = "nvidia,tegra194-nvdec", },
 	{ .compatible = "nvidia,tegra234-vic", },
+	{ .compatible = "nvidia,tegra234-nvdec", },
 	{ /* sentinel */ }
 };
 
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index bce71c0ccc9e..a900300ae5bd 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -206,6 +206,8 @@ static int tegra_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
 static const struct fb_ops tegra_fb_ops = {
 	.owner = THIS_MODULE,
 	DRM_FB_HELPER_DEFAULT_OPS,
+	.fb_read = drm_fb_helper_sys_read,
+	.fb_write = drm_fb_helper_sys_write,
 	.fb_fillrect = drm_fb_helper_sys_fillrect,
 	.fb_copyarea = drm_fb_helper_sys_copyarea,
 	.fb_imageblit = drm_fb_helper_sys_imageblit,
@@ -243,7 +245,7 @@ static int tegra_fbdev_probe(struct drm_fb_helper *helper,
 	if (IS_ERR(bo))
 		return PTR_ERR(bo);
 
-	info = drm_fb_helper_alloc_fbi(helper);
+	info = drm_fb_helper_alloc_info(helper);
 	if (IS_ERR(info)) {
 		dev_err(drm->dev, "failed to allocate framebuffer info\n");
 		drm_gem_object_put(&bo->gem);
@@ -261,7 +263,7 @@ static int tegra_fbdev_probe(struct drm_fb_helper *helper,
 
 	fb = fbdev->fb;
 	helper->fb = fb;
-	helper->fbdev = info;
+	helper->info = info;
 
 	info->fbops = &tegra_fb_ops;
 
@@ -347,7 +349,7 @@ fini:
 
 static void tegra_fbdev_exit(struct tegra_fbdev *fbdev)
 {
-	drm_fb_helper_unregister_fbi(&fbdev->base);
+	drm_fb_helper_unregister_info(&fbdev->base);
 
 	if (fbdev->fb) {
 		struct tegra_bo *bo = tegra_fb_get_plane(fbdev->fb, 0);
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index b09b8ab40ae4..979e7bc902f6 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -694,6 +694,8 @@ static int tegra_gem_prime_mmap(struct dma_buf *buf, struct vm_area_struct *vma)
 	struct drm_gem_object *gem = buf->priv;
 	int err;
 
+	dma_resv_assert_held(buf->resv);
+
 	err = drm_gem_mmap_obj(gem, gem->size, vma);
 	if (err < 0)
 		return err;
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
index bf240767dad9..40ec3e6cf204 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -867,14 +867,7 @@ static int tegra_hdmi_reconfigure_audio(struct tegra_hdmi *hdmi)
 
 static bool tegra_output_is_hdmi(struct tegra_output *output)
 {
-	struct edid *edid;
-
-	if (!output->connector.edid_blob_ptr)
-		return false;
-
-	edid = (struct edid *)output->connector.edid_blob_ptr->data;
-
-	return drm_detect_hdmi_monitor(edid);
+	return output->connector.display_info.is_hdmi;
 }
 
 static enum drm_connector_status
diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c
index 276fe0472730..10fd21517281 100644
--- a/drivers/gpu/drm/tegra/nvdec.c
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2015-2021, NVIDIA Corporation.
+ * Copyright (c) 2015-2022, NVIDIA Corporation.
  */
 
 #include <linux/clk.h>
@@ -8,6 +8,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/host1x.h>
 #include <linux/iommu.h>
+#include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -16,18 +17,22 @@
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
 
-#include <soc/tegra/pmc.h>
+#include <soc/tegra/mc.h>
 
 #include "drm.h"
 #include "falcon.h"
+#include "riscv.h"
 #include "vic.h"
 
+#define NVDEC_FALCON_DEBUGINFO			0x1094
 #define NVDEC_TFBIF_TRANSCFG			0x2c44
 
 struct nvdec_config {
 	const char *firmware;
 	unsigned int version;
 	bool supports_sid;
+	bool has_riscv;
+	bool has_extra_clocks;
 };
 
 struct nvdec {
@@ -37,10 +42,16 @@ struct nvdec {
 	struct tegra_drm_client client;
 	struct host1x_channel *channel;
 	struct device *dev;
-	struct clk *clk;
+	struct clk_bulk_data clks[3];
+	unsigned int num_clks;
+	struct reset_control *reset;
 
 	/* Platform configuration */
 	const struct nvdec_config *config;
+
+	/* RISC-V specific data */
+	struct tegra_drm_riscv riscv;
+	phys_addr_t carveout_base;
 };
 
 static inline struct nvdec *to_nvdec(struct tegra_drm_client *client)
@@ -54,7 +65,7 @@ static inline void nvdec_writel(struct nvdec *nvdec, u32 value,
 	writel(value, nvdec->regs + offset);
 }
 
-static int nvdec_boot(struct nvdec *nvdec)
+static int nvdec_boot_falcon(struct nvdec *nvdec)
 {
 #ifdef CONFIG_IOMMU_API
 	struct iommu_fwspec *spec = dev_iommu_fwspec_get(nvdec->dev);
@@ -90,6 +101,64 @@ static int nvdec_boot(struct nvdec *nvdec)
 	return 0;
 }
 
+static int nvdec_wait_debuginfo(struct nvdec *nvdec, const char *phase)
+{
+	int err;
+	u32 val;
+
+	err = readl_poll_timeout(nvdec->regs + NVDEC_FALCON_DEBUGINFO, val, val == 0x0, 10, 100000);
+	if (err) {
+		dev_err(nvdec->dev, "failed to boot %s, debuginfo=0x%x\n", phase, val);
+		return err;
+	}
+
+	return 0;
+}
+
+static int nvdec_boot_riscv(struct nvdec *nvdec)
+{
+	int err;
+
+	err = reset_control_acquire(nvdec->reset);
+	if (err)
+		return err;
+
+	nvdec_writel(nvdec, 0xabcd1234, NVDEC_FALCON_DEBUGINFO);
+
+	err = tegra_drm_riscv_boot_bootrom(&nvdec->riscv, nvdec->carveout_base, 1,
+					   &nvdec->riscv.bl_desc);
+	if (err) {
+		dev_err(nvdec->dev, "failed to execute bootloader\n");
+		goto release_reset;
+	}
+
+	err = nvdec_wait_debuginfo(nvdec, "bootloader");
+	if (err)
+		goto release_reset;
+
+	err = reset_control_reset(nvdec->reset);
+	if (err)
+		goto release_reset;
+
+	nvdec_writel(nvdec, 0xabcd1234, NVDEC_FALCON_DEBUGINFO);
+
+	err = tegra_drm_riscv_boot_bootrom(&nvdec->riscv, nvdec->carveout_base, 1,
+					   &nvdec->riscv.os_desc);
+	if (err) {
+		dev_err(nvdec->dev, "failed to execute firmware\n");
+		goto release_reset;
+	}
+
+	err = nvdec_wait_debuginfo(nvdec, "firmware");
+	if (err)
+		goto release_reset;
+
+release_reset:
+	reset_control_release(nvdec->reset);
+
+	return err;
+}
+
 static int nvdec_init(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
@@ -189,7 +258,7 @@ static const struct host1x_client_ops nvdec_client_ops = {
 	.exit = nvdec_exit,
 };
 
-static int nvdec_load_firmware(struct nvdec *nvdec)
+static int nvdec_load_falcon_firmware(struct nvdec *nvdec)
 {
 	struct host1x_client *client = &nvdec->client.base;
 	struct tegra_drm *tegra = nvdec->client.drm;
@@ -252,30 +321,35 @@ cleanup:
 	return err;
 }
 
-
 static __maybe_unused int nvdec_runtime_resume(struct device *dev)
 {
 	struct nvdec *nvdec = dev_get_drvdata(dev);
 	int err;
 
-	err = clk_prepare_enable(nvdec->clk);
+	err = clk_bulk_prepare_enable(nvdec->num_clks, nvdec->clks);
 	if (err < 0)
 		return err;
 
 	usleep_range(10, 20);
 
-	err = nvdec_load_firmware(nvdec);
-	if (err < 0)
-		goto disable;
+	if (nvdec->config->has_riscv) {
+		err = nvdec_boot_riscv(nvdec);
+		if (err < 0)
+			goto disable;
+	} else {
+		err = nvdec_load_falcon_firmware(nvdec);
+		if (err < 0)
+			goto disable;
 
-	err = nvdec_boot(nvdec);
-	if (err < 0)
-		goto disable;
+		err = nvdec_boot_falcon(nvdec);
+		if (err < 0)
+			goto disable;
+	}
 
 	return 0;
 
 disable:
-	clk_disable_unprepare(nvdec->clk);
+	clk_bulk_disable_unprepare(nvdec->num_clks, nvdec->clks);
 	return err;
 }
 
@@ -285,7 +359,7 @@ static __maybe_unused int nvdec_runtime_suspend(struct device *dev)
 
 	host1x_channel_stop(nvdec->channel);
 
-	clk_disable_unprepare(nvdec->clk);
+	clk_bulk_disable_unprepare(nvdec->num_clks, nvdec->clks);
 
 	return 0;
 }
@@ -346,10 +420,18 @@ static const struct nvdec_config nvdec_t194_config = {
 	.supports_sid = true,
 };
 
+static const struct nvdec_config nvdec_t234_config = {
+	.version = 0x23,
+	.supports_sid = true,
+	.has_riscv = true,
+	.has_extra_clocks = true,
+};
+
 static const struct of_device_id tegra_nvdec_of_match[] = {
 	{ .compatible = "nvidia,tegra210-nvdec", .data = &nvdec_t210_config },
 	{ .compatible = "nvidia,tegra186-nvdec", .data = &nvdec_t186_config },
 	{ .compatible = "nvidia,tegra194-nvdec", .data = &nvdec_t194_config },
+	{ .compatible = "nvidia,tegra234-nvdec", .data = &nvdec_t234_config },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, tegra_nvdec_of_match);
@@ -383,13 +465,22 @@ static int nvdec_probe(struct platform_device *pdev)
 	if (IS_ERR(nvdec->regs))
 		return PTR_ERR(nvdec->regs);
 
-	nvdec->clk = devm_clk_get(dev, NULL);
-	if (IS_ERR(nvdec->clk)) {
-		dev_err(&pdev->dev, "failed to get clock\n");
-		return PTR_ERR(nvdec->clk);
+	nvdec->clks[0].id = "nvdec";
+	nvdec->num_clks = 1;
+
+	if (nvdec->config->has_extra_clocks) {
+		nvdec->num_clks = 3;
+		nvdec->clks[1].id = "fuse";
+		nvdec->clks[2].id = "tsec_pka";
 	}
 
-	err = clk_set_rate(nvdec->clk, ULONG_MAX);
+	err = devm_clk_bulk_get(dev, nvdec->num_clks, nvdec->clks);
+	if (err) {
+		dev_err(&pdev->dev, "failed to get clock(s)\n");
+		return err;
+	}
+
+	err = clk_set_rate(nvdec->clks[0].clk, ULONG_MAX);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to set clock rate\n");
 		return err;
@@ -399,12 +490,42 @@ static int nvdec_probe(struct platform_device *pdev)
 	if (err < 0)
 		host_class = HOST1X_CLASS_NVDEC;
 
-	nvdec->falcon.dev = dev;
-	nvdec->falcon.regs = nvdec->regs;
+	if (nvdec->config->has_riscv) {
+		struct tegra_mc *mc;
 
-	err = falcon_init(&nvdec->falcon);
-	if (err < 0)
-		return err;
+		mc = devm_tegra_memory_controller_get(dev);
+		if (IS_ERR(mc)) {
+			dev_err_probe(dev, PTR_ERR(mc),
+				"failed to get memory controller handle\n");
+			return PTR_ERR(mc);
+		}
+
+		err = tegra_mc_get_carveout_info(mc, 1, &nvdec->carveout_base, NULL);
+		if (err) {
+			dev_err(dev, "failed to get carveout info: %d\n", err);
+			return err;
+		}
+
+		nvdec->reset = devm_reset_control_get_exclusive_released(dev, "nvdec");
+		if (IS_ERR(nvdec->reset)) {
+			dev_err_probe(dev, PTR_ERR(nvdec->reset), "failed to get reset\n");
+			return PTR_ERR(nvdec->reset);
+		}
+
+		nvdec->riscv.dev = dev;
+		nvdec->riscv.regs = nvdec->regs;
+
+		err = tegra_drm_riscv_read_descriptors(&nvdec->riscv);
+		if (err < 0)
+			return err;
+	} else {
+		nvdec->falcon.dev = dev;
+		nvdec->falcon.regs = nvdec->regs;
+
+		err = falcon_init(&nvdec->falcon);
+		if (err < 0)
+			return err;
+	}
 
 	platform_set_drvdata(pdev, nvdec);
 
diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c
index 47d26b5d9945..a8925dcd7edd 100644
--- a/drivers/gpu/drm/tegra/output.c
+++ b/drivers/gpu/drm/tegra/output.c
@@ -133,11 +133,11 @@ int tegra_output_probe(struct tegra_output *output)
 		}
 	}
 
-	output->hpd_gpio = devm_gpiod_get_from_of_node(output->dev,
-						       output->of_node,
-						       "nvidia,hpd-gpio", 0,
-						       GPIOD_IN,
-						       "HDMI hotplug detect");
+	output->hpd_gpio = devm_fwnode_gpiod_get(output->dev,
+					of_fwnode_handle(output->of_node),
+					"nvidia,hpd",
+					GPIOD_IN,
+					"HDMI hotplug detect");
 	if (IS_ERR(output->hpd_gpio)) {
 		if (PTR_ERR(output->hpd_gpio) != -ENOENT)
 			return PTR_ERR(output->hpd_gpio);
diff --git a/drivers/gpu/drm/tegra/riscv.c b/drivers/gpu/drm/tegra/riscv.c
new file mode 100644
index 000000000000..6580416408f8
--- /dev/null
+++ b/drivers/gpu/drm/tegra/riscv.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, NVIDIA Corporation.
+ */
+
+#include <linux/dev_printk.h>
+#include <linux/device.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+
+#include "riscv.h"
+
+#define RISCV_CPUCTL					0x4388
+#define RISCV_CPUCTL_STARTCPU_TRUE			(1 << 0)
+#define RISCV_BR_RETCODE				0x465c
+#define RISCV_BR_RETCODE_RESULT_V(x)			((x) & 0x3)
+#define RISCV_BR_RETCODE_RESULT_PASS_V			3
+#define RISCV_BCR_CTRL					0x4668
+#define RISCV_BCR_CTRL_CORE_SELECT_RISCV		(1 << 4)
+#define RISCV_BCR_DMACFG				0x466c
+#define RISCV_BCR_DMACFG_TARGET_LOCAL_FB		(0 << 0)
+#define RISCV_BCR_DMACFG_LOCK_LOCKED			(1 << 31)
+#define RISCV_BCR_DMAADDR_PKCPARAM_LO			0x4670
+#define RISCV_BCR_DMAADDR_PKCPARAM_HI			0x4674
+#define RISCV_BCR_DMAADDR_FMCCODE_LO			0x4678
+#define RISCV_BCR_DMAADDR_FMCCODE_HI			0x467c
+#define RISCV_BCR_DMAADDR_FMCDATA_LO			0x4680
+#define RISCV_BCR_DMAADDR_FMCDATA_HI			0x4684
+#define RISCV_BCR_DMACFG_SEC				0x4694
+#define RISCV_BCR_DMACFG_SEC_GSCID(v)			((v) << 16)
+
+static void riscv_writel(struct tegra_drm_riscv *riscv, u32 value, u32 offset)
+{
+	writel(value, riscv->regs + offset);
+}
+
+int tegra_drm_riscv_read_descriptors(struct tegra_drm_riscv *riscv)
+{
+	struct tegra_drm_riscv_descriptor *bl = &riscv->bl_desc;
+	struct tegra_drm_riscv_descriptor *os = &riscv->os_desc;
+	const struct device_node *np = riscv->dev->of_node;
+	int err;
+
+#define READ_PROP(name, location) \
+	err = of_property_read_u32(np, name, location); \
+	if (err) { \
+		dev_err(riscv->dev, "failed to read " name ": %d\n", err); \
+		return err; \
+	}
+
+	READ_PROP("nvidia,bl-manifest-offset", &bl->manifest_offset);
+	READ_PROP("nvidia,bl-code-offset", &bl->code_offset);
+	READ_PROP("nvidia,bl-data-offset", &bl->data_offset);
+	READ_PROP("nvidia,os-manifest-offset", &os->manifest_offset);
+	READ_PROP("nvidia,os-code-offset", &os->code_offset);
+	READ_PROP("nvidia,os-data-offset", &os->data_offset);
+#undef READ_PROP
+
+	if (bl->manifest_offset == 0 && bl->code_offset == 0 &&
+	    bl->data_offset == 0 && os->manifest_offset == 0 &&
+	    os->code_offset == 0 && os->data_offset == 0) {
+		dev_err(riscv->dev, "descriptors not available\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+int tegra_drm_riscv_boot_bootrom(struct tegra_drm_riscv *riscv, phys_addr_t image_address,
+				 u32 gscid, const struct tegra_drm_riscv_descriptor *desc)
+{
+	phys_addr_t addr;
+	int err;
+	u32 val;
+
+	riscv_writel(riscv, RISCV_BCR_CTRL_CORE_SELECT_RISCV, RISCV_BCR_CTRL);
+
+	addr = image_address + desc->manifest_offset;
+	riscv_writel(riscv, lower_32_bits(addr >> 8), RISCV_BCR_DMAADDR_PKCPARAM_LO);
+	riscv_writel(riscv, upper_32_bits(addr >> 8), RISCV_BCR_DMAADDR_PKCPARAM_HI);
+
+	addr = image_address + desc->code_offset;
+	riscv_writel(riscv, lower_32_bits(addr >> 8), RISCV_BCR_DMAADDR_FMCCODE_LO);
+	riscv_writel(riscv, upper_32_bits(addr >> 8), RISCV_BCR_DMAADDR_FMCCODE_HI);
+
+	addr = image_address + desc->data_offset;
+	riscv_writel(riscv, lower_32_bits(addr >> 8), RISCV_BCR_DMAADDR_FMCDATA_LO);
+	riscv_writel(riscv, upper_32_bits(addr >> 8), RISCV_BCR_DMAADDR_FMCDATA_HI);
+
+	riscv_writel(riscv, RISCV_BCR_DMACFG_SEC_GSCID(gscid), RISCV_BCR_DMACFG_SEC);
+	riscv_writel(riscv,
+		RISCV_BCR_DMACFG_TARGET_LOCAL_FB | RISCV_BCR_DMACFG_LOCK_LOCKED, RISCV_BCR_DMACFG);
+
+	riscv_writel(riscv, RISCV_CPUCTL_STARTCPU_TRUE, RISCV_CPUCTL);
+
+	err = readl_poll_timeout(
+		riscv->regs + RISCV_BR_RETCODE, val,
+		RISCV_BR_RETCODE_RESULT_V(val) == RISCV_BR_RETCODE_RESULT_PASS_V,
+		10, 100000);
+	if (err) {
+		dev_err(riscv->dev, "error during bootrom execution. BR_RETCODE=%d\n", val);
+		return err;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/tegra/riscv.h b/drivers/gpu/drm/tegra/riscv.h
new file mode 100644
index 000000000000..bbeb2db078b6
--- /dev/null
+++ b/drivers/gpu/drm/tegra/riscv.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022, NVIDIA Corporation.
+ */
+
+#ifndef DRM_TEGRA_RISCV_H
+#define DRM_TEGRA_RISCV_H
+
+struct tegra_drm_riscv_descriptor {
+	u32 manifest_offset;
+	u32 code_offset;
+	u32 code_size;
+	u32 data_offset;
+	u32 data_size;
+};
+
+struct tegra_drm_riscv {
+	/* User initializes */
+	struct device *dev;
+	void __iomem *regs;
+
+	struct tegra_drm_riscv_descriptor bl_desc;
+	struct tegra_drm_riscv_descriptor os_desc;
+};
+
+int tegra_drm_riscv_read_descriptors(struct tegra_drm_riscv *riscv);
+int tegra_drm_riscv_boot_bootrom(struct tegra_drm_riscv *riscv, phys_addr_t image_address,
+				 u32 gscid, const struct tegra_drm_riscv_descriptor *desc);
+
+#endif
diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c
index b24738bdf3df..066f88564169 100644
--- a/drivers/gpu/drm/tegra/submit.c
+++ b/drivers/gpu/drm/tegra/submit.c
@@ -133,7 +133,7 @@ static void gather_bo_munmap(struct host1x_bo *host_bo, void *addr)
 {
 }
 
-const struct host1x_bo_ops gather_bo_ops = {
+static const struct host1x_bo_ops gather_bo_ops = {
 	.get = gather_bo_get,
 	.put = gather_bo_put,
 	.pin = gather_bo_pin,
@@ -169,14 +169,9 @@ static void *alloc_copy_user_array(void __user *from, size_t count, size_t size)
 	if (copy_len > 0x4000)
 		return ERR_PTR(-E2BIG);
 
-	data = kvmalloc(copy_len, GFP_KERNEL);
-	if (!data)
-		return ERR_PTR(-ENOMEM);
-
-	if (copy_from_user(data, from, copy_len)) {
-		kvfree(data);
-		return ERR_PTR(-EFAULT);
-	}
+	data = vmemdup_user(from, copy_len);
+	if (IS_ERR(data))
+		return ERR_CAST(data);
 
 	return data;
 }
diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c
index a98239cb0e29..5adab6b22916 100644
--- a/drivers/gpu/drm/tegra/uapi.c
+++ b/drivers/gpu/drm/tegra/uapi.c
@@ -116,7 +116,7 @@ int tegra_drm_ioctl_channel_open(struct drm_device *drm, void *data, struct drm_
 
 		if (supported)
 			context->memory_context = host1x_memory_context_alloc(
-				host, get_task_pid(current, PIDTYPE_TGID));
+				host, client->base.dev, get_task_pid(current, PIDTYPE_TGID));
 
 		if (IS_ERR(context->memory_context)) {
 			if (PTR_ERR(context->memory_context) != -EOPNOTSUPP) {
diff --git a/drivers/gpu/drm/tests/Makefile b/drivers/gpu/drm/tests/Makefile
index 2d9f49b62ecb..b29ef1085cad 100644
--- a/drivers/gpu/drm/tests/Makefile
+++ b/drivers/gpu/drm/tests/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_DRM_KUNIT_TEST) += \
 	drm_format_helper_test.o \
 	drm_format_test.o \
 	drm_framebuffer_test.o \
+	drm_kunit_helpers.o \
 	drm_mm_test.o \
 	drm_plane_helper_test.o \
 	drm_rect_test.o
diff --git a/drivers/gpu/drm/tests/drm_client_modeset_test.c b/drivers/gpu/drm/tests/drm_client_modeset_test.c
new file mode 100644
index 000000000000..362a5fbd82f5
--- /dev/null
+++ b/drivers/gpu/drm/tests/drm_client_modeset_test.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Maxime Ripard <mripard@kernel.org>
+ */
+
+#include <kunit/test.h>
+
+#include <drm/drm_connector.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_modes.h>
+#include <drm/drm_modeset_helper_vtables.h>
+#include <drm/drm_probe_helper.h>
+
+#include "drm_kunit_helpers.h"
+
+struct drm_client_modeset_test_priv {
+	struct drm_device *drm;
+	struct drm_connector connector;
+};
+
+static int drm_client_modeset_connector_get_modes(struct drm_connector *connector)
+{
+	return drm_add_modes_noedid(connector, 1920, 1200);
+}
+
+static const struct drm_connector_helper_funcs drm_client_modeset_connector_helper_funcs = {
+	.get_modes = drm_client_modeset_connector_get_modes,
+};
+
+static const struct drm_connector_funcs drm_client_modeset_connector_funcs = {
+};
+
+static int drm_client_modeset_test_init(struct kunit *test)
+{
+	struct drm_client_modeset_test_priv *priv;
+	int ret;
+
+	priv = kunit_kzalloc(test, sizeof(*priv), GFP_KERNEL);
+	KUNIT_ASSERT_NOT_NULL(test, priv);
+
+	test->priv = priv;
+
+	priv->drm = drm_kunit_device_init(test, DRIVER_MODESET, "drm-client-modeset-test");
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, priv->drm);
+
+	ret = drmm_connector_init(priv->drm, &priv->connector,
+				  &drm_client_modeset_connector_funcs,
+				  DRM_MODE_CONNECTOR_Unknown,
+				  NULL);
+	KUNIT_ASSERT_EQ(test, ret, 0);
+
+	drm_connector_helper_add(&priv->connector, &drm_client_modeset_connector_helper_funcs);
+
+	return 0;
+}
+
+static void drm_test_pick_cmdline_res_1920_1080_60(struct kunit *test)
+{
+	struct drm_client_modeset_test_priv *priv = test->priv;
+	struct drm_device *drm = priv->drm;
+	struct drm_connector *connector = &priv->connector;
+	struct drm_cmdline_mode *cmdline_mode = &connector->cmdline_mode;
+	struct drm_display_mode *expected_mode, *mode;
+	const char *cmdline = "1920x1080@60";
+	int ret;
+
+	expected_mode = drm_mode_find_dmt(priv->drm, 1920, 1080, 60, false);
+	KUNIT_ASSERT_NOT_NULL(test, expected_mode);
+
+	KUNIT_ASSERT_TRUE(test,
+			  drm_mode_parse_command_line_for_connector(cmdline,
+								    connector,
+								    cmdline_mode));
+
+	mutex_lock(&drm->mode_config.mutex);
+	ret = drm_helper_probe_single_connector_modes(connector, 1920, 1080);
+	mutex_unlock(&drm->mode_config.mutex);
+	KUNIT_ASSERT_GT(test, ret, 0);
+
+	mode = drm_connector_pick_cmdline_mode(connector);
+	KUNIT_ASSERT_NOT_NULL(test, mode);
+
+	KUNIT_EXPECT_TRUE(test, drm_mode_equal(expected_mode, mode));
+}
+
+static struct kunit_case drm_test_pick_cmdline_tests[] = {
+	KUNIT_CASE(drm_test_pick_cmdline_res_1920_1080_60),
+	{}
+};
+
+static struct kunit_suite drm_test_pick_cmdline_test_suite = {
+	.name = "drm_test_pick_cmdline",
+	.init = drm_client_modeset_test_init,
+	.test_cases = drm_test_pick_cmdline_tests
+};
+
+kunit_test_suite(drm_test_pick_cmdline_test_suite);
diff --git a/drivers/gpu/drm/tests/drm_format_helper_test.c b/drivers/gpu/drm/tests/drm_format_helper_test.c
index 2191e57f2297..567c71f95edc 100644
--- a/drivers/gpu/drm/tests/drm_format_helper_test.c
+++ b/drivers/gpu/drm/tests/drm_format_helper_test.c
@@ -315,7 +315,7 @@ static void drm_test_fb_xrgb8888_to_gray8(struct kunit *test)
 	iosys_map_set_vaddr(&src, xrgb8888);
 
 	drm_fb_xrgb8888_to_gray8(&dst, &result->dst_pitch, &src, &fb, &params->clip);
-	KUNIT_EXPECT_EQ(test, memcmp(buf, result->expected, dst_size), 0);
+	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 }
 
 static void drm_test_fb_xrgb8888_to_rgb332(struct kunit *test)
@@ -345,7 +345,7 @@ static void drm_test_fb_xrgb8888_to_rgb332(struct kunit *test)
 	iosys_map_set_vaddr(&src, xrgb8888);
 
 	drm_fb_xrgb8888_to_rgb332(&dst, &result->dst_pitch, &src, &fb, &params->clip);
-	KUNIT_EXPECT_EQ(test, memcmp(buf, result->expected, dst_size), 0);
+	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 }
 
 static void drm_test_fb_xrgb8888_to_rgb565(struct kunit *test)
@@ -375,10 +375,10 @@ static void drm_test_fb_xrgb8888_to_rgb565(struct kunit *test)
 	iosys_map_set_vaddr(&src, xrgb8888);
 
 	drm_fb_xrgb8888_to_rgb565(&dst, &result->dst_pitch, &src, &fb, &params->clip, false);
-	KUNIT_EXPECT_EQ(test, memcmp(buf, result->expected, dst_size), 0);
+	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 
 	drm_fb_xrgb8888_to_rgb565(&dst, &result->dst_pitch, &src, &fb, &params->clip, true);
-	KUNIT_EXPECT_EQ(test, memcmp(buf, result->expected_swab, dst_size), 0);
+	KUNIT_EXPECT_MEMEQ(test, buf, result->expected_swab, dst_size);
 }
 
 static void drm_test_fb_xrgb8888_to_rgb888(struct kunit *test)
@@ -408,7 +408,7 @@ static void drm_test_fb_xrgb8888_to_rgb888(struct kunit *test)
 	iosys_map_set_vaddr(&src, xrgb8888);
 
 	drm_fb_xrgb8888_to_rgb888(&dst, &result->dst_pitch, &src, &fb, &params->clip);
-	KUNIT_EXPECT_EQ(test, memcmp(buf, result->expected, dst_size), 0);
+	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 }
 
 static void drm_test_fb_xrgb8888_to_xrgb2101010(struct kunit *test)
@@ -439,7 +439,7 @@ static void drm_test_fb_xrgb8888_to_xrgb2101010(struct kunit *test)
 
 	drm_fb_xrgb8888_to_xrgb2101010(&dst, &result->dst_pitch, &src, &fb, &params->clip);
 	buf = le32buf_to_cpu(test, buf, dst_size / sizeof(u32));
-	KUNIT_EXPECT_EQ(test, memcmp(buf, result->expected, dst_size), 0);
+	KUNIT_EXPECT_MEMEQ(test, buf, result->expected, dst_size);
 }
 
 static struct kunit_case drm_format_helper_test_cases[] = {
diff --git a/drivers/gpu/drm/tests/drm_kunit_helpers.c b/drivers/gpu/drm/tests/drm_kunit_helpers.c
new file mode 100644
index 000000000000..f1662091f250
--- /dev/null
+++ b/drivers/gpu/drm/tests/drm_kunit_helpers.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <drm/drm_drv.h>
+#include <drm/drm_managed.h>
+
+#include <kunit/resource.h>
+
+#include <linux/device.h>
+
+#include "drm_kunit_helpers.h"
+
+struct kunit_dev {
+	struct drm_device base;
+};
+
+static const struct drm_mode_config_funcs drm_mode_config_funcs = {
+};
+
+static int dev_init(struct kunit_resource *res, void *ptr)
+{
+	char *name = ptr;
+	struct device *dev;
+
+	dev = root_device_register(name);
+	if (IS_ERR(dev))
+		return PTR_ERR(dev);
+
+	res->data = dev;
+	return 0;
+}
+
+static void dev_free(struct kunit_resource *res)
+{
+	struct device *dev = res->data;
+
+	root_device_unregister(dev);
+}
+
+struct drm_device *drm_kunit_device_init(struct kunit *test, u32 features, char *name)
+{
+	struct kunit_dev *kdev;
+	struct drm_device *drm;
+	struct drm_driver *driver;
+	struct device *dev;
+	int ret;
+
+	dev = kunit_alloc_resource(test, dev_init, dev_free, GFP_KERNEL, name);
+	if (!dev)
+		return ERR_PTR(-ENOMEM);
+
+	driver = kunit_kzalloc(test, sizeof(*driver), GFP_KERNEL);
+	if (!driver)
+		return ERR_PTR(-ENOMEM);
+
+	driver->driver_features = features;
+	kdev = devm_drm_dev_alloc(dev, driver, struct kunit_dev, base);
+	if (IS_ERR(kdev))
+		return ERR_CAST(kdev);
+
+	drm = &kdev->base;
+	drm->mode_config.funcs = &drm_mode_config_funcs;
+
+	ret = drmm_mode_config_init(drm);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return drm;
+}
+
+MODULE_AUTHOR("Maxime Ripard <maxime@cerno.tech>");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/tests/drm_kunit_helpers.h b/drivers/gpu/drm/tests/drm_kunit_helpers.h
new file mode 100644
index 000000000000..20ab6eec4c89
--- /dev/null
+++ b/drivers/gpu/drm/tests/drm_kunit_helpers.h
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifndef DRM_KUNIT_HELPERS_H_
+#define DRM_KUNIT_HELPERS_H_
+
+struct drm_device;
+struct kunit;
+
+struct drm_device *drm_kunit_device_init(struct kunit *test, u32 features, char *name);
+
+#endif // DRM_KUNIT_HELPERS_H_
diff --git a/drivers/gpu/drm/tidss/tidss_drv.c b/drivers/gpu/drm/tidss/tidss_drv.c
index 15cd9b91b7e2..07d94b1e8089 100644
--- a/drivers/gpu/drm/tidss/tidss_drv.c
+++ b/drivers/gpu/drm/tidss/tidss_drv.c
@@ -14,7 +14,7 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_managed.h>
 #include <drm/drm_module.h>
diff --git a/drivers/gpu/drm/tidss/tidss_kms.c b/drivers/gpu/drm/tidss/tidss_kms.c
index afb2879980c6..345bcc3011e4 100644
--- a/drivers/gpu/drm/tidss/tidss_kms.c
+++ b/drivers/gpu/drm/tidss/tidss_kms.c
@@ -10,7 +10,6 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_bridge.h>
 #include <drm/drm_crtc_helper.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_of.h>
 #include <drm/drm_panel.h>
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
index f72755b8ea14..80615ecdae0b 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
@@ -16,7 +16,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_debugfs.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
diff --git a/drivers/gpu/drm/tiny/Kconfig b/drivers/gpu/drm/tiny/Kconfig
index a300b03a3c7a..f6889f649bc1 100644
--- a/drivers/gpu/drm/tiny/Kconfig
+++ b/drivers/gpu/drm/tiny/Kconfig
@@ -53,7 +53,7 @@ config DRM_GM12U320
 
 config DRM_OFDRM
 	tristate "Open Firmware display driver"
-	depends on DRM && OF && (PPC || COMPILE_TEST)
+	depends on DRM && MMU && OF && (PPC || COMPILE_TEST)
 	select APERTURE_HELPERS
 	select DRM_GEM_SHMEM_HELPER
 	select DRM_KMS_HELPER
diff --git a/drivers/gpu/drm/tiny/arcpgu.c b/drivers/gpu/drm/tiny/arcpgu.c
index bb302a3fd6b5..611bbee15071 100644
--- a/drivers/gpu/drm/tiny/arcpgu.c
+++ b/drivers/gpu/drm/tiny/arcpgu.c
@@ -12,7 +12,7 @@
 #include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_fb_dma_helper.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_dma_helper.h>
diff --git a/drivers/gpu/drm/tiny/bochs.c b/drivers/gpu/drm/tiny/bochs.c
index 04682f831544..024346054c70 100644
--- a/drivers/gpu/drm/tiny/bochs.c
+++ b/drivers/gpu/drm/tiny/bochs.c
@@ -7,7 +7,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_framebuffer_helper.h>
diff --git a/drivers/gpu/drm/tiny/cirrus.c b/drivers/gpu/drm/tiny/cirrus.c
index 354d5e854a6f..678c2ef1cae7 100644
--- a/drivers/gpu/drm/tiny/cirrus.c
+++ b/drivers/gpu/drm/tiny/cirrus.c
@@ -30,7 +30,7 @@
 #include <drm/drm_damage_helper.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_file.h>
 #include <drm/drm_format_helper.h>
 #include <drm/drm_fourcc.h>
diff --git a/drivers/gpu/drm/tiny/gm12u320.c b/drivers/gpu/drm/tiny/gm12u320.c
index 7441d992a5d7..130fd07a967d 100644
--- a/drivers/gpu/drm/tiny/gm12u320.c
+++ b/drivers/gpu/drm/tiny/gm12u320.c
@@ -12,7 +12,7 @@
 #include <drm/drm_damage_helper.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_edid.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_file.h>
 #include <drm/drm_format_helper.h>
 #include <drm/drm_fourcc.h>
diff --git a/drivers/gpu/drm/tiny/hx8357d.c b/drivers/gpu/drm/tiny/hx8357d.c
index 48c24aa8c28a..9f634f720817 100644
--- a/drivers/gpu/drm/tiny/hx8357d.c
+++ b/drivers/gpu/drm/tiny/hx8357d.c
@@ -18,7 +18,7 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_atomic_helper.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_managed.h>
diff --git a/drivers/gpu/drm/tiny/ili9163.c b/drivers/gpu/drm/tiny/ili9163.c
index 9a1a5943bee0..ca0451f79962 100644
--- a/drivers/gpu/drm/tiny/ili9163.c
+++ b/drivers/gpu/drm/tiny/ili9163.c
@@ -9,7 +9,7 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_atomic_helper.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_mipi_dbi.h>
diff --git a/drivers/gpu/drm/tiny/ili9225.c b/drivers/gpu/drm/tiny/ili9225.c
index a79da2b4af64..815bab285823 100644
--- a/drivers/gpu/drm/tiny/ili9225.c
+++ b/drivers/gpu/drm/tiny/ili9225.c
@@ -20,7 +20,7 @@
 #include <drm/drm_damage_helper.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_fb_dma_helper.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_atomic_helper.h>
diff --git a/drivers/gpu/drm/tiny/ili9341.c b/drivers/gpu/drm/tiny/ili9341.c
index 69b265e78096..420f6005a956 100644
--- a/drivers/gpu/drm/tiny/ili9341.c
+++ b/drivers/gpu/drm/tiny/ili9341.c
@@ -17,7 +17,7 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_atomic_helper.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_managed.h>
diff --git a/drivers/gpu/drm/tiny/ili9486.c b/drivers/gpu/drm/tiny/ili9486.c
index c80028bb1d11..1bb847466b10 100644
--- a/drivers/gpu/drm/tiny/ili9486.c
+++ b/drivers/gpu/drm/tiny/ili9486.c
@@ -16,7 +16,7 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_atomic_helper.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_managed.h>
diff --git a/drivers/gpu/drm/tiny/mi0283qt.c b/drivers/gpu/drm/tiny/mi0283qt.c
index bc522fb3d94d..47df2b5a3048 100644
--- a/drivers/gpu/drm/tiny/mi0283qt.c
+++ b/drivers/gpu/drm/tiny/mi0283qt.c
@@ -15,7 +15,7 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_atomic_helper.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_managed.h>
diff --git a/drivers/gpu/drm/tiny/ofdrm.c b/drivers/gpu/drm/tiny/ofdrm.c
index 0e1cc2369afc..dc9e4d71b12a 100644
--- a/drivers/gpu/drm/tiny/ofdrm.c
+++ b/drivers/gpu/drm/tiny/ofdrm.c
@@ -11,7 +11,7 @@
 #include <drm/drm_damage_helper.h>
 #include <drm/drm_device.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_format_helper.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_atomic_helper.h>
@@ -231,7 +231,7 @@ static u64 display_get_address_of(struct drm_device *dev, struct device_node *of
 	return address;
 }
 
-static bool is_avivo(__be32 vendor, __be32 device)
+static bool is_avivo(u32 vendor, u32 device)
 {
 	/* This will match most R5xx */
 	return (vendor == PCI_VENDOR_ID_ATI) &&
@@ -265,8 +265,13 @@ static enum ofdrm_model display_get_model_of(struct drm_device *dev, struct devi
 		of_parent = of_get_parent(of_node);
 		vendor_p = of_get_property(of_parent, "vendor-id", NULL);
 		device_p = of_get_property(of_parent, "device-id", NULL);
-		if (vendor_p && device_p && is_avivo(*vendor_p, *device_p))
-			model = OFDRM_MODEL_AVIVO;
+		if (vendor_p && device_p) {
+			u32 vendor = be32_to_cpup(vendor_p);
+			u32 device = be32_to_cpup(device_p);
+
+			if (is_avivo(vendor, device))
+				model = OFDRM_MODEL_AVIVO;
+		}
 		of_node_put(of_parent);
 	} else if (of_device_is_compatible(of_node, "qemu,std-vga")) {
 		model = OFDRM_MODEL_QEMU;
@@ -433,21 +438,21 @@ static void __iomem *get_cmap_address_of(struct ofdrm_device *odev, struct devic
 	if (!addr_p)
 		addr_p = of_get_address(of_node, bar_no, &max_size, &flags);
 	if (!addr_p)
-		return ERR_PTR(-ENODEV);
+		return IOMEM_ERR_PTR(-ENODEV);
 
 	if ((flags & (IORESOURCE_IO | IORESOURCE_MEM)) == 0)
-		return ERR_PTR(-ENODEV);
+		return IOMEM_ERR_PTR(-ENODEV);
 
 	if ((offset + size) >= max_size)
-		return ERR_PTR(-ENODEV);
+		return IOMEM_ERR_PTR(-ENODEV);
 
 	address = of_translate_address(of_node, addr_p);
 	if (address == OF_BAD_ADDR)
-		return ERR_PTR(-ENODEV);
+		return IOMEM_ERR_PTR(-ENODEV);
 
 	mem = devm_ioremap(dev->dev, address + offset, size);
 	if (!mem)
-		return ERR_PTR(-ENOMEM);
+		return IOMEM_ERR_PTR(-ENOMEM);
 
 	return mem;
 }
@@ -465,7 +470,7 @@ static void __iomem *ofdrm_mach64_cmap_ioremap(struct ofdrm_device *odev,
 
 	cmap_base = devm_ioremap(dev->dev, address, 0x1000);
 	if (!cmap_base)
-		return ERR_PTR(-ENOMEM);
+		return IOMEM_ERR_PTR(-ENOMEM);
 
 	return cmap_base;
 }
@@ -624,11 +629,11 @@ static void __iomem *ofdrm_qemu_cmap_ioremap(struct ofdrm_device *odev,
 
 	address = of_translate_address(of_node, io_of_addr);
 	if (address == OF_BAD_ADDR)
-		return ERR_PTR(-ENODEV);
+		return IOMEM_ERR_PTR(-ENODEV);
 
 	cmap_base = devm_ioremap(dev->dev, address + 0x3c8, 2);
 	if (!cmap_base)
-		return ERR_PTR(-ENOMEM);
+		return IOMEM_ERR_PTR(-ENOMEM);
 
 	return cmap_base;
 }
diff --git a/drivers/gpu/drm/tiny/panel-mipi-dbi.c b/drivers/gpu/drm/tiny/panel-mipi-dbi.c
index 955a61d628e7..03a7d569cd56 100644
--- a/drivers/gpu/drm/tiny/panel-mipi-dbi.c
+++ b/drivers/gpu/drm/tiny/panel-mipi-dbi.c
@@ -16,7 +16,7 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_atomic_helper.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_managed.h>
diff --git a/drivers/gpu/drm/tiny/repaper.c b/drivers/gpu/drm/tiny/repaper.c
index e62f4d16b2c6..c2677d081a7b 100644
--- a/drivers/gpu/drm/tiny/repaper.c
+++ b/drivers/gpu/drm/tiny/repaper.c
@@ -26,7 +26,7 @@
 #include <drm/drm_damage_helper.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_fb_dma_helper.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_format_helper.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_atomic_helper.h>
diff --git a/drivers/gpu/drm/tiny/simpledrm.c b/drivers/gpu/drm/tiny/simpledrm.c
index cbb100753154..162eb44dcba8 100644
--- a/drivers/gpu/drm/tiny/simpledrm.c
+++ b/drivers/gpu/drm/tiny/simpledrm.c
@@ -15,7 +15,7 @@
 #include <drm/drm_damage_helper.h>
 #include <drm/drm_device.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_format_helper.h>
 #include <drm/drm_gem_atomic_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
diff --git a/drivers/gpu/drm/tiny/st7586.c b/drivers/gpu/drm/tiny/st7586.c
index b6f620b902e6..ce57fa9917e5 100644
--- a/drivers/gpu/drm/tiny/st7586.c
+++ b/drivers/gpu/drm/tiny/st7586.c
@@ -16,7 +16,7 @@
 #include <drm/drm_damage_helper.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_fb_dma_helper.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_format_helper.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_atomic_helper.h>
diff --git a/drivers/gpu/drm/tiny/st7735r.c b/drivers/gpu/drm/tiny/st7735r.c
index c36ba08acda1..15d9cf283c66 100644
--- a/drivers/gpu/drm/tiny/st7735r.c
+++ b/drivers/gpu/drm/tiny/st7735r.c
@@ -18,7 +18,7 @@
 
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_atomic_helper.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_managed.h>
diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c
index 21b61631f73a..9f6764bf3b15 100644
--- a/drivers/gpu/drm/ttm/ttm_pool.c
+++ b/drivers/gpu/drm/ttm/ttm_pool.c
@@ -344,6 +344,28 @@ static unsigned int ttm_pool_page_order(struct ttm_pool *pool, struct page *p)
 	return p->private;
 }
 
+/* Called when we got a page, either from a pool or newly allocated */
+static int ttm_pool_page_allocated(struct ttm_pool *pool, unsigned int order,
+				   struct page *p, dma_addr_t **dma_addr,
+				   unsigned long *num_pages,
+				   struct page ***pages)
+{
+	unsigned int i;
+	int r;
+
+	if (*dma_addr) {
+		r = ttm_pool_map(pool, order, p, dma_addr);
+		if (r)
+			return r;
+	}
+
+	*num_pages -= 1 << order;
+	for (i = 1 << order; i; --i, ++(*pages), ++p)
+		**pages = p;
+
+	return 0;
+}
+
 /**
  * ttm_pool_alloc - Fill a ttm_tt object
  *
@@ -385,45 +407,57 @@ int ttm_pool_alloc(struct ttm_pool *pool, struct ttm_tt *tt,
 	for (order = min_t(unsigned int, MAX_ORDER - 1, __fls(num_pages));
 	     num_pages;
 	     order = min_t(unsigned int, order, __fls(num_pages))) {
-		bool apply_caching = false;
 		struct ttm_pool_type *pt;
 
 		pt = ttm_pool_select_type(pool, tt->caching, order);
 		p = pt ? ttm_pool_type_take(pt) : NULL;
 		if (p) {
-			apply_caching = true;
-		} else {
-			p = ttm_pool_alloc_page(pool, gfp_flags, order);
-			if (p && PageHighMem(p))
-				apply_caching = true;
-		}
-
-		if (!p) {
-			if (order) {
-				--order;
-				continue;
-			}
-			r = -ENOMEM;
-			goto error_free_all;
-		}
-
-		if (apply_caching) {
 			r = ttm_pool_apply_caching(caching, pages,
 						   tt->caching);
 			if (r)
 				goto error_free_page;
-			caching = pages + (1 << order);
+
+			do {
+				r = ttm_pool_page_allocated(pool, order, p,
+							    &dma_addr,
+							    &num_pages,
+							    &pages);
+				if (r)
+					goto error_free_page;
+
+				if (num_pages < (1 << order))
+					break;
+
+				p = ttm_pool_type_take(pt);
+			} while (p);
+			caching = pages;
 		}
 
-		if (dma_addr) {
-			r = ttm_pool_map(pool, order, p, &dma_addr);
+		while (num_pages >= (1 << order) &&
+		       (p = ttm_pool_alloc_page(pool, gfp_flags, order))) {
+
+			if (PageHighMem(p)) {
+				r = ttm_pool_apply_caching(caching, pages,
+							   tt->caching);
+				if (r)
+					goto error_free_page;
+			}
+			r = ttm_pool_page_allocated(pool, order, p, &dma_addr,
+						    &num_pages, &pages);
 			if (r)
 				goto error_free_page;
+			if (PageHighMem(p))
+				caching = pages;
 		}
 
-		num_pages -= 1 << order;
-		for (i = 1 << order; i; --i)
-			*(pages++) = p++;
+		if (!p) {
+			if (order) {
+				--order;
+				continue;
+			}
+			r = -ENOMEM;
+			goto error_free_all;
+		}
 	}
 
 	r = ttm_pool_apply_caching(caching, pages, tt->caching);
diff --git a/drivers/gpu/drm/tve200/tve200_drv.c b/drivers/gpu/drm/tve200/tve200_drv.c
index 04db72e3fa9c..0d05c386d303 100644
--- a/drivers/gpu/drm/tve200/tve200_drv.c
+++ b/drivers/gpu/drm/tve200/tve200_drv.c
@@ -32,6 +32,7 @@
 #include <linux/irq.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
@@ -39,7 +40,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_bridge.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_dma_helper.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_module.h>
diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c
index 91effdcefb6d..e81352126a0f 100644
--- a/drivers/gpu/drm/udl/udl_drv.c
+++ b/drivers/gpu/drm/udl/udl_drv.c
@@ -7,7 +7,7 @@
 
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_file.h>
 #include <drm/drm_gem_shmem_helper.h>
 #include <drm/drm_managed.h>
diff --git a/drivers/gpu/drm/v3d/v3d_drv.c b/drivers/gpu/drm/v3d/v3d_drv.c
index e8c975b81585..478f1f0f60de 100644
--- a/drivers/gpu/drm/v3d/v3d_drv.c
+++ b/drivers/gpu/drm/v3d/v3d_drv.c
@@ -22,7 +22,6 @@
 #include <linux/reset.h>
 
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_managed.h>
 #include <uapi/drm/v3d_drm.h>
 
diff --git a/drivers/gpu/drm/v3d/v3d_gem.c b/drivers/gpu/drm/v3d/v3d_gem.c
index b8980440d137..96af1cb5202a 100644
--- a/drivers/gpu/drm/v3d/v3d_gem.c
+++ b/drivers/gpu/drm/v3d/v3d_gem.c
@@ -10,6 +10,7 @@
 #include <linux/sched/signal.h>
 #include <linux/uaccess.h>
 
+#include <drm/drm_managed.h>
 #include <drm/drm_syncobj.h>
 #include <uapi/drm/v3d_drm.h>
 
@@ -1075,10 +1076,18 @@ v3d_gem_init(struct drm_device *dev)
 
 	spin_lock_init(&v3d->mm_lock);
 	spin_lock_init(&v3d->job_lock);
-	mutex_init(&v3d->bo_lock);
-	mutex_init(&v3d->reset_lock);
-	mutex_init(&v3d->sched_lock);
-	mutex_init(&v3d->cache_clean_lock);
+	ret = drmm_mutex_init(dev, &v3d->bo_lock);
+	if (ret)
+		return ret;
+	ret = drmm_mutex_init(dev, &v3d->reset_lock);
+	if (ret)
+		return ret;
+	ret = drmm_mutex_init(dev, &v3d->sched_lock);
+	if (ret)
+		return ret;
+	ret = drmm_mutex_init(dev, &v3d->cache_clean_lock);
+	if (ret)
+		return ret;
 
 	/* Note: We don't allocate address 0.  Various bits of HW
 	 * treat 0 as special, such as the occlusion query counters
diff --git a/drivers/gpu/drm/v3d/v3d_perfmon.c b/drivers/gpu/drm/v3d/v3d_perfmon.c
index 48aaaa972c49..e1be7368b87d 100644
--- a/drivers/gpu/drm/v3d/v3d_perfmon.c
+++ b/drivers/gpu/drm/v3d/v3d_perfmon.c
@@ -17,8 +17,10 @@ void v3d_perfmon_get(struct v3d_perfmon *perfmon)
 
 void v3d_perfmon_put(struct v3d_perfmon *perfmon)
 {
-	if (perfmon && refcount_dec_and_test(&perfmon->refcnt))
+	if (perfmon && refcount_dec_and_test(&perfmon->refcnt)) {
+		mutex_destroy(&perfmon->lock);
 		kfree(perfmon);
+	}
 }
 
 void v3d_perfmon_start(struct v3d_dev *v3d, struct v3d_perfmon *perfmon)
@@ -113,6 +115,7 @@ void v3d_perfmon_close_file(struct v3d_file_priv *v3d_priv)
 	idr_for_each(&v3d_priv->perfmon.idr, v3d_perfmon_idr_del, NULL);
 	idr_destroy(&v3d_priv->perfmon.idr);
 	mutex_unlock(&v3d_priv->perfmon.lock);
+	mutex_destroy(&v3d_priv->perfmon.lock);
 }
 
 int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data,
@@ -154,6 +157,7 @@ int v3d_perfmon_create_ioctl(struct drm_device *dev, void *data,
 	mutex_unlock(&v3d_priv->perfmon.lock);
 
 	if (ret < 0) {
+		mutex_destroy(&perfmon->lock);
 		kfree(perfmon);
 		return ret;
 	}
diff --git a/drivers/gpu/drm/vboxvideo/vbox_drv.c b/drivers/gpu/drm/vboxvideo/vbox_drv.c
index f4f2bd79a7cb..b450f449a3ab 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_drv.c
+++ b/drivers/gpu/drm/vboxvideo/vbox_drv.c
@@ -14,7 +14,7 @@
 #include <drm/drm_aperture.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_file.h>
 #include <drm/drm_ioctl.h>
 #include <drm/drm_managed.h>
@@ -178,8 +178,6 @@ static const struct drm_driver driver = {
 	.driver_features =
 	    DRIVER_MODESET | DRIVER_GEM | DRIVER_ATOMIC,
 
-	.lastclose = drm_fb_helper_lastclose,
-
 	.fops = &vbox_fops,
 	.name = DRIVER_NAME,
 	.desc = DRIVER_DESC,
diff --git a/drivers/gpu/drm/vboxvideo/vbox_main.c b/drivers/gpu/drm/vboxvideo/vbox_main.c
index c9e8b3a63c62..3b83e550f4df 100644
--- a/drivers/gpu/drm/vboxvideo/vbox_main.c
+++ b/drivers/gpu/drm/vboxvideo/vbox_main.c
@@ -11,7 +11,6 @@
 #include <linux/pci.h>
 #include <linux/vbox_err.h>
 
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_damage_helper.h>
 
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index 2027063fdc30..5990d8f8c363 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -33,7 +33,7 @@
 #include <drm/drm_aperture.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_vblank.h>
 
 #include <soc/bcm2835/raspberrypi-firmware.h>
@@ -476,7 +476,12 @@ static int __init vc4_drm_register(void)
 	if (ret)
 		return ret;
 
-	return platform_driver_register(&vc4_platform_driver);
+	ret = platform_driver_register(&vc4_platform_driver);
+	if (ret)
+		platform_unregister_drivers(component_drivers,
+					    ARRAY_SIZE(component_drivers));
+
+	return ret;
 }
 
 static void __exit vc4_drm_unregister(void)
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
index 0d78c800ed51..12a00d644b61 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
@@ -124,9 +124,8 @@ static unsigned long long
 vc4_hdmi_encoder_compute_mode_clock(const struct drm_display_mode *mode,
 				    unsigned int bpc, enum vc4_hdmi_output_format fmt);
 
-static bool vc4_hdmi_supports_scrambling(struct drm_encoder *encoder)
+static bool vc4_hdmi_supports_scrambling(struct vc4_hdmi *vc4_hdmi)
 {
-	struct vc4_hdmi *vc4_hdmi = encoder_to_vc4_hdmi(encoder);
 	struct drm_display_info *display = &vc4_hdmi->connector.display_info;
 
 	lockdep_assert_held(&vc4_hdmi->mutex);
@@ -319,9 +318,8 @@ out:
 static int vc4_hdmi_reset_link(struct drm_connector *connector,
 			       struct drm_modeset_acquire_ctx *ctx)
 {
-	struct drm_device *drm = connector->dev;
-	struct vc4_hdmi *vc4_hdmi = connector_to_vc4_hdmi(connector);
-	struct drm_encoder *encoder = &vc4_hdmi->encoder.base;
+	struct drm_device *drm;
+	struct vc4_hdmi *vc4_hdmi;
 	struct drm_connector_state *conn_state;
 	struct drm_crtc_state *crtc_state;
 	struct drm_crtc *crtc;
@@ -332,6 +330,7 @@ static int vc4_hdmi_reset_link(struct drm_connector *connector,
 	if (!connector)
 		return 0;
 
+	drm = connector->dev;
 	ret = drm_modeset_lock(&drm->mode_config.connection_mutex, ctx);
 	if (ret)
 		return ret;
@@ -349,27 +348,41 @@ static int vc4_hdmi_reset_link(struct drm_connector *connector,
 	if (!crtc_state->active)
 		return 0;
 
-	if (!vc4_hdmi_supports_scrambling(encoder))
+	vc4_hdmi = connector_to_vc4_hdmi(connector);
+	mutex_lock(&vc4_hdmi->mutex);
+
+	if (!vc4_hdmi_supports_scrambling(vc4_hdmi)) {
+		mutex_unlock(&vc4_hdmi->mutex);
 		return 0;
+	}
 
 	scrambling_needed = vc4_hdmi_mode_needs_scrambling(&vc4_hdmi->saved_adjusted_mode,
 							   vc4_hdmi->output_bpc,
 							   vc4_hdmi->output_format);
-	if (!scrambling_needed)
+	if (!scrambling_needed) {
+		mutex_unlock(&vc4_hdmi->mutex);
 		return 0;
+	}
 
 	if (conn_state->commit &&
-	    !try_wait_for_completion(&conn_state->commit->hw_done))
+	    !try_wait_for_completion(&conn_state->commit->hw_done)) {
+		mutex_unlock(&vc4_hdmi->mutex);
 		return 0;
+	}
 
 	ret = drm_scdc_readb(connector->ddc, SCDC_TMDS_CONFIG, &config);
 	if (ret < 0) {
 		drm_err(drm, "Failed to read TMDS config: %d\n", ret);
+		mutex_unlock(&vc4_hdmi->mutex);
 		return 0;
 	}
 
-	if (!!(config & SCDC_SCRAMBLING_ENABLE) == scrambling_needed)
+	if (!!(config & SCDC_SCRAMBLING_ENABLE) == scrambling_needed) {
+		mutex_unlock(&vc4_hdmi->mutex);
 		return 0;
+	}
+
+	mutex_unlock(&vc4_hdmi->mutex);
 
 	/*
 	 * HDMI 2.0 says that one should not send scrambled data
@@ -397,9 +410,8 @@ static void vc4_hdmi_handle_hotplug(struct vc4_hdmi *vc4_hdmi,
 	 * .adap_enable, which leads to that funtion being called with
 	 * our mutex held.
 	 *
-	 * A similar situation occurs with
-	 * drm_atomic_helper_connector_hdmi_reset_link() that will call
-	 * into our KMS hooks if the scrambling was enabled.
+	 * A similar situation occurs with vc4_hdmi_reset_link() that
+	 * will call into our KMS hooks if the scrambling was enabled.
 	 *
 	 * Concurrency isn't an issue at the moment since we don't share
 	 * any state with any of the other frameworks so we can ignore
@@ -867,7 +879,7 @@ static void vc4_hdmi_enable_scrambling(struct drm_encoder *encoder)
 
 	lockdep_assert_held(&vc4_hdmi->mutex);
 
-	if (!vc4_hdmi_supports_scrambling(encoder))
+	if (!vc4_hdmi_supports_scrambling(vc4_hdmi))
 		return;
 
 	if (!vc4_hdmi_mode_needs_scrambling(mode,
@@ -3169,9 +3181,16 @@ static int vc4_hdmi_init_resources(struct drm_device *drm,
 		DRM_ERROR("Failed to get HDMI state machine clock\n");
 		return PTR_ERR(vc4_hdmi->hsm_clock);
 	}
+
 	vc4_hdmi->audio_clock = vc4_hdmi->hsm_clock;
 	vc4_hdmi->cec_clock = vc4_hdmi->hsm_clock;
 
+	vc4_hdmi->hsm_rpm_clock = devm_clk_get(dev, "hdmi");
+	if (IS_ERR(vc4_hdmi->hsm_rpm_clock)) {
+		DRM_ERROR("Failed to get HDMI state machine clock\n");
+		return PTR_ERR(vc4_hdmi->hsm_rpm_clock);
+	}
+
 	return 0;
 }
 
@@ -3254,6 +3273,12 @@ static int vc5_hdmi_init_resources(struct drm_device *drm,
 		return PTR_ERR(vc4_hdmi->hsm_clock);
 	}
 
+	vc4_hdmi->hsm_rpm_clock = devm_clk_get(dev, "hdmi");
+	if (IS_ERR(vc4_hdmi->hsm_rpm_clock)) {
+		DRM_ERROR("Failed to get HDMI state machine clock\n");
+		return PTR_ERR(vc4_hdmi->hsm_rpm_clock);
+	}
+
 	vc4_hdmi->pixel_bvb_clock = devm_clk_get(dev, "bvb");
 	if (IS_ERR(vc4_hdmi->pixel_bvb_clock)) {
 		DRM_ERROR("Failed to get pixel bvb clock\n");
@@ -3317,7 +3342,7 @@ static int vc4_hdmi_runtime_suspend(struct device *dev)
 {
 	struct vc4_hdmi *vc4_hdmi = dev_get_drvdata(dev);
 
-	clk_disable_unprepare(vc4_hdmi->hsm_clock);
+	clk_disable_unprepare(vc4_hdmi->hsm_rpm_clock);
 
 	return 0;
 }
@@ -3335,11 +3360,11 @@ static int vc4_hdmi_runtime_resume(struct device *dev)
 	 * its frequency while the power domain is active so that it
 	 * keeps its rate.
 	 */
-	ret = clk_set_min_rate(vc4_hdmi->hsm_clock, HSM_MIN_CLOCK_FREQ);
+	ret = clk_set_min_rate(vc4_hdmi->hsm_rpm_clock, HSM_MIN_CLOCK_FREQ);
 	if (ret)
 		return ret;
 
-	ret = clk_prepare_enable(vc4_hdmi->hsm_clock);
+	ret = clk_prepare_enable(vc4_hdmi->hsm_rpm_clock);
 	if (ret)
 		return ret;
 
@@ -3352,7 +3377,7 @@ static int vc4_hdmi_runtime_resume(struct device *dev)
 	 * case, it will lead to a silent CPU stall. Let's make sure we
 	 * prevent such a case.
 	 */
-	rate = clk_get_rate(vc4_hdmi->hsm_clock);
+	rate = clk_get_rate(vc4_hdmi->hsm_rpm_clock);
 	if (!rate) {
 		ret = -EINVAL;
 		goto err_disable_clk;
diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.h b/drivers/gpu/drm/vc4/vc4_hdmi.h
index e3619836ca17..dc3ccd8002a0 100644
--- a/drivers/gpu/drm/vc4/vc4_hdmi.h
+++ b/drivers/gpu/drm/vc4/vc4_hdmi.h
@@ -164,6 +164,7 @@ struct vc4_hdmi {
 	struct clk *cec_clock;
 	struct clk *pixel_clock;
 	struct clk *hsm_clock;
+	struct clk *hsm_rpm_clock;
 	struct clk *audio_clock;
 	struct clk *pixel_bvb_clock;
 
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index 5c97642ed66a..8fbeecdf2ec4 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -197,8 +197,8 @@ vc4_hvs_get_new_global_state(struct drm_atomic_state *state)
 	struct drm_private_state *priv_state;
 
 	priv_state = drm_atomic_get_new_private_obj_state(state, &vc4->hvs_channels);
-	if (IS_ERR(priv_state))
-		return ERR_CAST(priv_state);
+	if (!priv_state)
+		return ERR_PTR(-EINVAL);
 
 	return to_vc4_hvs_state(priv_state);
 }
@@ -210,8 +210,8 @@ vc4_hvs_get_old_global_state(struct drm_atomic_state *state)
 	struct drm_private_state *priv_state;
 
 	priv_state = drm_atomic_get_old_private_obj_state(state, &vc4->hvs_channels);
-	if (IS_ERR(priv_state))
-		return ERR_CAST(priv_state);
+	if (!priv_state)
+		return ERR_PTR(-EINVAL);
 
 	return to_vc4_hvs_state(priv_state);
 }
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index 0035affc3e59..ae97b98750b6 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -35,6 +35,7 @@
 #include <drm/drm_aperture.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_file.h>
 
 #include "virtgpu_drv.h"
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 9b98470593b0..b7a64c7dcc2c 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -35,7 +35,6 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_encoder.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem.h>
diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c
index 0ffe5f0e33f7..293dbca50c31 100644
--- a/drivers/gpu/drm/vkms/vkms_drv.c
+++ b/drivers/gpu/drm/vkms/vkms_drv.c
@@ -17,7 +17,7 @@
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_file.h>
 #include <drm/drm_gem_framebuffer_helper.h>
 #include <drm/drm_ioctl.h>
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 63496773f714..bd02cb0e6837 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -35,7 +35,7 @@
 
 #include <drm/drm_aperture.h>
 #include <drm/drm_drv.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_gem_ttm_helper.h>
 #include <drm/drm_ioctl.h>
 #include <drm/drm_module.h>
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
index 06d9e106e3c5..e76976a95a1e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_msg.c
@@ -1097,21 +1097,21 @@ int vmw_mksstat_add_ioctl(struct drm_device *dev, void *data,
 	reset_ppn_array(pdesc->strsPPNs, ARRAY_SIZE(pdesc->strsPPNs));
 
 	/* Pin mksGuestStat user pages and store those in the instance descriptor */
-	nr_pinned_stat = pin_user_pages(arg->stat, num_pages_stat, FOLL_LONGTERM, pages_stat, NULL);
+	nr_pinned_stat = pin_user_pages_fast(arg->stat, num_pages_stat, FOLL_LONGTERM, pages_stat);
 	if (num_pages_stat != nr_pinned_stat)
 		goto err_pin_stat;
 
 	for (i = 0; i < num_pages_stat; ++i)
 		pdesc->statPPNs[i] = page_to_pfn(pages_stat[i]);
 
-	nr_pinned_info = pin_user_pages(arg->info, num_pages_info, FOLL_LONGTERM, pages_info, NULL);
+	nr_pinned_info = pin_user_pages_fast(arg->info, num_pages_info, FOLL_LONGTERM, pages_info);
 	if (num_pages_info != nr_pinned_info)
 		goto err_pin_info;
 
 	for (i = 0; i < num_pages_info; ++i)
 		pdesc->infoPPNs[i] = page_to_pfn(pages_info[i]);
 
-	nr_pinned_strs = pin_user_pages(arg->strs, num_pages_strs, FOLL_LONGTERM, pages_strs, NULL);
+	nr_pinned_strs = pin_user_pages_fast(arg->strs, num_pages_strs, FOLL_LONGTERM, pages_strs);
 	if (num_pages_strs != nr_pinned_strs)
 		goto err_pin_strs;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
index 8db61c541a80..e1f36a09c59c 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c
@@ -926,6 +926,10 @@ int vmw_kms_sou_init_display(struct vmw_private *dev_priv)
 	struct drm_device *dev = &dev_priv->drm;
 	int i;
 
+	/* Screen objects won't work if GMR's aren't available */
+	if (!dev_priv->has_gmr)
+		return -ENOSYS;
+
 	if (!(dev_priv->capabilities & SVGA_CAP_SCREEN_OBJECT_2)) {
 		return -ENOSYS;
 	}
diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c
index e31554d7139f..4c95ebcdcc2d 100644
--- a/drivers/gpu/drm/xen/xen_drm_front_gem.c
+++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c
@@ -12,7 +12,6 @@
 #include <linux/scatterlist.h>
 #include <linux/shmem_fs.h>
 
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_prime.h>
 #include <drm/drm_probe_helper.h>
diff --git a/drivers/gpu/drm/xlnx/zynqmp_dp.c b/drivers/gpu/drm/xlnx/zynqmp_dp.c
index 7c9ae167eac7..0a7b466446fb 100644
--- a/drivers/gpu/drm/xlnx/zynqmp_dp.c
+++ b/drivers/gpu/drm/xlnx/zynqmp_dp.c
@@ -1362,9 +1362,10 @@ static void zynqmp_dp_bridge_detach(struct drm_bridge *bridge)
 	zynqmp_dp_aux_cleanup(dp);
 }
 
-static int zynqmp_dp_bridge_mode_valid(struct drm_bridge *bridge,
-				       const struct drm_display_info *info,
-				       const struct drm_display_mode *mode)
+static enum drm_mode_status
+zynqmp_dp_bridge_mode_valid(struct drm_bridge *bridge,
+			    const struct drm_display_info *info,
+			    const struct drm_display_mode *mode)
 {
 	struct zynqmp_dp *dp = bridge_to_dp(bridge);
 	int rate;
diff --git a/drivers/gpu/drm/xlnx/zynqmp_kms.c b/drivers/gpu/drm/xlnx/zynqmp_kms.c
index 1847792cf13d..776ef5480206 100644
--- a/drivers/gpu/drm/xlnx/zynqmp_kms.c
+++ b/drivers/gpu/drm/xlnx/zynqmp_kms.c
@@ -19,7 +19,7 @@
 #include <drm/drm_device.h>
 #include <drm/drm_drv.h>
 #include <drm/drm_encoder.h>
-#include <drm/drm_fb_helper.h>
+#include <drm/drm_fbdev_generic.h>
 #include <drm/drm_fourcc.h>
 #include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_dma_helper.h>