47 files changed, 9244 insertions, 3092 deletions
diff --git a/drivers/gpu/drm/tegra/Kconfig b/drivers/gpu/drm/tegra/Kconfig
index cf54847a8bd1..8a3b16aac5d6 100644
--- a/drivers/gpu/drm/tegra/Kconfig
+++ b/drivers/gpu/drm/tegra/Kconfig
@@ -1,15 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0-only
 config DRM_TEGRA
 	tristate "NVIDIA Tegra DRM"
-	depends on ARCH_TEGRA || (ARM && COMPILE_TEST)
+	depends on ARCH_TEGRA || COMPILE_TEST
 	depends on COMMON_CLK
 	depends on DRM
 	depends on OF
+	select DRM_CLIENT_SELECTION
+	select DRM_DISPLAY_DP_HELPER
+	select DRM_DISPLAY_HDMI_HELPER
+	select DRM_DISPLAY_HELPER
+	select DRM_BRIDGE_CONNECTOR
+	select DRM_DISPLAY_DP_AUX_BUS
 	select DRM_KMS_HELPER
 	select DRM_MIPI_DSI
 	select DRM_PANEL
+	select FB_DMAMEM_HELPERS if DRM_FBDEV_EMULATION
 	select TEGRA_HOST1X
-	select IOMMU_IOVA if IOMMU_SUPPORT
+	select INTERCONNECT
+	select IOMMU_IOVA
 	select CEC_CORE if CEC_NOTIFIER
+	select SND_SIMPLE_CARD if SND_SOC_TEGRA20_SPDIF
+	select SND_SOC_HDMI_CODEC if SND_SOC_TEGRA20_SPDIF
+	select SND_AUDIO_GRAPH_CARD if SND_SOC_TEGRA20_SPDIF
 	help
 	  Choose this option if you have an NVIDIA Tegra SoC.
 
diff --git a/drivers/gpu/drm/tegra/Makefile b/drivers/gpu/drm/tegra/Makefile
index 2e0d6213f6bc..e399b40d64a1 100644
--- a/drivers/gpu/drm/tegra/Makefile
+++ b/drivers/gpu/drm/tegra/Makefile
@@ -3,13 +3,18 @@ ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG
 
 tegra-drm-y := \
 	drm.o \
+	uapi.o \
+	submit.o \
+	firewall.o \
 	gem.o \
 	fb.o \
+	dp.o \
 	hub.o \
 	plane.o \
 	dc.o \
 	output.o \
 	rgb.o \
+	hda.o \
 	hdmi.o \
 	mipi-phy.o \
 	dsi.o \
@@ -18,8 +23,13 @@ tegra-drm-y := \
 	gr2d.o \
 	gr3d.o \
 	falcon.o \
-	vic.o
+	vic.o \
+	nvdec.o \
+	nvjpg.o \
+	riscv.o
 
 tegra-drm-y += trace.o
 
+tegra-drm-$(CONFIG_DRM_FBDEV_EMULATION) += fbdev.o
+
 obj-$(CONFIG_DRM_TEGRA) += tegra-drm.o
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index 607a6ea17ecc..01e9d5011dd8 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -1,30 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2012 Avionic Design GmbH
  * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
 #include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
 #include <linux/iommu.h>
-#include <linux/of_device.h>
+#include <linux/interconnect.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_opp.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
 
+#include <soc/tegra/common.h>
 #include <soc/tegra/pmc.h>
 
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_blend.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_framebuffer.h>
+#include <drm/drm_print.h>
+#include <drm/drm_vblank.h>
+
 #include "dc.h"
 #include "drm.h"
 #include "gem.h"
 #include "hub.h"
 #include "plane.h"
 
-#include <drm/drm_atomic.h>
-#include <drm/drm_atomic_helper.h>
-#include <drm/drm_plane_helper.h>
+static void tegra_crtc_atomic_destroy_state(struct drm_crtc *crtc,
+					    struct drm_crtc_state *state);
 
 static void tegra_dc_stats_reset(struct tegra_dc_stats *stats)
 {
@@ -130,7 +143,7 @@ static inline u32 compute_dda_inc(unsigned int in, unsigned int out, bool v,
 
 		default:
 			WARN_ON_ONCE(1);
-			/* fallthrough */
+			fallthrough;
 		case 4:
 			max = 4;
 			break;
@@ -336,18 +349,19 @@ static void tegra_dc_setup_window(struct tegra_plane *plane,
 {
 	unsigned h_offset, v_offset, h_size, v_size, h_dda, v_dda, bpp;
 	struct tegra_dc *dc = plane->dc;
-	bool yuv, planar;
+	unsigned int planes;
 	u32 value;
+	bool yuv;
 
 	/*
 	 * For YUV planar modes, the number of bytes per pixel takes into
 	 * account only the luma component and therefore is 1.
 	 */
-	yuv = tegra_plane_format_is_yuv(window->format, &planar);
+	yuv = tegra_plane_format_is_yuv(window->format, &planes, NULL);
 	if (!yuv)
 		bpp = window->bits_per_pixel / 8;
 	else
-		bpp = planar ? 1 : 2;
+		bpp = (planes > 1) ? 1 : 2;
 
 	tegra_plane_writel(plane, window->format, DC_WIN_COLOR_DEPTH);
 	tegra_plane_writel(plane, window->swap, DC_WIN_BYTE_SWAP);
@@ -363,6 +377,12 @@ static void tegra_dc_setup_window(struct tegra_plane *plane,
 	h_size = window->src.w * bpp;
 	v_size = window->src.h;
 
+	if (window->reflect_x)
+		h_offset += (window->src.w - 1) * bpp;
+
+	if (window->reflect_y)
+		v_offset += window->src.h - 1;
+
 	value = V_PRESCALED_SIZE(v_size) | H_PRESCALED_SIZE(h_size);
 	tegra_plane_writel(plane, value, DC_WIN_PRESCALED_SIZE);
 
@@ -370,7 +390,7 @@ static void tegra_dc_setup_window(struct tegra_plane *plane,
 	 * For DDA computations the number of bytes per pixel for YUV planar
 	 * modes needs to take into account all Y, U and V components.
 	 */
-	if (yuv && planar)
+	if (yuv && planes > 1)
 		bpp = 2;
 
 	h_dda = compute_dda_inc(window->src.w, window->dst.w, false, bpp);
@@ -390,18 +410,18 @@ static void tegra_dc_setup_window(struct tegra_plane *plane,
 
 	tegra_plane_writel(plane, window->base[0], DC_WINBUF_START_ADDR);
 
-	if (yuv && planar) {
+	if (yuv && planes > 1) {
 		tegra_plane_writel(plane, window->base[1], DC_WINBUF_START_ADDR_U);
-		tegra_plane_writel(plane, window->base[2], DC_WINBUF_START_ADDR_V);
+
+		if (planes > 2)
+			tegra_plane_writel(plane, window->base[2], DC_WINBUF_START_ADDR_V);
+
 		value = window->stride[1] << 16 | window->stride[0];
 		tegra_plane_writel(plane, value, DC_WIN_LINE_STRIDE);
 	} else {
 		tegra_plane_writel(plane, window->stride[0], DC_WIN_LINE_STRIDE);
 	}
 
-	if (window->bottom_up)
-		v_offset += window->src.h - 1;
-
 	tegra_plane_writel(plane, h_offset, DC_WINBUF_ADDR_H_OFFSET);
 	tegra_plane_writel(plane, v_offset, DC_WINBUF_ADDR_V_OFFSET);
 
@@ -465,7 +485,10 @@ static void tegra_dc_setup_window(struct tegra_plane *plane,
 		value |= COLOR_EXPAND;
 	}
 
-	if (window->bottom_up)
+	if (window->reflect_x)
+		value |= H_DIRECTION;
+
+	if (window->reflect_y)
 		value |= V_DIRECTION;
 
 	if (tegra_plane_use_horizontal_filtering(plane, window)) {
@@ -593,20 +616,30 @@ static const u64 tegra124_modifiers[] = {
 };
 
 static int tegra_plane_atomic_check(struct drm_plane *plane,
-				    struct drm_plane_state *state)
+				    struct drm_atomic_state *state)
 {
-	struct tegra_plane_state *plane_state = to_tegra_plane_state(state);
-	unsigned int rotation = DRM_MODE_ROTATE_0 | DRM_MODE_REFLECT_Y;
+	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
+										 plane);
+	struct tegra_plane_state *plane_state = to_tegra_plane_state(new_plane_state);
+	unsigned int supported_rotation = DRM_MODE_ROTATE_0 |
+					  DRM_MODE_REFLECT_X |
+					  DRM_MODE_REFLECT_Y;
+	unsigned int rotation = new_plane_state->rotation;
 	struct tegra_bo_tiling *tiling = &plane_state->tiling;
 	struct tegra_plane *tegra = to_tegra_plane(plane);
-	struct tegra_dc *dc = to_tegra_dc(state->crtc);
+	struct tegra_dc *dc = to_tegra_dc(new_plane_state->crtc);
 	int err;
 
+	plane_state->peak_memory_bandwidth = 0;
+	plane_state->avg_memory_bandwidth = 0;
+
 	/* no need for further checks if the plane is being disabled */
-	if (!state->crtc)
+	if (!new_plane_state->crtc) {
+		plane_state->total_peak_memory_bandwidth = 0;
 		return 0;
+	}
 
-	err = tegra_plane_format(state->fb->format->format,
+	err = tegra_plane_format(new_plane_state->fb->format->format,
 				 &plane_state->format,
 				 &plane_state->swap);
 	if (err < 0)
@@ -624,7 +657,7 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
 			return err;
 	}
 
-	err = tegra_fb_get_tiling(state->fb, tiling);
+	err = tegra_fb_get_tiling(new_plane_state->fb, tiling);
 	if (err < 0)
 		return err;
 
@@ -634,26 +667,40 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
 		return -EINVAL;
 	}
 
-	rotation = drm_rotation_simplify(state->rotation, rotation);
+	/*
+	 * Older userspace used a custom BO flag in order to specify the Y
+	 * reflection, while modern userspace uses the generic DRM rotation
+	 * property in order to achieve the same result.  The legacy BO flag
+	 * duplicates the DRM rotation property when both are set.
+	 */
+	if (tegra_fb_is_bottom_up(new_plane_state->fb))
+		rotation |= DRM_MODE_REFLECT_Y;
+
+	rotation = drm_rotation_simplify(rotation, supported_rotation);
+
+	if (rotation & DRM_MODE_REFLECT_X)
+		plane_state->reflect_x = true;
+	else
+		plane_state->reflect_x = false;
 
 	if (rotation & DRM_MODE_REFLECT_Y)
-		plane_state->bottom_up = true;
+		plane_state->reflect_y = true;
 	else
-		plane_state->bottom_up = false;
+		plane_state->reflect_y = false;
 
 	/*
 	 * Tegra doesn't support different strides for U and V planes so we
 	 * error out if the user tries to display a framebuffer with such a
 	 * configuration.
 	 */
-	if (state->fb->format->num_planes > 2) {
-		if (state->fb->pitches[2] != state->fb->pitches[1]) {
+	if (new_plane_state->fb->format->num_planes > 2) {
+		if (new_plane_state->fb->pitches[2] != new_plane_state->fb->pitches[1]) {
 			DRM_ERROR("unsupported UV-plane configuration\n");
 			return -EINVAL;
 		}
 	}
 
-	err = tegra_plane_state_add(tegra, state);
+	err = tegra_plane_state_add(tegra, new_plane_state);
 	if (err < 0)
 		return err;
 
@@ -661,8 +708,10 @@ static int tegra_plane_atomic_check(struct drm_plane *plane,
 }
 
 static void tegra_plane_atomic_disable(struct drm_plane *plane,
-				       struct drm_plane_state *old_state)
+				       struct drm_atomic_state *state)
 {
+	struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state,
+									   plane);
 	struct tegra_plane *p = to_tegra_plane(plane);
 	u32 value;
 
@@ -676,43 +725,44 @@ static void tegra_plane_atomic_disable(struct drm_plane *plane,
 }
 
 static void tegra_plane_atomic_update(struct drm_plane *plane,
-				      struct drm_plane_state *old_state)
+				      struct drm_atomic_state *state)
 {
-	struct tegra_plane_state *state = to_tegra_plane_state(plane->state);
-	struct drm_framebuffer *fb = plane->state->fb;
+	struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state,
+									   plane);
+	struct tegra_plane_state *tegra_plane_state = to_tegra_plane_state(new_state);
+	struct drm_framebuffer *fb = new_state->fb;
 	struct tegra_plane *p = to_tegra_plane(plane);
 	struct tegra_dc_window window;
 	unsigned int i;
 
 	/* rien ne va plus */
-	if (!plane->state->crtc || !plane->state->fb)
+	if (!new_state->crtc || !new_state->fb)
 		return;
 
-	if (!plane->state->visible)
-		return tegra_plane_atomic_disable(plane, old_state);
+	if (!new_state->visible)
+		return tegra_plane_atomic_disable(plane, state);
 
 	memset(&window, 0, sizeof(window));
-	window.src.x = plane->state->src.x1 >> 16;
-	window.src.y = plane->state->src.y1 >> 16;
-	window.src.w = drm_rect_width(&plane->state->src) >> 16;
-	window.src.h = drm_rect_height(&plane->state->src) >> 16;
-	window.dst.x = plane->state->dst.x1;
-	window.dst.y = plane->state->dst.y1;
-	window.dst.w = drm_rect_width(&plane->state->dst);
-	window.dst.h = drm_rect_height(&plane->state->dst);
+	window.src.x = new_state->src.x1 >> 16;
+	window.src.y = new_state->src.y1 >> 16;
+	window.src.w = drm_rect_width(&new_state->src) >> 16;
+	window.src.h = drm_rect_height(&new_state->src) >> 16;
+	window.dst.x = new_state->dst.x1;
+	window.dst.y = new_state->dst.y1;
+	window.dst.w = drm_rect_width(&new_state->dst);
+	window.dst.h = drm_rect_height(&new_state->dst);
 	window.bits_per_pixel = fb->format->cpp[0] * 8;
-	window.bottom_up = tegra_fb_is_bottom_up(fb) || state->bottom_up;
+	window.reflect_x = tegra_plane_state->reflect_x;
+	window.reflect_y = tegra_plane_state->reflect_y;
 
 	/* copy from state */
-	window.zpos = plane->state->normalized_zpos;
-	window.tiling = state->tiling;
-	window.format = state->format;
-	window.swap = state->swap;
+	window.zpos = new_state->normalized_zpos;
+	window.tiling = tegra_plane_state->tiling;
+	window.format = tegra_plane_state->format;
+	window.swap = tegra_plane_state->swap;
 
 	for (i = 0; i < fb->format->num_planes; i++) {
-		struct tegra_bo *bo = tegra_fb_get_plane(fb, i);
-
-		window.base[i] = bo->paddr + fb->offsets[i];
+		window.base[i] = tegra_plane_state->iova[i] + fb->offsets[i];
 
 		/*
 		 * Tegra uses a shared stride for UV planes. Framebuffers are
@@ -727,6 +777,8 @@ static void tegra_plane_atomic_update(struct drm_plane *plane,
 }
 
 static const struct drm_plane_helper_funcs tegra_plane_helper_funcs = {
+	.prepare_fb = tegra_plane_prepare_fb,
+	.cleanup_fb = tegra_plane_cleanup_fb,
 	.atomic_check = tegra_plane_atomic_check,
 	.atomic_disable = tegra_plane_atomic_disable,
 	.atomic_update = tegra_plane_atomic_update,
@@ -773,6 +825,12 @@ static struct drm_plane *tegra_primary_plane_create(struct drm_device *drm,
 	formats = dc->soc->primary_formats;
 	modifiers = dc->soc->modifiers;
 
+	err = tegra_plane_interconnect_init(plane);
+	if (err) {
+		kfree(plane);
+		return ERR_PTR(err);
+	}
+
 	err = drm_universal_plane_init(drm, &plane->base, possible_crtcs,
 				       &tegra_plane_funcs, formats,
 				       num_formats, modifiers, type, NULL);
@@ -787,6 +845,8 @@ static struct drm_plane *tegra_primary_plane_create(struct drm_device *drm,
 	err = drm_plane_create_rotation_property(&plane->base,
 						 DRM_MODE_ROTATE_0,
 						 DRM_MODE_ROTATE_0 |
+						 DRM_MODE_ROTATE_180 |
+						 DRM_MODE_REFLECT_X |
 						 DRM_MODE_REFLECT_Y);
 	if (err < 0)
 		dev_err(dc->dev, "failed to create rotation property: %d\n",
@@ -795,53 +855,76 @@ static struct drm_plane *tegra_primary_plane_create(struct drm_device *drm,
 	return &plane->base;
 }
 
-static const u32 tegra_cursor_plane_formats[] = {
+static const u32 tegra_legacy_cursor_plane_formats[] = {
 	DRM_FORMAT_RGBA8888,
 };
 
+static const u32 tegra_cursor_plane_formats[] = {
+	DRM_FORMAT_ARGB8888,
+};
+
 static int tegra_cursor_atomic_check(struct drm_plane *plane,
-				     struct drm_plane_state *state)
+				     struct drm_atomic_state *state)
 {
+	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
+										 plane);
+	struct tegra_plane_state *plane_state = to_tegra_plane_state(new_plane_state);
 	struct tegra_plane *tegra = to_tegra_plane(plane);
 	int err;
 
+	plane_state->peak_memory_bandwidth = 0;
+	plane_state->avg_memory_bandwidth = 0;
+
 	/* no need for further checks if the plane is being disabled */
-	if (!state->crtc)
+	if (!new_plane_state->crtc) {
+		plane_state->total_peak_memory_bandwidth = 0;
 		return 0;
+	}
 
 	/* scaling not supported for cursor */
-	if ((state->src_w >> 16 != state->crtc_w) ||
-	    (state->src_h >> 16 != state->crtc_h))
+	if ((new_plane_state->src_w >> 16 != new_plane_state->crtc_w) ||
+	    (new_plane_state->src_h >> 16 != new_plane_state->crtc_h))
 		return -EINVAL;
 
 	/* only square cursors supported */
-	if (state->src_w != state->src_h)
+	if (new_plane_state->src_w != new_plane_state->src_h)
 		return -EINVAL;
 
-	if (state->crtc_w != 32 && state->crtc_w != 64 &&
-	    state->crtc_w != 128 && state->crtc_w != 256)
+	if (new_plane_state->crtc_w != 32 && new_plane_state->crtc_w != 64 &&
+	    new_plane_state->crtc_w != 128 && new_plane_state->crtc_w != 256)
 		return -EINVAL;
 
-	err = tegra_plane_state_add(tegra, state);
+	err = tegra_plane_state_add(tegra, new_plane_state);
 	if (err < 0)
 		return err;
 
 	return 0;
 }
 
-static void tegra_cursor_atomic_update(struct drm_plane *plane,
-				       struct drm_plane_state *old_state)
+static void __tegra_cursor_atomic_update(struct drm_plane *plane,
+					 struct drm_plane_state *new_state)
 {
-	struct tegra_bo *bo = tegra_fb_get_plane(plane->state->fb, 0);
-	struct tegra_dc *dc = to_tegra_dc(plane->state->crtc);
-	struct drm_plane_state *state = plane->state;
-	u32 value = CURSOR_CLIP_DISPLAY;
+	struct tegra_plane_state *tegra_plane_state = to_tegra_plane_state(new_state);
+	struct tegra_dc *dc = to_tegra_dc(new_state->crtc);
+	struct tegra_drm *tegra = plane->dev->dev_private;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	u64 dma_mask = *dc->dev->dma_mask;
+#endif
+	unsigned int x, y;
+	u32 value = 0;
 
 	/* rien ne va plus */
-	if (!plane->state->crtc || !plane->state->fb)
+	if (!new_state->crtc || !new_state->fb)
 		return;
 
-	switch (state->crtc_w) {
+	/*
+	 * Legacy display supports hardware clipping of the cursor, but
+	 * nvdisplay relies on software to clip the cursor to the screen.
+	 */
+	if (!dc->soc->has_nvdisplay)
+		value |= CURSOR_CLIP_DISPLAY;
+
+	switch (new_state->crtc_w) {
 	case 32:
 		value |= CURSOR_SIZE_32x32;
 		break;
@@ -859,16 +942,16 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane,
 		break;
 
 	default:
-		WARN(1, "cursor size %ux%u not supported\n", state->crtc_w,
-		     state->crtc_h);
+		WARN(1, "cursor size %ux%u not supported\n",
+		     new_state->crtc_w, new_state->crtc_h);
 		return;
 	}
 
-	value |= (bo->paddr >> 10) & 0x3fffff;
+	value |= (tegra_plane_state->iova[0] >> 10) & 0x3fffff;
 	tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR);
 
 #ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-	value = (bo->paddr >> 32) & 0x3;
+	value = (tegra_plane_state->iova[0] >> 32) & (dma_mask >> 32);
 	tegra_dc_writel(dc, value, DC_DISP_CURSOR_START_ADDR_HI);
 #endif
 
@@ -880,20 +963,55 @@ static void tegra_cursor_atomic_update(struct drm_plane *plane,
 	value = tegra_dc_readl(dc, DC_DISP_BLEND_CURSOR_CONTROL);
 	value &= ~CURSOR_DST_BLEND_MASK;
 	value &= ~CURSOR_SRC_BLEND_MASK;
-	value |= CURSOR_MODE_NORMAL;
+
+	if (dc->soc->has_nvdisplay)
+		value &= ~CURSOR_COMPOSITION_MODE_XOR;
+	else
+		value |= CURSOR_MODE_NORMAL;
+
 	value |= CURSOR_DST_BLEND_NEG_K1_TIMES_SRC;
 	value |= CURSOR_SRC_BLEND_K1_TIMES_SRC;
 	value |= CURSOR_ALPHA;
 	tegra_dc_writel(dc, value, DC_DISP_BLEND_CURSOR_CONTROL);
 
+	/* nvdisplay relies on software for clipping */
+	if (dc->soc->has_nvdisplay) {
+		struct drm_rect src;
+
+		x = new_state->dst.x1;
+		y = new_state->dst.y1;
+
+		drm_rect_fp_to_int(&src, &new_state->src);
+
+		value = (src.y1 & tegra->vmask) << 16 | (src.x1 & tegra->hmask);
+		tegra_dc_writel(dc, value, DC_DISP_PCALC_HEAD_SET_CROPPED_POINT_IN_CURSOR);
+
+		value = (drm_rect_height(&src) & tegra->vmask) << 16 |
+			(drm_rect_width(&src) & tegra->hmask);
+		tegra_dc_writel(dc, value, DC_DISP_PCALC_HEAD_SET_CROPPED_SIZE_IN_CURSOR);
+	} else {
+		x = new_state->crtc_x;
+		y = new_state->crtc_y;
+	}
+
 	/* position the cursor */
-	value = (state->crtc_y & 0x3fff) << 16 | (state->crtc_x & 0x3fff);
+	value = ((y & tegra->vmask) << 16) | (x & tegra->hmask);
 	tegra_dc_writel(dc, value, DC_DISP_CURSOR_POSITION);
 }
 
+static void tegra_cursor_atomic_update(struct drm_plane *plane,
+				       struct drm_atomic_state *state)
+{
+	struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane);
+
+	__tegra_cursor_atomic_update(plane, new_state);
+}
+
 static void tegra_cursor_atomic_disable(struct drm_plane *plane,
-					struct drm_plane_state *old_state)
+					struct drm_atomic_state *state)
 {
+	struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state,
+									   plane);
 	struct tegra_dc *dc;
 	u32 value;
 
@@ -908,10 +1026,84 @@ static void tegra_cursor_atomic_disable(struct drm_plane *plane,
 	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
 }
 
+static int tegra_cursor_atomic_async_check(struct drm_plane *plane, struct drm_atomic_state *state,
+					   bool flip)
+{
+	struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane);
+	struct drm_crtc_state *crtc_state;
+	int min_scale, max_scale;
+	int err;
+
+	crtc_state = drm_atomic_get_new_crtc_state(state, new_state->crtc);
+	if (WARN_ON(!crtc_state))
+		return -EINVAL;
+
+	if (!crtc_state->active)
+		return -EINVAL;
+
+	if (plane->state->crtc != new_state->crtc ||
+	    plane->state->src_w != new_state->src_w ||
+	    plane->state->src_h != new_state->src_h ||
+	    plane->state->crtc_w != new_state->crtc_w ||
+	    plane->state->crtc_h != new_state->crtc_h ||
+	    plane->state->fb != new_state->fb ||
+	    plane->state->fb == NULL)
+		return -EINVAL;
+
+	min_scale = (1 << 16) / 8;
+	max_scale = (8 << 16) / 1;
+
+	err = drm_atomic_helper_check_plane_state(new_state, crtc_state, min_scale, max_scale,
+						  true, true);
+	if (err < 0)
+		return err;
+
+	if (new_state->visible != plane->state->visible)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void tegra_cursor_atomic_async_update(struct drm_plane *plane,
+					     struct drm_atomic_state *state)
+{
+	struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state, plane);
+	struct tegra_dc *dc = to_tegra_dc(new_state->crtc);
+
+	plane->state->src_x = new_state->src_x;
+	plane->state->src_y = new_state->src_y;
+	plane->state->crtc_x = new_state->crtc_x;
+	plane->state->crtc_y = new_state->crtc_y;
+
+	if (new_state->visible) {
+		struct tegra_plane *p = to_tegra_plane(plane);
+		u32 value;
+
+		__tegra_cursor_atomic_update(plane, new_state);
+
+		value = (WIN_A_ACT_REQ << p->index) << 8 | GENERAL_UPDATE;
+		tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
+		(void)tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
+
+		value = (WIN_A_ACT_REQ << p->index) | GENERAL_ACT_REQ;
+		tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
+		(void)tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
+	}
+}
+
 static const struct drm_plane_helper_funcs tegra_cursor_plane_helper_funcs = {
+	.prepare_fb = tegra_plane_prepare_fb,
+	.cleanup_fb = tegra_plane_cleanup_fb,
 	.atomic_check = tegra_cursor_atomic_check,
 	.atomic_update = tegra_cursor_atomic_update,
 	.atomic_disable = tegra_cursor_atomic_disable,
+	.atomic_async_check = tegra_cursor_atomic_async_check,
+	.atomic_async_update = tegra_cursor_atomic_async_update,
+};
+
+static const uint64_t linear_modifiers[] = {
+	DRM_FORMAT_MOD_LINEAR,
+	DRM_FORMAT_MOD_INVALID
 };
 
 static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm,
@@ -937,12 +1129,23 @@ static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm,
 	plane->index = 6;
 	plane->dc = dc;
 
-	num_formats = ARRAY_SIZE(tegra_cursor_plane_formats);
-	formats = tegra_cursor_plane_formats;
+	if (!dc->soc->has_nvdisplay) {
+		num_formats = ARRAY_SIZE(tegra_legacy_cursor_plane_formats);
+		formats = tegra_legacy_cursor_plane_formats;
+
+		err = tegra_plane_interconnect_init(plane);
+		if (err) {
+			kfree(plane);
+			return ERR_PTR(err);
+		}
+	} else {
+		num_formats = ARRAY_SIZE(tegra_cursor_plane_formats);
+		formats = tegra_cursor_plane_formats;
+	}
 
 	err = drm_universal_plane_init(drm, &plane->base, possible_crtcs,
 				       &tegra_plane_funcs, formats,
-				       num_formats, NULL,
+				       num_formats, linear_modifiers,
 				       DRM_PLANE_TYPE_CURSOR, NULL);
 	if (err < 0) {
 		kfree(plane);
@@ -950,6 +1153,7 @@ static struct drm_plane *tegra_dc_cursor_plane_create(struct drm_device *drm,
 	}
 
 	drm_plane_helper_add(&plane->base, &tegra_cursor_plane_helper_funcs);
+	drm_plane_create_zpos_immutable_property(&plane->base, 255);
 
 	return &plane->base;
 }
@@ -998,6 +1202,13 @@ static const u32 tegra114_overlay_formats[] = {
 	DRM_FORMAT_YUYV,
 	DRM_FORMAT_YUV420,
 	DRM_FORMAT_YUV422,
+	/* semi-planar formats */
+	DRM_FORMAT_NV12,
+	DRM_FORMAT_NV21,
+	DRM_FORMAT_NV16,
+	DRM_FORMAT_NV61,
+	DRM_FORMAT_NV24,
+	DRM_FORMAT_NV42,
 };
 
 static const u32 tegra124_overlay_formats[] = {
@@ -1026,8 +1237,18 @@ static const u32 tegra124_overlay_formats[] = {
 	/* planar formats */
 	DRM_FORMAT_UYVY,
 	DRM_FORMAT_YUYV,
-	DRM_FORMAT_YUV420,
-	DRM_FORMAT_YUV422,
+	DRM_FORMAT_YVYU,
+	DRM_FORMAT_VYUY,
+	DRM_FORMAT_YUV420, /* YU12 */
+	DRM_FORMAT_YUV422, /* YU16 */
+	DRM_FORMAT_YUV444, /* YU24 */
+	/* semi-planar formats */
+	DRM_FORMAT_NV12,
+	DRM_FORMAT_NV21,
+	DRM_FORMAT_NV16,
+	DRM_FORMAT_NV61,
+	DRM_FORMAT_NV24,
+	DRM_FORMAT_NV42,
 };
 
 static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
@@ -1053,6 +1274,12 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
 	num_formats = dc->soc->num_overlay_formats;
 	formats = dc->soc->overlay_formats;
 
+	err = tegra_plane_interconnect_init(plane);
+	if (err) {
+		kfree(plane);
+		return ERR_PTR(err);
+	}
+
 	if (!cursor)
 		type = DRM_PLANE_TYPE_OVERLAY;
 	else
@@ -1060,7 +1287,8 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
 
 	err = drm_universal_plane_init(drm, &plane->base, possible_crtcs,
 				       &tegra_plane_funcs, formats,
-				       num_formats, NULL, type, NULL);
+				       num_formats, linear_modifiers,
+				       type, NULL);
 	if (err < 0) {
 		kfree(plane);
 		return ERR_PTR(err);
@@ -1072,6 +1300,8 @@ static struct drm_plane *tegra_dc_overlay_plane_create(struct drm_device *drm,
 	err = drm_plane_create_rotation_property(&plane->base,
 						 DRM_MODE_ROTATE_0,
 						 DRM_MODE_ROTATE_0 |
+						 DRM_MODE_ROTATE_180 |
+						 DRM_MODE_REFLECT_X |
 						 DRM_MODE_REFLECT_Y);
 	if (err < 0)
 		dev_err(dc->dev, "failed to create rotation property: %d\n",
@@ -1092,10 +1322,16 @@ static struct drm_plane *tegra_dc_add_shared_planes(struct drm_device *drm,
 		if (wgrp->dc == dc->pipe) {
 			for (j = 0; j < wgrp->num_windows; j++) {
 				unsigned int index = wgrp->windows[j];
+				enum drm_plane_type type;
+
+				if (primary)
+					type = DRM_PLANE_TYPE_OVERLAY;
+				else
+					type = DRM_PLANE_TYPE_PRIMARY;
 
 				plane = tegra_shared_plane_create(drm, dc,
 								  wgrp->index,
-								  index);
+								  index, type);
 				if (IS_ERR(plane))
 					return plane;
 
@@ -1103,10 +1339,8 @@ static struct drm_plane *tegra_dc_add_shared_planes(struct drm_device *drm,
 				 * Choose the first shared plane owned by this
 				 * head as the primary plane.
 				 */
-				if (!primary) {
-					plane->type = DRM_PLANE_TYPE_PRIMARY;
+				if (!primary)
 					primary = plane;
-				}
 			}
 		}
 	}
@@ -1138,9 +1372,9 @@ static struct drm_plane *tegra_dc_add_planes(struct drm_device *drm,
 			err = PTR_ERR(planes[i]);
 
 			while (i--)
-				tegra_plane_funcs.destroy(planes[i]);
+				planes[i]->funcs->destroy(planes[i]);
 
-			tegra_plane_funcs.destroy(primary);
+			primary->funcs->destroy(primary);
 			return ERR_PTR(err);
 		}
 	}
@@ -1155,21 +1389,15 @@ static void tegra_dc_destroy(struct drm_crtc *crtc)
 
 static void tegra_crtc_reset(struct drm_crtc *crtc)
 {
-	struct tegra_dc_state *state;
+	struct tegra_dc_state *state = kzalloc(sizeof(*state), GFP_KERNEL);
 
 	if (crtc->state)
-		__drm_atomic_helper_crtc_destroy_state(crtc->state);
-
-	kfree(crtc->state);
-	crtc->state = NULL;
-
-	state = kzalloc(sizeof(*state), GFP_KERNEL);
-	if (state) {
-		crtc->state = &state->base;
-		crtc->state->crtc = crtc;
-	}
+		tegra_crtc_atomic_destroy_state(crtc, crtc->state);
 
-	drm_crtc_vblank_reset(crtc);
+	if (state)
+		__drm_atomic_helper_crtc_reset(crtc, &state->base);
+	else
+		__drm_atomic_helper_crtc_reset(crtc, NULL);
 }
 
 static struct drm_crtc_state *
@@ -1482,6 +1710,11 @@ static int tegra_dc_show_stats(struct seq_file *s, void *data)
 	seq_printf(s, "underflow: %lu\n", dc->stats.underflow);
 	seq_printf(s, "overflow: %lu\n", dc->stats.overflow);
 
+	seq_printf(s, "frames total: %lu\n", dc->stats.frames_total);
+	seq_printf(s, "vblank total: %lu\n", dc->stats.vblank_total);
+	seq_printf(s, "underflow total: %lu\n", dc->stats.underflow_total);
+	seq_printf(s, "overflow total: %lu\n", dc->stats.overflow_total);
+
 	return 0;
 }
 
@@ -1497,7 +1730,6 @@ static int tegra_dc_late_register(struct drm_crtc *crtc)
 	struct drm_minor *minor = crtc->dev->primary;
 	struct dentry *root;
 	struct tegra_dc *dc = to_tegra_dc(crtc);
-	int err;
 
 #ifdef CONFIG_DEBUG_FS
 	root = crtc->debugfs_entry;
@@ -1513,17 +1745,9 @@ static int tegra_dc_late_register(struct drm_crtc *crtc)
 	for (i = 0; i < count; i++)
 		dc->debugfs_files[i].data = dc;
 
-	err = drm_debugfs_create_files(dc->debugfs_files, count, root, minor);
-	if (err < 0)
-		goto free;
+	drm_debugfs_create_files(dc->debugfs_files, count, root, minor);
 
 	return 0;
-
-free:
-	kfree(dc->debugfs_files);
-	dc->debugfs_files = NULL;
-
-	return err;
 }
 
 static void tegra_dc_early_unregister(struct drm_crtc *crtc)
@@ -1531,8 +1755,15 @@ static void tegra_dc_early_unregister(struct drm_crtc *crtc)
 	unsigned int count = ARRAY_SIZE(debugfs_files);
 	struct drm_minor *minor = crtc->dev->primary;
 	struct tegra_dc *dc = to_tegra_dc(crtc);
+	struct dentry *root;
+
+#ifdef CONFIG_DEBUG_FS
+	root = crtc->debugfs_entry;
+#else
+	root = NULL;
+#endif
 
-	drm_debugfs_remove_files(dc->debugfs_files, count, minor);
+	drm_debugfs_remove_files(dc->debugfs_files, count, root, minor);
 	kfree(dc->debugfs_files);
 	dc->debugfs_files = NULL;
 }
@@ -1646,10 +1877,55 @@ int tegra_dc_state_setup_clock(struct tegra_dc *dc,
 	return 0;
 }
 
-static void tegra_dc_commit_state(struct tegra_dc *dc,
-				  struct tegra_dc_state *state)
+static void tegra_dc_update_voltage_state(struct tegra_dc *dc,
+					  struct tegra_dc_state *state)
+{
+	unsigned long rate, pstate;
+	struct dev_pm_opp *opp;
+	int err;
+
+	if (!dc->has_opp_table)
+		return;
+
+	/* calculate actual pixel clock rate which depends on internal divider */
+	rate = DIV_ROUND_UP(clk_get_rate(dc->clk) * 2, state->div + 2);
+
+	/* find suitable OPP for the rate */
+	opp = dev_pm_opp_find_freq_ceil(dc->dev, &rate);
+
+	/*
+	 * Very high resolution modes may result in a clock rate that is
+	 * above the characterized maximum. In this case it's okay to fall
+	 * back to the characterized maximum.
+	 */
+	if (opp == ERR_PTR(-ERANGE))
+		opp = dev_pm_opp_find_freq_floor(dc->dev, &rate);
+
+	if (IS_ERR(opp)) {
+		dev_err(dc->dev, "failed to find OPP for %luHz: %pe\n",
+			rate, opp);
+		return;
+	}
+
+	pstate = dev_pm_opp_get_required_pstate(opp, 0);
+	dev_pm_opp_put(opp);
+
+	/*
+	 * The minimum core voltage depends on the pixel clock rate (which
+	 * depends on internal clock divider of the CRTC) and not on the
+	 * rate of the display controller clock. This is why we're not using
+	 * dev_pm_opp_set_rate() API and instead controlling the power domain
+	 * directly.
+	 */
+	err = dev_pm_genpd_set_performance_state(dc->dev, pstate);
+	if (err)
+		dev_err(dc->dev, "failed to set power domain state to %lu: %d\n",
+			pstate, err);
+}
+
+static void tegra_dc_set_clock_rate(struct tegra_dc *dc,
+				    struct tegra_dc_state *state)
 {
-	u32 value;
 	int err;
 
 	err = clk_set_parent(dc->clk, state->clk);
@@ -1670,21 +1946,18 @@ static void tegra_dc_commit_state(struct tegra_dc *dc,
 			dev_err(dc->dev,
 				"failed to set clock rate to %lu Hz\n",
 				state->pclk);
+
+		err = clk_set_rate(dc->clk, state->pclk);
+		if (err < 0)
+			dev_err(dc->dev, "failed to set clock %pC to %lu Hz: %d\n",
+				dc->clk, state->pclk, err);
 	}
 
 	DRM_DEBUG_KMS("rate: %lu, div: %u\n", clk_get_rate(dc->clk),
 		      state->div);
 	DRM_DEBUG_KMS("pclk: %lu\n", state->pclk);
 
-	if (!dc->soc->has_nvdisplay) {
-		value = SHIFT_CLK_DIVIDER(state->div) | PIXEL_CLK_DIVIDER_PCD1;
-		tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL);
-	}
-
-	err = clk_set_rate(dc->clk, state->pclk);
-	if (err < 0)
-		dev_err(dc->dev, "failed to set clock %pC to %lu Hz: %d\n",
-			dc->clk, state->pclk, err);
+	tegra_dc_update_voltage_state(dc, state);
 }
 
 static void tegra_dc_stop(struct tegra_dc *dc)
@@ -1723,11 +1996,109 @@ static int tegra_dc_wait_idle(struct tegra_dc *dc, unsigned long timeout)
 	return -ETIMEDOUT;
 }
 
+static void
+tegra_crtc_update_memory_bandwidth(struct drm_crtc *crtc,
+				   struct drm_atomic_state *state,
+				   bool prepare_bandwidth_transition)
+{
+	const struct tegra_plane_state *old_tegra_state, *new_tegra_state;
+	u32 i, new_avg_bw, old_avg_bw, new_peak_bw, old_peak_bw;
+	const struct drm_plane_state *old_plane_state;
+	const struct drm_crtc_state *old_crtc_state;
+	struct tegra_dc_window window, old_window;
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	struct tegra_plane *tegra;
+	struct drm_plane *plane;
+
+	if (dc->soc->has_nvdisplay)
+		return;
+
+	old_crtc_state = drm_atomic_get_old_crtc_state(state, crtc);
+
+	if (!crtc->state->active) {
+		if (!old_crtc_state->active)
+			return;
+
+		/*
+		 * When CRTC is disabled on DPMS, the state of attached planes
+		 * is kept unchanged. Hence we need to enforce removal of the
+		 * bandwidths from the ICC paths.
+		 */
+		drm_atomic_crtc_for_each_plane(plane, crtc) {
+			tegra = to_tegra_plane(plane);
+
+			icc_set_bw(tegra->icc_mem, 0, 0);
+			icc_set_bw(tegra->icc_mem_vfilter, 0, 0);
+		}
+
+		return;
+	}
+
+	for_each_old_plane_in_state(old_crtc_state->state, plane,
+				    old_plane_state, i) {
+		old_tegra_state = to_const_tegra_plane_state(old_plane_state);
+		new_tegra_state = to_const_tegra_plane_state(plane->state);
+		tegra = to_tegra_plane(plane);
+
+		/*
+		 * We're iterating over the global atomic state and it contains
+		 * planes from another CRTC, hence we need to filter out the
+		 * planes unrelated to this CRTC.
+		 */
+		if (tegra->dc != dc)
+			continue;
+
+		new_avg_bw = new_tegra_state->avg_memory_bandwidth;
+		old_avg_bw = old_tegra_state->avg_memory_bandwidth;
+
+		new_peak_bw = new_tegra_state->total_peak_memory_bandwidth;
+		old_peak_bw = old_tegra_state->total_peak_memory_bandwidth;
+
+		/*
+		 * See the comment related to !crtc->state->active above,
+		 * which explains why bandwidths need to be updated when
+		 * CRTC is turning ON.
+		 */
+		if (new_avg_bw == old_avg_bw && new_peak_bw == old_peak_bw &&
+		    old_crtc_state->active)
+			continue;
+
+		window.src.h = drm_rect_height(&plane->state->src) >> 16;
+		window.dst.h = drm_rect_height(&plane->state->dst);
+
+		old_window.src.h = drm_rect_height(&old_plane_state->src) >> 16;
+		old_window.dst.h = drm_rect_height(&old_plane_state->dst);
+
+		/*
+		 * During the preparation phase (atomic_begin), the memory
+		 * freq should go high before the DC changes are committed
+		 * if bandwidth requirement goes up, otherwise memory freq
+		 * should stay high if BW requirement goes down.  The
+		 * opposite applies to the completion phase (post_commit).
+		 */
+		if (prepare_bandwidth_transition) {
+			new_avg_bw = max(old_avg_bw, new_avg_bw);
+			new_peak_bw = max(old_peak_bw, new_peak_bw);
+
+			if (tegra_plane_use_vertical_filtering(tegra, &old_window))
+				window = old_window;
+		}
+
+		icc_set_bw(tegra->icc_mem, new_avg_bw, new_peak_bw);
+
+		if (tegra_plane_use_vertical_filtering(tegra, &window))
+			icc_set_bw(tegra->icc_mem_vfilter, new_avg_bw, new_peak_bw);
+		else
+			icc_set_bw(tegra->icc_mem_vfilter, 0, 0);
+	}
+}
+
 static void tegra_crtc_atomic_disable(struct drm_crtc *crtc,
-				      struct drm_crtc_state *old_state)
+				      struct drm_atomic_state *state)
 {
 	struct tegra_dc *dc = to_tegra_dc(crtc);
 	u32 value;
+	int err;
 
 	if (!tegra_dc_idle(dc)) {
 		tegra_dc_stop(dc);
@@ -1774,18 +2145,35 @@ static void tegra_crtc_atomic_disable(struct drm_crtc *crtc,
 
 	spin_unlock_irq(&crtc->dev->event_lock);
 
-	pm_runtime_put_sync(dc->dev);
+	err = host1x_client_suspend(&dc->client);
+	if (err < 0)
+		dev_err(dc->dev, "failed to suspend: %d\n", err);
+
+	if (dc->has_opp_table) {
+		err = dev_pm_genpd_set_performance_state(dc->dev, 0);
+		if (err)
+			dev_err(dc->dev,
+				"failed to clear power domain state: %d\n", err);
+	}
 }
 
 static void tegra_crtc_atomic_enable(struct drm_crtc *crtc,
-				     struct drm_crtc_state *old_state)
+				     struct drm_atomic_state *state)
 {
 	struct drm_display_mode *mode = &crtc->state->adjusted_mode;
-	struct tegra_dc_state *state = to_dc_state(crtc->state);
+	struct tegra_dc_state *crtc_state = to_dc_state(crtc->state);
 	struct tegra_dc *dc = to_tegra_dc(crtc);
 	u32 value;
+	int err;
 
-	pm_runtime_get_sync(dc->dev);
+	/* apply PLL changes */
+	tegra_dc_set_clock_rate(dc, crtc_state);
+
+	err = host1x_client_resume(&dc->client);
+	if (err < 0) {
+		dev_err(dc->dev, "failed to resume: %d\n", err);
+		return;
+	}
 
 	/* initialize display controller */
 	if (dc->syncpt) {
@@ -1855,8 +2243,11 @@ static void tegra_crtc_atomic_enable(struct drm_crtc *crtc,
 	else
 		tegra_dc_writel(dc, 0, DC_DISP_BORDER_COLOR);
 
-	/* apply PLL and pixel clock changes */
-	tegra_dc_commit_state(dc, state);
+	/* apply pixel clock changes */
+	if (!dc->soc->has_nvdisplay) {
+		value = SHIFT_CLK_DIVIDER(crtc_state->div) | PIXEL_CLK_DIVIDER_PCD1;
+		tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL);
+	}
 
 	/* program display mode */
 	tegra_dc_set_timings(dc, mode);
@@ -1886,16 +2277,24 @@ static void tegra_crtc_atomic_enable(struct drm_crtc *crtc,
 		tegra_dc_writel(dc, value, DC_COM_RG_UNDERFLOW);
 	}
 
+	if (dc->rgb) {
+		/* XXX: parameterize? */
+		value = SC0_H_QUALIFIER_NONE | SC1_H_QUALIFIER_NONE;
+		tegra_dc_writel(dc, value, DC_DISP_SHIFT_CLOCK_OPTIONS);
+	}
+
 	tegra_dc_commit(dc);
 
 	drm_crtc_vblank_on(crtc);
 }
 
 static void tegra_crtc_atomic_begin(struct drm_crtc *crtc,
-				    struct drm_crtc_state *old_crtc_state)
+				    struct drm_atomic_state *state)
 {
 	unsigned long flags;
 
+	tegra_crtc_update_memory_bandwidth(crtc, state, true);
+
 	if (crtc->state->event) {
 		spin_lock_irqsave(&crtc->dev->event_lock, flags);
 
@@ -1911,22 +2310,222 @@ static void tegra_crtc_atomic_begin(struct drm_crtc *crtc,
 }
 
 static void tegra_crtc_atomic_flush(struct drm_crtc *crtc,
-				    struct drm_crtc_state *old_crtc_state)
+				    struct drm_atomic_state *state)
 {
-	struct tegra_dc_state *state = to_dc_state(crtc->state);
+	struct drm_crtc_state *crtc_state = drm_atomic_get_new_crtc_state(state,
+									  crtc);
+	struct tegra_dc_state *dc_state = to_dc_state(crtc_state);
 	struct tegra_dc *dc = to_tegra_dc(crtc);
 	u32 value;
 
-	value = state->planes << 8 | GENERAL_UPDATE;
+	value = dc_state->planes << 8 | GENERAL_UPDATE;
 	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
 	value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
 
-	value = state->planes | GENERAL_ACT_REQ;
+	value = dc_state->planes | GENERAL_ACT_REQ;
 	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
 	value = tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
 }
 
+static bool tegra_plane_is_cursor(const struct drm_plane_state *state)
+{
+	const struct tegra_dc_soc_info *soc = to_tegra_dc(state->crtc)->soc;
+	const struct drm_format_info *fmt = state->fb->format;
+	unsigned int src_w = drm_rect_width(&state->src) >> 16;
+	unsigned int dst_w = drm_rect_width(&state->dst);
+
+	if (state->plane->type != DRM_PLANE_TYPE_CURSOR)
+		return false;
+
+	if (soc->supports_cursor)
+		return true;
+
+	if (src_w != dst_w || fmt->num_planes != 1 || src_w * fmt->cpp[0] > 256)
+		return false;
+
+	return true;
+}
+
+static unsigned long
+tegra_plane_overlap_mask(struct drm_crtc_state *state,
+			 const struct drm_plane_state *plane_state)
+{
+	const struct drm_plane_state *other_state;
+	const struct tegra_plane *tegra;
+	unsigned long overlap_mask = 0;
+	struct drm_plane *plane;
+	struct drm_rect rect;
+
+	if (!plane_state->visible || !plane_state->fb)
+		return 0;
+
+	/*
+	 * Data-prefetch FIFO will easily help to overcome temporal memory
+	 * pressure if other plane overlaps with the cursor plane.
+	 */
+	if (tegra_plane_is_cursor(plane_state))
+		return 0;
+
+	drm_atomic_crtc_state_for_each_plane_state(plane, other_state, state) {
+		rect = plane_state->dst;
+
+		tegra = to_tegra_plane(other_state->plane);
+
+		if (!other_state->visible || !other_state->fb)
+			continue;
+
+		/*
+		 * Ignore cursor plane overlaps because it's not practical to
+		 * assume that it contributes to the bandwidth in overlapping
+		 * area if window width is small.
+		 */
+		if (tegra_plane_is_cursor(other_state))
+			continue;
+
+		if (drm_rect_intersect(&rect, &other_state->dst))
+			overlap_mask |= BIT(tegra->index);
+	}
+
+	return overlap_mask;
+}
+
+static int tegra_crtc_calculate_memory_bandwidth(struct drm_crtc *crtc,
+						 struct drm_atomic_state *state)
+{
+	ulong overlap_mask[TEGRA_DC_LEGACY_PLANES_NUM] = {}, mask;
+	u32 plane_peak_bw[TEGRA_DC_LEGACY_PLANES_NUM] = {};
+	bool all_planes_overlap_simultaneously = true;
+	const struct tegra_plane_state *tegra_state;
+	const struct drm_plane_state *plane_state;
+	struct tegra_dc *dc = to_tegra_dc(crtc);
+	struct drm_crtc_state *new_state;
+	struct tegra_plane *tegra;
+	struct drm_plane *plane;
+
+	/*
+	 * The nv-display uses shared planes.  The algorithm below assumes
+	 * maximum 3 planes per-CRTC, this assumption isn't applicable to
+	 * the nv-display.  Note that T124 support has additional windows,
+	 * but currently they aren't supported by the driver.
+	 */
+	if (dc->soc->has_nvdisplay)
+		return 0;
+
+	new_state = drm_atomic_get_new_crtc_state(state, crtc);
+
+	/*
+	 * For overlapping planes pixel's data is fetched for each plane at
+	 * the same time, hence bandwidths are accumulated in this case.
+	 * This needs to be taken into account for calculating total bandwidth
+	 * consumed by all planes.
+	 *
+	 * Here we get the overlapping state of each plane, which is a
+	 * bitmask of plane indices telling with what planes there is an
+	 * overlap. Note that bitmask[plane] includes BIT(plane) in order
+	 * to make further code nicer and simpler.
+	 */
+	drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) {
+		tegra_state = to_const_tegra_plane_state(plane_state);
+		tegra = to_tegra_plane(plane);
+
+		if (WARN_ON_ONCE(tegra->index >= TEGRA_DC_LEGACY_PLANES_NUM))
+			return -EINVAL;
+
+		plane_peak_bw[tegra->index] = tegra_state->peak_memory_bandwidth;
+		mask = tegra_plane_overlap_mask(new_state, plane_state);
+		overlap_mask[tegra->index] = mask;
+
+		if (hweight_long(mask) != 3)
+			all_planes_overlap_simultaneously = false;
+	}
+
+	/*
+	 * Then we calculate maximum bandwidth of each plane state.
+	 * The bandwidth includes the plane BW + BW of the "simultaneously"
+	 * overlapping planes, where "simultaneously" means areas where DC
+	 * fetches from the planes simultaneously during the scan-out process.
+	 *
+	 * For example, if plane A overlaps with planes B and C, but B and C
+	 * don't overlap, then the peak bandwidth will be either in area where
+	 * A-and-B or A-and-C planes overlap.
+	 *
+	 * The plane_peak_bw[] contains peak memory bandwidth values of
+	 * each plane, this information is needed by interconnect provider
+	 * in order to set up latency allowance based on the peak BW, see
+	 * tegra_crtc_update_memory_bandwidth().
+	 */
+	drm_atomic_crtc_state_for_each_plane_state(plane, plane_state, new_state) {
+		u32 i, old_peak_bw, new_peak_bw, overlap_bw = 0;
+
+		/*
+		 * Note that plane's atomic check doesn't touch the
+		 * total_peak_memory_bandwidth of enabled plane, hence the
+		 * current state contains the old bandwidth state from the
+		 * previous CRTC commit.
+		 */
+		tegra_state = to_const_tegra_plane_state(plane_state);
+		tegra = to_tegra_plane(plane);
+
+		for_each_set_bit(i, &overlap_mask[tegra->index], 3) {
+			if (i == tegra->index)
+				continue;
+
+			if (all_planes_overlap_simultaneously)
+				overlap_bw += plane_peak_bw[i];
+			else
+				overlap_bw = max(overlap_bw, plane_peak_bw[i]);
+		}
+
+		new_peak_bw = plane_peak_bw[tegra->index] + overlap_bw;
+		old_peak_bw = tegra_state->total_peak_memory_bandwidth;
+
+		/*
+		 * If plane's peak bandwidth changed (for example plane isn't
+		 * overlapped anymore) and plane isn't in the atomic state,
+		 * then add plane to the state in order to have the bandwidth
+		 * updated.
+		 */
+		if (old_peak_bw != new_peak_bw) {
+			struct tegra_plane_state *new_tegra_state;
+			struct drm_plane_state *new_plane_state;
+
+			new_plane_state = drm_atomic_get_plane_state(state, plane);
+			if (IS_ERR(new_plane_state))
+				return PTR_ERR(new_plane_state);
+
+			new_tegra_state = to_tegra_plane_state(new_plane_state);
+			new_tegra_state->total_peak_memory_bandwidth = new_peak_bw;
+		}
+	}
+
+	return 0;
+}
+
+static int tegra_crtc_atomic_check(struct drm_crtc *crtc,
+				   struct drm_atomic_state *state)
+{
+	int err;
+
+	err = tegra_crtc_calculate_memory_bandwidth(crtc, state);
+	if (err)
+		return err;
+
+	return 0;
+}
+
+void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc,
+				   struct drm_atomic_state *state)
+{
+	/*
+	 * Display bandwidth is allowed to go down only once hardware state
+	 * is known to be armed, i.e. state was committed and VBLANK event
+	 * received.
+	 */
+	tegra_crtc_update_memory_bandwidth(crtc, state, false);
+}
+
 static const struct drm_crtc_helper_funcs tegra_crtc_helper_funcs = {
+	.atomic_check = tegra_crtc_atomic_check,
 	.atomic_begin = tegra_crtc_atomic_begin,
 	.atomic_flush = tegra_crtc_atomic_flush,
 	.atomic_enable = tegra_crtc_atomic_enable,
@@ -1945,6 +2544,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
 		/*
 		dev_dbg(dc->dev, "%s(): frame end\n", __func__);
 		*/
+		dc->stats.frames_total++;
 		dc->stats.frames++;
 	}
 
@@ -1953,6 +2553,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
 		dev_dbg(dc->dev, "%s(): vertical blank\n", __func__);
 		*/
 		drm_crtc_handle_vblank(&dc->base);
+		dc->stats.vblank_total++;
 		dc->stats.vblank++;
 	}
 
@@ -1960,6 +2561,7 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
 		/*
 		dev_dbg(dc->dev, "%s(): underflow\n", __func__);
 		*/
+		dc->stats.underflow_total++;
 		dc->stats.underflow++;
 	}
 
@@ -1967,11 +2569,13 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
 		/*
 		dev_dbg(dc->dev, "%s(): overflow\n", __func__);
 		*/
+		dc->stats.overflow_total++;
 		dc->stats.overflow++;
 	}
 
 	if (status & HEAD_UF_INT) {
 		dev_dbg_ratelimited(dc->dev, "%s(): head underflow\n", __func__);
+		dc->stats.underflow_total++;
 		dc->stats.underflow++;
 	}
 
@@ -1995,9 +2599,19 @@ static bool tegra_dc_has_window_groups(struct tegra_dc *dc)
 	return false;
 }
 
+static int tegra_dc_early_init(struct host1x_client *client)
+{
+	struct drm_device *drm = dev_get_drvdata(client->host);
+	struct tegra_drm *tegra = drm->dev_private;
+
+	tegra->num_crtcs++;
+
+	return 0;
+}
+
 static int tegra_dc_init(struct host1x_client *client)
 {
-	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->host);
 	unsigned long flags = HOST1X_SYNCPT_CLIENT_MANAGED;
 	struct tegra_dc *dc = host1x_client_to_dc(client);
 	struct tegra_drm *tegra = drm->dev_private;
@@ -2006,6 +2620,12 @@ static int tegra_dc_init(struct host1x_client *client)
 	int err;
 
 	/*
+	 * DC has been reset by now, so VBLANK syncpoint can be released
+	 * for general use.
+	 */
+	host1x_syncpt_release_vblank_reservation(client, 26 + dc->pipe);
+
+	/*
 	 * XXX do not register DCs with no window groups because we cannot
 	 * assign a primary plane to them, which in turn will cause KMS to
 	 * crash.
@@ -2013,13 +2633,21 @@ static int tegra_dc_init(struct host1x_client *client)
 	if (!tegra_dc_has_window_groups(dc))
 		return 0;
 
+	/*
+	 * Set the display hub as the host1x client parent for the display
+	 * controller. This is needed for the runtime reference counting that
+	 * ensures the display hub is always powered when any of the display
+	 * controllers are.
+	 */
+	if (dc->soc->has_nvdisplay)
+		client->parent = &tegra->hub->client;
+
 	dc->syncpt = host1x_syncpt_request(client, flags);
 	if (!dc->syncpt)
 		dev_warn(dc->dev, "failed to allocate syncpoint\n");
 
-	dc->group = host1x_client_iommu_attach(client, true);
-	if (IS_ERR(dc->group)) {
-		err = PTR_ERR(dc->group);
+	err = host1x_client_iommu_attach(client);
+	if (err < 0 && err != -ENODEV) {
 		dev_err(client->dev, "failed to attach to domain: %d\n", err);
 		return err;
 	}
@@ -2063,6 +2691,12 @@ static int tegra_dc_init(struct host1x_client *client)
 	if (dc->soc->pitch_align > tegra->pitch_align)
 		tegra->pitch_align = dc->soc->pitch_align;
 
+	/* track maximum resolution */
+	if (dc->soc->has_nvdisplay)
+		drm->mode_config.max_width = drm->mode_config.max_height = 16384;
+	else
+		drm->mode_config.max_width = drm->mode_config.max_height = 4096;
+
 	err = tegra_dc_rgb_init(drm, dc);
 	if (err < 0 && err != -ENODEV) {
 		dev_err(dc->dev, "failed to initialize RGB output: %d\n", err);
@@ -2077,6 +2711,12 @@ static int tegra_dc_init(struct host1x_client *client)
 		goto cleanup;
 	}
 
+	/*
+	 * Inherit the DMA parameters (such as maximum segment size) from the
+	 * parent host1x device.
+	 */
+	client->dev->dma_parms = client->host->dma_parms;
+
 	return 0;
 
 cleanup:
@@ -2086,8 +2726,8 @@ cleanup:
 	if (!IS_ERR(primary))
 		drm_plane_cleanup(primary);
 
-	host1x_client_iommu_detach(client, dc->group);
-	host1x_syncpt_free(dc->syncpt);
+	host1x_client_iommu_detach(client);
+	host1x_syncpt_put(dc->syncpt);
 
 	return err;
 }
@@ -2100,6 +2740,9 @@ static int tegra_dc_exit(struct host1x_client *client)
 	if (!tegra_dc_has_window_groups(dc))
 		return 0;
 
+	/* avoid a dangling pointer just in case this disappears */
+	client->dev->dma_parms = NULL;
+
 	devm_free_irq(dc->dev, dc->irq, dc);
 
 	err = tegra_dc_rgb_exit(dc);
@@ -2108,15 +2751,92 @@ static int tegra_dc_exit(struct host1x_client *client)
 		return err;
 	}
 
-	host1x_client_iommu_detach(client, dc->group);
-	host1x_syncpt_free(dc->syncpt);
+	host1x_client_iommu_detach(client);
+	host1x_syncpt_put(dc->syncpt);
+
+	return 0;
+}
+
+static int tegra_dc_late_exit(struct host1x_client *client)
+{
+	struct drm_device *drm = dev_get_drvdata(client->host);
+	struct tegra_drm *tegra = drm->dev_private;
+
+	tegra->num_crtcs--;
+
+	return 0;
+}
+
+static int tegra_dc_runtime_suspend(struct host1x_client *client)
+{
+	struct tegra_dc *dc = host1x_client_to_dc(client);
+	struct device *dev = client->dev;
+	int err;
+
+	err = reset_control_assert(dc->rst);
+	if (err < 0) {
+		dev_err(dev, "failed to assert reset: %d\n", err);
+		return err;
+	}
+
+	if (dc->soc->has_powergate)
+		tegra_powergate_power_off(dc->powergate);
+
+	clk_disable_unprepare(dc->clk);
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+
+static int tegra_dc_runtime_resume(struct host1x_client *client)
+{
+	struct tegra_dc *dc = host1x_client_to_dc(client);
+	struct device *dev = client->dev;
+	int err;
+
+	err = pm_runtime_resume_and_get(dev);
+	if (err < 0) {
+		dev_err(dev, "failed to get runtime PM: %d\n", err);
+		return err;
+	}
+
+	if (dc->soc->has_powergate) {
+		err = tegra_powergate_sequence_power_up(dc->powergate, dc->clk,
+							dc->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to power partition: %d\n", err);
+			goto put_rpm;
+		}
+	} else {
+		err = clk_prepare_enable(dc->clk);
+		if (err < 0) {
+			dev_err(dev, "failed to enable clock: %d\n", err);
+			goto put_rpm;
+		}
+
+		err = reset_control_deassert(dc->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to deassert reset: %d\n", err);
+			goto disable_clk;
+		}
+	}
 
 	return 0;
+
+disable_clk:
+	clk_disable_unprepare(dc->clk);
+put_rpm:
+	pm_runtime_put_sync(dev);
+	return err;
 }
 
 static const struct host1x_client_ops dc_client_ops = {
+	.early_init = tegra_dc_early_init,
 	.init = tegra_dc_init,
 	.exit = tegra_dc_exit,
+	.late_exit = tegra_dc_late_exit,
+	.suspend = tegra_dc_runtime_suspend,
+	.resume = tegra_dc_runtime_resume,
 };
 
 static const struct tegra_dc_soc_info tegra20_dc_soc_info = {
@@ -2124,6 +2844,7 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = {
 	.supports_interlacing = false,
 	.supports_cursor = false,
 	.supports_block_linear = false,
+	.supports_sector_layout = false,
 	.has_legacy_blending = true,
 	.pitch_align = 8,
 	.has_powergate = false,
@@ -2135,7 +2856,10 @@ static const struct tegra_dc_soc_info tegra20_dc_soc_info = {
 	.overlay_formats = tegra20_overlay_formats,
 	.modifiers = tegra20_modifiers,
 	.has_win_a_without_filters = true,
+	.has_win_b_vfilter_mem_client = true,
 	.has_win_c_without_vert_filter = true,
+	.plane_tiled_memory_bandwidth_x2 = false,
+	.has_pll_d2_out0 = false,
 };
 
 static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
@@ -2143,6 +2867,7 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
 	.supports_interlacing = false,
 	.supports_cursor = false,
 	.supports_block_linear = false,
+	.supports_sector_layout = false,
 	.has_legacy_blending = true,
 	.pitch_align = 8,
 	.has_powergate = false,
@@ -2154,7 +2879,10 @@ static const struct tegra_dc_soc_info tegra30_dc_soc_info = {
 	.overlay_formats = tegra20_overlay_formats,
 	.modifiers = tegra20_modifiers,
 	.has_win_a_without_filters = false,
+	.has_win_b_vfilter_mem_client = true,
 	.has_win_c_without_vert_filter = false,
+	.plane_tiled_memory_bandwidth_x2 = true,
+	.has_pll_d2_out0 = true,
 };
 
 static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
@@ -2162,6 +2890,7 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
 	.supports_interlacing = false,
 	.supports_cursor = false,
 	.supports_block_linear = false,
+	.supports_sector_layout = false,
 	.has_legacy_blending = true,
 	.pitch_align = 64,
 	.has_powergate = true,
@@ -2173,7 +2902,10 @@ static const struct tegra_dc_soc_info tegra114_dc_soc_info = {
 	.overlay_formats = tegra114_overlay_formats,
 	.modifiers = tegra20_modifiers,
 	.has_win_a_without_filters = false,
+	.has_win_b_vfilter_mem_client = false,
 	.has_win_c_without_vert_filter = false,
+	.plane_tiled_memory_bandwidth_x2 = true,
+	.has_pll_d2_out0 = true,
 };
 
 static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
@@ -2181,6 +2913,7 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
 	.supports_interlacing = true,
 	.supports_cursor = true,
 	.supports_block_linear = true,
+	.supports_sector_layout = false,
 	.has_legacy_blending = false,
 	.pitch_align = 64,
 	.has_powergate = true,
@@ -2192,7 +2925,10 @@ static const struct tegra_dc_soc_info tegra124_dc_soc_info = {
 	.overlay_formats = tegra124_overlay_formats,
 	.modifiers = tegra124_modifiers,
 	.has_win_a_without_filters = false,
+	.has_win_b_vfilter_mem_client = false,
 	.has_win_c_without_vert_filter = false,
+	.plane_tiled_memory_bandwidth_x2 = false,
+	.has_pll_d2_out0 = true,
 };
 
 static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
@@ -2200,6 +2936,7 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
 	.supports_interlacing = true,
 	.supports_cursor = true,
 	.supports_block_linear = true,
+	.supports_sector_layout = false,
 	.has_legacy_blending = false,
 	.pitch_align = 64,
 	.has_powergate = true,
@@ -2211,7 +2948,10 @@ static const struct tegra_dc_soc_info tegra210_dc_soc_info = {
 	.overlay_formats = tegra114_overlay_formats,
 	.modifiers = tegra124_modifiers,
 	.has_win_a_without_filters = false,
+	.has_win_b_vfilter_mem_client = false,
 	.has_win_c_without_vert_filter = false,
+	.plane_tiled_memory_bandwidth_x2 = false,
+	.has_pll_d2_out0 = true,
 };
 
 static const struct tegra_windowgroup_soc tegra186_dc_wgrps[] = {
@@ -2253,6 +2993,7 @@ static const struct tegra_dc_soc_info tegra186_dc_soc_info = {
 	.supports_interlacing = true,
 	.supports_cursor = true,
 	.supports_block_linear = true,
+	.supports_sector_layout = false,
 	.has_legacy_blending = false,
 	.pitch_align = 64,
 	.has_powergate = false,
@@ -2260,6 +3001,8 @@ static const struct tegra_dc_soc_info tegra186_dc_soc_info = {
 	.has_nvdisplay = true,
 	.wgrps = tegra186_dc_wgrps,
 	.num_wgrps = ARRAY_SIZE(tegra186_dc_wgrps),
+	.plane_tiled_memory_bandwidth_x2 = false,
+	.has_pll_d2_out0 = false,
 };
 
 static const struct tegra_windowgroup_soc tegra194_dc_wgrps[] = {
@@ -2301,6 +3044,7 @@ static const struct tegra_dc_soc_info tegra194_dc_soc_info = {
 	.supports_interlacing = true,
 	.supports_cursor = true,
 	.supports_block_linear = true,
+	.supports_sector_layout = true,
 	.has_legacy_blending = false,
 	.pitch_align = 64,
 	.has_powergate = false,
@@ -2308,6 +3052,8 @@ static const struct tegra_dc_soc_info tegra194_dc_soc_info = {
 	.has_nvdisplay = true,
 	.wgrps = tegra194_dc_wgrps,
 	.num_wgrps = ARRAY_SIZE(tegra194_dc_wgrps),
+	.plane_tiled_memory_bandwidth_x2 = false,
+	.has_pll_d2_out0 = false,
 };
 
 static const struct of_device_id tegra_dc_of_match[] = {
@@ -2375,10 +3121,10 @@ static int tegra_dc_parse_dt(struct tegra_dc *dc)
 	return 0;
 }
 
-static int tegra_dc_match_by_pipe(struct device *dev, void *data)
+static int tegra_dc_match_by_pipe(struct device *dev, const void *data)
 {
 	struct tegra_dc *dc = dev_get_drvdata(dev);
-	unsigned int pipe = (unsigned long)data;
+	unsigned int pipe = (unsigned long)(void *)data;
 
 	return dc->pipe == pipe;
 }
@@ -2391,33 +3137,53 @@ static int tegra_dc_couple(struct tegra_dc *dc)
 	 * POWER_CONTROL registers during CRTC enabling.
 	 */
 	if (dc->soc->coupled_pm && dc->pipe == 1) {
-		u32 flags = DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_CONSUMER;
-		struct device_link *link;
-		struct device *partner;
+		struct device *companion;
+		struct tegra_dc *parent;
 
-		partner = driver_find_device(dc->dev->driver, NULL, NULL,
-					     tegra_dc_match_by_pipe);
-		if (!partner)
+		companion = driver_find_device(dc->dev->driver, NULL, (const void *)0,
+					       tegra_dc_match_by_pipe);
+		if (!companion)
 			return -EPROBE_DEFER;
 
-		link = device_link_add(dc->dev, partner, flags);
-		if (!link) {
-			dev_err(dc->dev, "failed to link controllers\n");
-			return -EINVAL;
-		}
+		parent = dev_get_drvdata(companion);
+		dc->client.parent = &parent->client;
 
-		dev_dbg(dc->dev, "coupled to %s\n", dev_name(partner));
+		dev_dbg(dc->dev, "coupled to %s\n", dev_name(companion));
+		put_device(companion);
 	}
 
 	return 0;
 }
 
+static int tegra_dc_init_opp_table(struct tegra_dc *dc)
+{
+	struct tegra_core_opp_params opp_params = {};
+	int err;
+
+	err = devm_tegra_core_dev_init_opp_table(dc->dev, &opp_params);
+	if (err && err != -ENODEV)
+		return err;
+
+	if (err)
+		dc->has_opp_table = false;
+	else
+		dc->has_opp_table = true;
+
+	return 0;
+}
+
 static int tegra_dc_probe(struct platform_device *pdev)
 {
-	struct resource *regs;
+	u64 dma_mask = dma_get_mask(pdev->dev.parent);
 	struct tegra_dc *dc;
 	int err;
 
+	err = dma_coerce_mask_and_coherent(&pdev->dev, dma_mask);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err);
+		return err;
+	}
+
 	dc = devm_kzalloc(&pdev->dev, sizeof(*dc), GFP_KERNEL);
 	if (!dc)
 		return -ENOMEM;
@@ -2455,8 +3221,10 @@ static int tegra_dc_probe(struct platform_device *pdev)
 	usleep_range(2000, 4000);
 
 	err = reset_control_assert(dc->rst);
-	if (err < 0)
+	if (err < 0) {
+		clk_disable_unprepare(dc->clk);
 		return err;
+	}
 
 	usleep_range(2000, 4000);
 
@@ -2471,22 +3239,22 @@ static int tegra_dc_probe(struct platform_device *pdev)
 		tegra_powergate_power_off(dc->powergate);
 	}
 
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	dc->regs = devm_ioremap_resource(&pdev->dev, regs);
+	err = tegra_dc_init_opp_table(dc);
+	if (err < 0)
+		return err;
+
+	dc->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(dc->regs))
 		return PTR_ERR(dc->regs);
 
 	dc->irq = platform_get_irq(pdev, 0);
-	if (dc->irq < 0) {
-		dev_err(&pdev->dev, "failed to get IRQ\n");
+	if (dc->irq < 0)
 		return -ENXIO;
-	}
 
 	err = tegra_dc_rgb_probe(dc);
-	if (err < 0 && err != -ENODEV) {
-		dev_err(&pdev->dev, "failed to probe RGB output: %d\n", err);
-		return err;
-	}
+	if (err < 0 && err != -ENODEV)
+		return dev_err_probe(&pdev->dev, err,
+				     "failed to probe RGB output\n");
 
 	platform_set_drvdata(pdev, dc);
 	pm_runtime_enable(&pdev->dev);
@@ -2499,94 +3267,33 @@ static int tegra_dc_probe(struct platform_device *pdev)
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
 			err);
-		return err;
+		goto disable_pm;
 	}
 
 	return 0;
-}
-
-static int tegra_dc_remove(struct platform_device *pdev)
-{
-	struct tegra_dc *dc = platform_get_drvdata(pdev);
-	int err;
-
-	err = host1x_client_unregister(&dc->client);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
-			err);
-		return err;
-	}
-
-	err = tegra_dc_rgb_remove(dc);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to remove RGB output: %d\n", err);
-		return err;
-	}
 
+disable_pm:
 	pm_runtime_disable(&pdev->dev);
+	tegra_dc_rgb_remove(dc);
 
-	return 0;
-}
-
-#ifdef CONFIG_PM
-static int tegra_dc_suspend(struct device *dev)
-{
-	struct tegra_dc *dc = dev_get_drvdata(dev);
-	int err;
-
-	err = reset_control_assert(dc->rst);
-	if (err < 0) {
-		dev_err(dev, "failed to assert reset: %d\n", err);
-		return err;
-	}
-
-	if (dc->soc->has_powergate)
-		tegra_powergate_power_off(dc->powergate);
-
-	clk_disable_unprepare(dc->clk);
-
-	return 0;
+	return err;
 }
 
-static int tegra_dc_resume(struct device *dev)
+static void tegra_dc_remove(struct platform_device *pdev)
 {
-	struct tegra_dc *dc = dev_get_drvdata(dev);
-	int err;
+	struct tegra_dc *dc = platform_get_drvdata(pdev);
 
-	if (dc->soc->has_powergate) {
-		err = tegra_powergate_sequence_power_up(dc->powergate, dc->clk,
-							dc->rst);
-		if (err < 0) {
-			dev_err(dev, "failed to power partition: %d\n", err);
-			return err;
-		}
-	} else {
-		err = clk_prepare_enable(dc->clk);
-		if (err < 0) {
-			dev_err(dev, "failed to enable clock: %d\n", err);
-			return err;
-		}
+	host1x_client_unregister(&dc->client);
 
-		err = reset_control_deassert(dc->rst);
-		if (err < 0) {
-			dev_err(dev, "failed to deassert reset: %d\n", err);
-			return err;
-		}
-	}
+	tegra_dc_rgb_remove(dc);
 
-	return 0;
+	pm_runtime_disable(&pdev->dev);
 }
-#endif
-
-static const struct dev_pm_ops tegra_dc_pm_ops = {
-	SET_RUNTIME_PM_OPS(tegra_dc_suspend, tegra_dc_resume, NULL)
-};
 
 struct platform_driver tegra_dc_driver = {
 	.driver = {
 		.name = "tegra-dc",
 		.of_match_table = tegra_dc_of_match,
-		.pm = &tegra_dc_pm_ops,
 	},
 	.probe = tegra_dc_probe,
 	.remove = tegra_dc_remove,
diff --git a/drivers/gpu/drm/tegra/dc.h b/drivers/gpu/drm/tegra/dc.h
index 1256dfb6b2f5..0559fa6b1bf7 100644
--- a/drivers/gpu/drm/tegra/dc.h
+++ b/drivers/gpu/drm/tegra/dc.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2012 Avionic Design GmbH
  * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef TEGRA_DC_H
@@ -18,6 +15,8 @@
 
 struct tegra_output;
 
+#define TEGRA_DC_LEGACY_PLANES_NUM	7
+
 struct tegra_dc_state {
 	struct drm_crtc_state base;
 
@@ -41,6 +40,11 @@ struct tegra_dc_stats {
 	unsigned long vblank;
 	unsigned long underflow;
 	unsigned long overflow;
+
+	unsigned long frames_total;
+	unsigned long vblank_total;
+	unsigned long underflow_total;
+	unsigned long overflow_total;
 };
 
 struct tegra_windowgroup_soc {
@@ -55,6 +59,7 @@ struct tegra_dc_soc_info {
 	bool supports_interlacing;
 	bool supports_cursor;
 	bool supports_block_linear;
+	bool supports_sector_layout;
 	bool has_legacy_blending;
 	unsigned int pitch_align;
 	bool has_powergate;
@@ -68,7 +73,10 @@ struct tegra_dc_soc_info {
 	unsigned int num_overlay_formats;
 	const u64 *modifiers;
 	bool has_win_a_without_filters;
+	bool has_win_b_vfilter_mem_client;
 	bool has_win_c_without_vert_filter;
+	bool plane_tiled_memory_bandwidth_x2;
+	bool has_pll_d2_out0;
 };
 
 struct tegra_dc {
@@ -94,7 +102,7 @@ struct tegra_dc {
 
 	const struct tegra_dc_soc_info *soc;
 
-	struct iommu_group *group;
+	bool has_opp_table;
 };
 
 static inline struct tegra_dc *
@@ -141,7 +149,8 @@ struct tegra_dc_window {
 	unsigned int stride[2];
 	unsigned long base[3];
 	unsigned int zpos;
-	bool bottom_up;
+	bool reflect_x;
+	bool reflect_y;
 
 	struct tegra_bo_tiling tiling;
 	u32 format;
@@ -155,10 +164,12 @@ int tegra_dc_state_setup_clock(struct tegra_dc *dc,
 			       struct drm_crtc_state *crtc_state,
 			       struct clk *clk, unsigned long pclk,
 			       unsigned int div);
+void tegra_crtc_atomic_post_commit(struct drm_crtc *crtc,
+				   struct drm_atomic_state *state);
 
 /* from rgb.c */
 int tegra_dc_rgb_probe(struct tegra_dc *dc);
-int tegra_dc_rgb_remove(struct tegra_dc *dc);
+void tegra_dc_rgb_remove(struct tegra_dc *dc);
 int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc);
 int tegra_dc_rgb_exit(struct tegra_dc *dc);
 
@@ -515,6 +526,8 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 
 #define DC_DISP_CURSOR_START_ADDR_HI		0x4ec
 #define DC_DISP_BLEND_CURSOR_CONTROL		0x4f1
+#define CURSOR_COMPOSITION_MODE_BLEND		(0 << 25)
+#define CURSOR_COMPOSITION_MODE_XOR		(1 << 25)
 #define CURSOR_MODE_LEGACY			(0 << 24)
 #define CURSOR_MODE_NORMAL			(1 << 24)
 #define CURSOR_DST_BLEND_ZERO			(0 << 16)
@@ -624,6 +637,13 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 #define WIN_COLOR_DEPTH_A8B8G8R8       36
 #define WIN_COLOR_DEPTH_B8G8R8X8       37
 #define WIN_COLOR_DEPTH_R8G8B8X8       38
+#define WIN_COLOR_DEPTH_YCbCr444P      41
+#define WIN_COLOR_DEPTH_YCrCb420SP     42
+#define WIN_COLOR_DEPTH_YCbCr420SP     43
+#define WIN_COLOR_DEPTH_YCrCb422SP     44
+#define WIN_COLOR_DEPTH_YCbCr422SP     45
+#define WIN_COLOR_DEPTH_YCrCb444SP     48
+#define WIN_COLOR_DEPTH_YCbCr444SP     49
 #define WIN_COLOR_DEPTH_X8B8G8R8       65
 #define WIN_COLOR_DEPTH_X8R8G8B8       66
 
@@ -697,6 +717,9 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 
 #define DC_WINBUF_START_ADDR_HI			0x80d
 
+#define DC_WINBUF_START_ADDR_HI_U		0x80f
+#define DC_WINBUF_START_ADDR_HI_V		0x811
+
 #define DC_WINBUF_CDE_CONTROL			0x82f
 #define  ENABLE_SURFACE (1 << 0)
 
@@ -709,15 +732,37 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 #define PROTOCOL_MASK (0xf << 8)
 #define PROTOCOL_SINGLE_TMDS_A (0x1 << 8)
 
+#define DC_DISP_PCALC_HEAD_SET_CROPPED_POINT_IN_CURSOR	0x442
+#define DC_DISP_PCALC_HEAD_SET_CROPPED_SIZE_IN_CURSOR	0x446
+
+#define DC_WINC_PRECOMP_WGRP_PIPE_CAPA 0x500
+#define DC_WINC_PRECOMP_WGRP_PIPE_CAPB 0x501
+#define DC_WINC_PRECOMP_WGRP_PIPE_CAPC 0x502
+#define  MAX_PIXELS_5TAP444(x) ((x) & 0xffff)
+#define DC_WINC_PRECOMP_WGRP_PIPE_CAPD 0x503
+#define DC_WINC_PRECOMP_WGRP_PIPE_CAPE 0x504
+#define  MAX_PIXELS_2TAP444(x) ((x) & 0xffff)
+#define DC_WINC_PRECOMP_WGRP_PIPE_CAPF 0x505
+
 #define DC_WIN_CORE_WINDOWGROUP_SET_CONTROL	0x702
 #define OWNER_MASK (0xf << 0)
 #define OWNER(x) (((x) & 0xf) << 0)
 
 #define DC_WIN_CROPPED_SIZE			0x706
 
+#define DC_WIN_SET_INPUT_SCALER_H_START_PHASE	0x707
+#define DC_WIN_SET_INPUT_SCALER_V_START_PHASE	0x708
+
 #define DC_WIN_PLANAR_STORAGE			0x709
 #define PITCH(x) (((x) >> 6) & 0x1fff)
 
+#define DC_WIN_PLANAR_STORAGE_UV		0x70a
+#define  PITCH_U(x) ((((x) >> 6) & 0x1fff) <<  0)
+#define  PITCH_V(x) ((((x) >> 6) & 0x1fff) << 16)
+
+#define DC_WIN_SET_INPUT_SCALER_HPHASE_INCR	0x70b
+#define DC_WIN_SET_INPUT_SCALER_VPHASE_INCR	0x70c
+
 #define DC_WIN_SET_PARAMS			0x70d
 #define  CLAMP_BEFORE_BLEND (1 << 15)
 #define  DEGAMMA_NONE (0 << 13)
@@ -738,6 +783,10 @@ int tegra_dc_rgb_exit(struct tegra_dc *dc);
 #define  VERTICAL_TAPS_2 (1 << 0)
 #define  VERTICAL_TAPS_5 (4 << 0)
 
+#define DC_WIN_WINDOWGROUP_SET_INPUT_SCALER_COEFF 0x70f
+#define  COEFF_INDEX(x) (((x) & 0xff) << 15)
+#define  COEFF_DATA(x) (((x) & 0x3ff) << 0)
+
 #define DC_WIN_WINDOWGROUP_SET_INPUT_SCALER_USAGE	0x711
 #define  INPUT_SCALER_USE422  (1 << 2)
 #define  INPUT_SCALER_VBYPASS (1 << 1)
diff --git a/drivers/gpu/drm/tegra/dp.c b/drivers/gpu/drm/tegra/dp.c
new file mode 100644
index 000000000000..990e744b0923
--- /dev/null
+++ b/drivers/gpu/drm/tegra/dp.c
@@ -0,0 +1,818 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright (C) 2013-2019 NVIDIA Corporation
+ * Copyright (C) 2015 Rob Clark
+ */
+
+#include <drm/display/drm_dp_helper.h>
+#include <drm/drm_crtc.h>
+#include <drm/drm_print.h>
+
+#include "dp.h"
+
+static const u8 drm_dp_edp_revisions[] = { 0x11, 0x12, 0x13, 0x14 };
+
+static void drm_dp_link_caps_reset(struct drm_dp_link_caps *caps)
+{
+	caps->enhanced_framing = false;
+	caps->tps3_supported = false;
+	caps->fast_training = false;
+	caps->channel_coding = false;
+	caps->alternate_scrambler_reset = false;
+}
+
+/*
+ * Copy every capability flag from @src into @dest.
+ */
+void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
+			   const struct drm_dp_link_caps *src)
+{
+	/* struct assignment copies each of the bool flags in one statement */
+	*dest = *src;
+}
+
+static void drm_dp_link_reset(struct drm_dp_link *link)
+{
+	unsigned int i;
+
+	if (!link)
+		return;
+
+	link->revision = 0;
+	link->max_rate = 0;
+	link->max_lanes = 0;
+
+	drm_dp_link_caps_reset(&link->caps);
+	link->aux_rd_interval.cr = 0;
+	link->aux_rd_interval.ce = 0;
+	link->edp = 0;
+
+	link->rate = 0;
+	link->lanes = 0;
+
+	for (i = 0; i < DP_MAX_SUPPORTED_RATES; i++)
+		link->rates[i] = 0;
+
+	link->num_rates = 0;
+}
+
+/**
+ * drm_dp_link_add_rate() - add a rate to the list of supported rates
+ * @link: the link to add the rate to
+ * @rate: the rate to add
+ *
+ * Add a link rate to the list of supported link rates, in ascending order.
+ *
+ * Returns:
+ * 0 on success or one of the following negative error codes on failure:
+ * - ENOSPC if the maximum number of supported rates has been reached
+ * - EEXIST if the link already supports this rate
+ *
+ * See also:
+ * drm_dp_link_remove_rate()
+ */
+int drm_dp_link_add_rate(struct drm_dp_link *link, unsigned long rate)
+{
+	unsigned int i, pivot;
+
+	if (link->num_rates == DP_MAX_SUPPORTED_RATES)
+		return -ENOSPC;
+
+	for (pivot = 0; pivot < link->num_rates; pivot++)
+		if (rate <= link->rates[pivot])
+			break;
+
+	if (pivot != link->num_rates && rate == link->rates[pivot])
+		return -EEXIST;
+
+	for (i = link->num_rates; i > pivot; i--)
+		link->rates[i] = link->rates[i - 1];
+
+	link->rates[pivot] = rate;
+	link->num_rates++;
+
+	return 0;
+}
+
+/**
+ * drm_dp_link_remove_rate() - remove a rate from the list of supported rates
+ * @link: the link from which to remove the rate
+ * @rate: the rate to remove
+ *
+ * Removes a link rate from the list of supported link rates.
+ *
+ * Returns:
+ * 0 on success or one of the following negative error codes on failure:
+ * - EINVAL if the specified rate is not among the supported rates
+ *
+ * See also:
+ * drm_dp_link_add_rate()
+ */
+int drm_dp_link_remove_rate(struct drm_dp_link *link, unsigned long rate)
+{
+	unsigned int i;
+
+	for (i = 0; i < link->num_rates; i++)
+		if (rate == link->rates[i])
+			break;
+
+	if (i == link->num_rates)
+		return -EINVAL;
+
+	link->num_rates--;
+
+	while (i < link->num_rates) {
+		link->rates[i] = link->rates[i + 1];
+		i++;
+	}
+
+	return 0;
+}
+
+/**
+ * drm_dp_link_update_rates() - normalize the supported link rates array
+ * @link: the link for which to normalize the supported link rates
+ *
+ * Users should call this function after they've manually modified the array
+ * of supported link rates. This function removes any stale entries, compacts
+ * the array and updates the supported link rate count. Note that calling the
+ * drm_dp_link_remove_rate() function already does this janitorial work.
+ *
+ * See also:
+ * drm_dp_link_add_rate(), drm_dp_link_remove_rate()
+ */
+void drm_dp_link_update_rates(struct drm_dp_link *link)
+{
+	unsigned int i, count = 0;
+
+	for (i = 0; i < link->num_rates; i++) {
+		if (link->rates[i] != 0)
+			link->rates[count++] = link->rates[i];
+	}
+
+	for (i = count; i < link->num_rates; i++)
+		link->rates[i] = 0;
+
+	link->num_rates = count;
+}
+
+/**
+ * drm_dp_link_probe() - probe a DisplayPort link for capabilities
+ * @aux: DisplayPort AUX channel
+ * @link: pointer to structure in which to return link capabilities
+ *
+ * The structure filled in by this function can usually be passed directly
+ * into drm_dp_link_power_up() and drm_dp_link_configure() to power up and
+ * configure the link based on the link's capabilities.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
+{
+	u8 dpcd[DP_RECEIVER_CAP_SIZE], value;
+	unsigned int rd_interval;
+	int err;
+
+	drm_dp_link_reset(link);
+
+	err = drm_dp_dpcd_read(aux, DP_DPCD_REV, dpcd, sizeof(dpcd));
+	if (err < 0)
+		return err;
+
+	link->revision = dpcd[DP_DPCD_REV];
+	link->max_rate = drm_dp_max_link_rate(dpcd);
+	link->max_lanes = drm_dp_max_lane_count(dpcd);
+
+	link->caps.enhanced_framing = drm_dp_enhanced_frame_cap(dpcd);
+	link->caps.tps3_supported = drm_dp_tps3_supported(dpcd);
+	link->caps.fast_training = drm_dp_fast_training_cap(dpcd);
+	link->caps.channel_coding = drm_dp_channel_coding_supported(dpcd);
+
+	if (drm_dp_alternate_scrambler_reset_cap(dpcd)) {
+		link->caps.alternate_scrambler_reset = true;
+
+		err = drm_dp_dpcd_readb(aux, DP_EDP_DPCD_REV, &value);
+		if (err < 0)
+			return err;
+
+		if (value >= ARRAY_SIZE(drm_dp_edp_revisions))
+			DRM_ERROR("unsupported eDP version: %02x\n", value);
+		else
+			link->edp = drm_dp_edp_revisions[value];
+	}
+
+	/*
+	 * The DPCD stores the AUX read interval in units of 4 ms. There are
+	 * two special cases:
+	 *
+	 *   1) if the TRAINING_AUX_RD_INTERVAL field is 0, the clock recovery
+	 *      and channel equalization should use 100 us or 400 us AUX read
+	 *      intervals, respectively
+	 *
+	 *   2) for DP v1.4 and above, clock recovery should always use 100 us
+	 *      AUX read intervals
+	 */
+	rd_interval = dpcd[DP_TRAINING_AUX_RD_INTERVAL] &
+			   DP_TRAINING_AUX_RD_MASK;
+
+	if (rd_interval > 4) {
+		DRM_DEBUG_KMS("AUX interval %u out of range (max. 4)\n",
+			      rd_interval);
+		rd_interval = 4;
+	}
+
+	rd_interval *= 4 * USEC_PER_MSEC;
+
+	/* start from the sink's value, then apply the two special cases */
+	link->aux_rd_interval.cr = rd_interval;
+	link->aux_rd_interval.ce = rd_interval ? rd_interval : 400;
+	if (rd_interval == 0 || link->revision >= DP_DPCD_REV_14)
+		link->aux_rd_interval.cr = 100;
+
+	link->rate = link->max_rate;
+	link->lanes = link->max_lanes;
+
+	/* Parse SUPPORTED_LINK_RATES from eDP 1.4 */
+	if (link->edp >= 0x14) {
+		u8 supported_rates[DP_MAX_SUPPORTED_RATES * 2];
+		unsigned int i;
+		u16 rate;
+
+		err = drm_dp_dpcd_read(aux, DP_SUPPORTED_LINK_RATES,
+				       supported_rates,
+				       sizeof(supported_rates));
+		if (err < 0)
+			return err;
+
+		for (i = 0; i < DP_MAX_SUPPORTED_RATES; i++) {
+			rate = supported_rates[i * 2 + 1] << 8 |
+			       supported_rates[i * 2 + 0];
+			if (rate) /* unused table slots read as zero */
+				drm_dp_link_add_rate(link, rate * 200);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * drm_dp_link_configure() - configure a DisplayPort link
+ * @aux: DisplayPort AUX channel
+ * @link: pointer to a structure containing the link configuration
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link)
+{
+	u8 values[2], value;
+	int err;
+
+	if (link->ops && link->ops->configure) {
+		err = link->ops->configure(link);
+		if (err < 0) {
+			DRM_ERROR("failed to configure DP link: %d\n", err);
+			return err;
+		}
+	}
+
+	values[0] = drm_dp_link_rate_to_bw_code(link->rate);
+	values[1] = link->lanes;
+
+	if (link->caps.enhanced_framing)
+		values[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+
+	err = drm_dp_dpcd_write(aux, DP_LINK_BW_SET, values, sizeof(values));
+	if (err < 0)
+		return err;
+
+	if (link->caps.channel_coding)
+		value = DP_SET_ANSI_8B10B;
+	else
+		value = 0;
+
+	err = drm_dp_dpcd_writeb(aux, DP_MAIN_LINK_CHANNEL_CODING_SET, value);
+	if (err < 0)
+		return err;
+
+	if (link->caps.alternate_scrambler_reset) {
+		err = drm_dp_dpcd_writeb(aux, DP_EDP_CONFIGURATION_SET,
+					 DP_ALTERNATE_SCRAMBLER_RESET_ENABLE);
+		if (err < 0)
+			return err;
+	}
+
+	return 0;
+}
+
+/**
+ * drm_dp_link_choose() - choose the lowest possible configuration for a mode
+ * @link: DRM DP link object
+ * @mode: DRM display mode
+ * @info: DRM display information
+ *
+ * According to the eDP specification, a source should select a configuration
+ * with the lowest number of lanes and the lowest possible link rate that can
+ * match the bitrate requirements of a video mode. However it must ensure not
+ * to exceed the capabilities of the sink.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_choose(struct drm_dp_link *link,
+		       const struct drm_display_mode *mode,
+		       const struct drm_display_info *info)
+{
+	/* available link symbol clock rates */
+	static const unsigned int rates[3] = { 162000, 270000, 540000 };
+	/* available number of lanes */
+	static const unsigned int lanes[3] = { 1, 2, 4 };
+	unsigned long requirement, capacity;
+	unsigned int rate = link->max_rate;
+	unsigned int i, j;
+
+	/* bandwidth requirement */
+	requirement = mode->clock * info->bpc * 3;
+
+	for (i = 0; i < ARRAY_SIZE(lanes) && lanes[i] <= link->max_lanes; i++) {
+		for (j = 0; j < ARRAY_SIZE(rates) && rates[j] <= rate; j++) {
+			/*
+			 * Capacity for this combination of lanes and rate,
+			 * factoring in the ANSI 8B/10B encoding.
+			 *
+			 * Link rates in the DRM DP helpers are really link
+			 * symbol frequencies, so a tenth of the actual rate
+			 * of the link.
+			 */
+			capacity = lanes[i] * (rates[j] * 10) * 8 / 10;
+
+			if (capacity >= requirement) {
+				DRM_DEBUG_KMS("using %u lanes at %u kHz (%lu/%lu kbps)\n",
+					      lanes[i], rates[j], requirement,
+					      capacity);
+				link->lanes = lanes[i];
+				link->rate = rates[j];
+				return 0;
+			}
+		}
+	}
+
+	return -ERANGE;
+}
+
+/**
+ * DOC: Link training
+ *
+ * These functions contain common logic and helpers to implement DisplayPort
+ * link training.
+ */
+
+/**
+ * drm_dp_link_train_init() - initialize DisplayPort link training state
+ * @train: DisplayPort link training state
+ */
+void drm_dp_link_train_init(struct drm_dp_link_train *train)
+{
+	struct drm_dp_link_train_set *request = &train->request;
+	struct drm_dp_link_train_set *adjust = &train->adjust;
+	unsigned int i;
+
+	for (i = 0; i < 4; i++) {
+		request->voltage_swing[i] = 0;
+		adjust->voltage_swing[i] = 0;
+
+		request->pre_emphasis[i] = 0;
+		adjust->pre_emphasis[i] = 0;
+
+		request->post_cursor[i] = 0;
+		adjust->post_cursor[i] = 0;
+	}
+
+	train->pattern = DP_TRAINING_PATTERN_DISABLE;
+	train->clock_recovered = false;
+	train->channel_equalized = false;
+}
+
+static bool drm_dp_link_train_valid(const struct drm_dp_link_train *train)
+{
+	return train->clock_recovered && train->channel_equalized;
+}
+
+static int drm_dp_link_apply_training(struct drm_dp_link *link)
+{
+	struct drm_dp_link_train_set *request = &link->train.request;
+	unsigned int lanes = link->lanes, *vs, *pe, *pc, i;
+	struct drm_dp_aux *aux = link->aux;
+	u8 values[4], pattern = 0;
+	int err;
+
+	err = link->ops->apply_training(link);
+	if (err < 0) {
+		DRM_ERROR("failed to apply link training: %d\n", err);
+		return err;
+	}
+
+	vs = request->voltage_swing;
+	pe = request->pre_emphasis;
+	pc = request->post_cursor;
+
+	/* write currently selected voltage-swing and pre-emphasis levels */
+	for (i = 0; i < lanes; i++)
+		values[i] = DP_TRAIN_VOLTAGE_SWING_LEVEL(vs[i]) |
+			    DP_TRAIN_PRE_EMPHASIS_LEVEL(pe[i]);
+
+	err = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_SET, values, lanes);
+	if (err < 0) {
+		DRM_ERROR("failed to set training parameters: %d\n", err);
+		return err;
+	}
+
+	/* write currently selected post-cursor level (if supported) */
+	if (link->revision >= 0x12 && link->rate == 540000) {
+		values[0] = values[1] = 0;
+
+		for (i = 0; i < lanes; i++)
+			values[i / 2] |= DP_LANE_POST_CURSOR(i, pc[i]);
+
+		err = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_1_SET2, values,
+					DIV_ROUND_UP(lanes, 2));
+		if (err < 0) {
+			DRM_ERROR("failed to set post-cursor: %d\n", err);
+			return err;
+		}
+	}
+
+	/* write link pattern */
+	if (link->train.pattern != DP_TRAINING_PATTERN_DISABLE)
+		pattern |= DP_LINK_SCRAMBLING_DISABLE;
+
+	pattern |= link->train.pattern;
+
+	err = drm_dp_dpcd_writeb(aux, DP_TRAINING_PATTERN_SET, pattern);
+	if (err < 0) {
+		DRM_ERROR("failed to set training pattern: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+/*
+ * Sleep for the AUX read interval that matches the training pattern
+ * currently selected in @link; patterns other than TPS1-3 do not wait.
+ */
+static void drm_dp_link_train_wait(struct drm_dp_link *link)
+{
+	unsigned int pattern = link->train.pattern;
+	unsigned long delay;
+
+	if (pattern == DP_TRAINING_PATTERN_1)
+		delay = link->aux_rd_interval.cr;
+	else if (pattern == DP_TRAINING_PATTERN_2 ||
+		 pattern == DP_TRAINING_PATTERN_3)
+		delay = link->aux_rd_interval.ce;
+	else
+		delay = 0;
+
+	/* a zero interval means there is nothing to wait for */
+	if (delay != 0)
+		usleep_range(delay, 2 * delay);
+}
+
+static void drm_dp_link_get_adjustments(struct drm_dp_link *link,
+					u8 status[DP_LINK_STATUS_SIZE])
+{
+	struct drm_dp_link_train_set *adjust = &link->train.adjust;
+	unsigned int i;
+	u8 post_cursor;
+	int err;
+
+	err = drm_dp_dpcd_read(link->aux, DP_ADJUST_REQUEST_POST_CURSOR2,
+			       &post_cursor, sizeof(post_cursor));
+	if (err < 0) {
+		DRM_ERROR("failed to read post_cursor2: %d\n", err);
+		post_cursor = 0;
+	}
+
+	for (i = 0; i < link->lanes; i++) {
+		adjust->voltage_swing[i] =
+			drm_dp_get_adjust_request_voltage(status, i) >>
+				DP_TRAIN_VOLTAGE_SWING_SHIFT;
+
+		adjust->pre_emphasis[i] =
+			drm_dp_get_adjust_request_pre_emphasis(status, i) >>
+				DP_TRAIN_PRE_EMPHASIS_SHIFT;
+
+		adjust->post_cursor[i] =
+			(post_cursor >> (i << 1)) & 0x3;
+	}
+}
+
+/*
+ * Take over the drive settings most recently requested by the sink
+ * (train->adjust) as the settings to program next (train->request),
+ * for all four lanes.
+ */
+static void drm_dp_link_train_adjust(struct drm_dp_link_train *train)
+{
+	struct drm_dp_link_train_set *wanted = &train->adjust;
+	struct drm_dp_link_train_set *next = &train->request;
+	unsigned int lane;
+
+	/* plain copy: overwriting an equal value is a no-op */
+	for (lane = 0; lane < 4; lane++) {
+		next->voltage_swing[lane] = wanted->voltage_swing[lane];
+		next->pre_emphasis[lane] = wanted->pre_emphasis[lane];
+		next->post_cursor[lane] = wanted->post_cursor[lane];
+	}
+}
+
+static int drm_dp_link_recover_clock(struct drm_dp_link *link)
+{
+	u8 status[DP_LINK_STATUS_SIZE];
+	int err;
+
+	err = drm_dp_link_apply_training(link);
+	if (err < 0)
+		return err;
+
+	drm_dp_link_train_wait(link);
+
+	err = drm_dp_dpcd_read_link_status(link->aux, status);
+	if (err < 0) {
+		DRM_ERROR("failed to read link status: %d\n", err);
+		return err;
+	}
+
+	if (!drm_dp_clock_recovery_ok(status, link->lanes))
+		drm_dp_link_get_adjustments(link, status);
+	else
+		link->train.clock_recovered = true;
+
+	return 0;
+}
+
+static int drm_dp_link_clock_recovery(struct drm_dp_link *link)
+{
+	unsigned int repeat;
+	int err;
+
+	/* start clock recovery using training pattern 1 */
+	link->train.pattern = DP_TRAINING_PATTERN_1;
+
+	for (repeat = 1; repeat < 5; repeat++) {
+		err = drm_dp_link_recover_clock(link);
+		if (err < 0) {
+			DRM_ERROR("failed to recover clock: %d\n", err);
+			return err;
+		}
+
+		if (link->train.clock_recovered)
+			break;
+
+		drm_dp_link_train_adjust(&link->train);
+	}
+
+	return 0;
+}
+
+static int drm_dp_link_equalize_channel(struct drm_dp_link *link)
+{
+	struct drm_dp_aux *aux = link->aux;
+	u8 status[DP_LINK_STATUS_SIZE];
+	int err;
+
+	err = drm_dp_link_apply_training(link);
+	if (err < 0)
+		return err;
+
+	drm_dp_link_train_wait(link);
+
+	err = drm_dp_dpcd_read_link_status(aux, status);
+	if (err < 0) {
+		DRM_ERROR("failed to read link status: %d\n", err);
+		return err;
+	}
+
+	if (!drm_dp_clock_recovery_ok(status, link->lanes)) {
+		DRM_ERROR("clock recovery lost while equalizing channel\n");
+		link->train.clock_recovered = false;
+		return 0;
+	}
+
+	if (!drm_dp_channel_eq_ok(status, link->lanes))
+		drm_dp_link_get_adjustments(link, status);
+	else
+		link->train.channel_equalized = true;
+
+	return 0;
+}
+
+static int drm_dp_link_channel_equalization(struct drm_dp_link *link)
+{
+	unsigned int repeat;
+	int err;
+
+	/* start channel equalization using pattern 2 or 3 */
+	if (link->caps.tps3_supported)
+		link->train.pattern = DP_TRAINING_PATTERN_3;
+	else
+		link->train.pattern = DP_TRAINING_PATTERN_2;
+
+	for (repeat = 1; repeat < 5; repeat++) {
+		err = drm_dp_link_equalize_channel(link);
+		if (err < 0) {
+			DRM_ERROR("failed to equalize channel: %d\n", err);
+			return err;
+		}
+
+		if (link->train.channel_equalized)
+			break;
+
+		drm_dp_link_train_adjust(&link->train);
+	}
+
+	return 0;
+}
+
+/* Step link->rate down one standard rate; -EINVAL if no lower rate exists. */
+static int drm_dp_link_downgrade(struct drm_dp_link *link)
+{
+	switch (link->rate) {
+	case 270000:
+		link->rate = 162000;
+		return 0;
+
+	case 540000:
+		link->rate = 270000;
+		return 0;
+
+	default:
+		/* lowest or unknown rate: fail so callers stop retrying */
+		return -EINVAL;
+	}
+}
+
+static void drm_dp_link_train_disable(struct drm_dp_link *link)
+{
+	int err;
+
+	link->train.pattern = DP_TRAINING_PATTERN_DISABLE;
+
+	err = drm_dp_link_apply_training(link);
+	if (err < 0)
+		DRM_ERROR("failed to disable link training: %d\n", err);
+}
+
+static int drm_dp_link_train_full(struct drm_dp_link *link)
+{
+	int err;
+
+retry:
+	DRM_DEBUG_KMS("full-training link: %u lane%s at %u MHz\n",
+		      link->lanes, (link->lanes > 1) ? "s" : "",
+		      link->rate / 100);
+
+	err = drm_dp_link_configure(link->aux, link);
+	if (err < 0) {
+		DRM_ERROR("failed to configure DP link: %d\n", err);
+		return err;
+	}
+
+	err = drm_dp_link_clock_recovery(link);
+	if (err < 0) {
+		DRM_ERROR("clock recovery failed: %d\n", err);
+		goto out;
+	}
+
+	if (!link->train.clock_recovered) {
+		DRM_ERROR("clock recovery failed, downgrading link\n");
+
+		err = drm_dp_link_downgrade(link);
+		if (err < 0)
+			goto out;
+
+		goto retry;
+	}
+
+	DRM_DEBUG_KMS("clock recovery succeeded\n");
+
+	err = drm_dp_link_channel_equalization(link);
+	if (err < 0) {
+		DRM_ERROR("channel equalization failed: %d\n", err);
+		goto out;
+	}
+
+	if (!link->train.channel_equalized) {
+		DRM_ERROR("channel equalization failed, downgrading link\n");
+
+		err = drm_dp_link_downgrade(link);
+		if (err < 0)
+			goto out;
+
+		goto retry;
+	}
+
+	DRM_DEBUG_KMS("channel equalization succeeded\n");
+
+out:
+	drm_dp_link_train_disable(link);
+	return err;
+}
+
+static int drm_dp_link_train_fast(struct drm_dp_link *link)
+{
+	u8 status[DP_LINK_STATUS_SIZE];
+	int err;
+
+	DRM_DEBUG_KMS("fast-training link: %u lane%s at %u MHz\n",
+		      link->lanes, (link->lanes > 1) ? "s" : "",
+		      link->rate / 100);
+
+	err = drm_dp_link_configure(link->aux, link);
+	if (err < 0) {
+		DRM_ERROR("failed to configure DP link: %d\n", err);
+		return err;
+	}
+
+	/* transmit training pattern 1 for 500 microseconds */
+	link->train.pattern = DP_TRAINING_PATTERN_1;
+
+	err = drm_dp_link_apply_training(link);
+	if (err < 0)
+		goto out;
+
+	usleep_range(500, 1000);
+
+	/* transmit training pattern 2 or 3 for 500 microseconds */
+	if (link->caps.tps3_supported)
+		link->train.pattern = DP_TRAINING_PATTERN_3;
+	else
+		link->train.pattern = DP_TRAINING_PATTERN_2;
+
+	err = drm_dp_link_apply_training(link);
+	if (err < 0)
+		goto out;
+
+	usleep_range(500, 1000);
+
+	err = drm_dp_dpcd_read_link_status(link->aux, status);
+	if (err < 0) {
+		DRM_ERROR("failed to read link status: %d\n", err);
+		goto out;
+	}
+
+	if (!drm_dp_clock_recovery_ok(status, link->lanes)) {
+		DRM_ERROR("clock recovery failed\n");
+		err = -EIO;
+	}
+
+	if (!drm_dp_channel_eq_ok(status, link->lanes)) {
+		DRM_ERROR("channel equalization failed\n");
+		err = -EIO;
+	}
+
+out:
+	drm_dp_link_train_disable(link);
+	return err;
+}
+
+/**
+ * drm_dp_link_train() - perform DisplayPort link training
+ * @link: a DP link object
+ *
+ * Uses the context stored in the DP link object to perform link training. It
+ * is expected that drivers will call drm_dp_link_probe() to obtain the link
+ * capabilities before performing link training.
+ *
+ * If the sink supports fast link training (no AUX CH handshake) and valid
+ * training settings are available, this function will try to perform fast
+ * link training and fall back to full link training on failure.
+ *
+ * Returns: 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_train(struct drm_dp_link *link)
+{
+	int err;
+
+	drm_dp_link_train_init(&link->train);
+	/* NOTE(review): init clears what _valid() tests; fast path dead? */
+	if (link->caps.fast_training) {
+		if (drm_dp_link_train_valid(&link->train)) {
+			err = drm_dp_link_train_fast(link);
+			if (err < 0)
+				DRM_ERROR("fast link training failed: %d\n",
+					  err);
+			else
+				return 0;
+		} else {
+			DRM_DEBUG_KMS("training parameters not available\n");
+		}
+	} else {
+		DRM_DEBUG_KMS("fast link training not supported\n");
+	}
+
+	err = drm_dp_link_train_full(link);
+	if (err < 0)
+		DRM_ERROR("full link training failed: %d\n", err);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/tegra/dp.h b/drivers/gpu/drm/tegra/dp.h
new file mode 100644
index 000000000000..695060cafac0
--- /dev/null
+++ b/drivers/gpu/drm/tegra/dp.h
@@ -0,0 +1,175 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2013-2019 NVIDIA Corporation.
+ * Copyright (C) 2015 Rob Clark
+ */
+
+#ifndef DRM_TEGRA_DP_H
+#define DRM_TEGRA_DP_H 1
+
+#include <linux/types.h>
+
+struct drm_display_info;
+struct drm_display_mode;
+struct drm_dp_aux;
+struct drm_dp_link;
+
+/**
+ * struct drm_dp_link_caps - DP link capabilities
+ */
+struct drm_dp_link_caps {
+	/**
+	 * @enhanced_framing:
+	 *
+	 * enhanced framing capability (mandatory as of DP 1.2)
+	 */
+	bool enhanced_framing;
+
+	/**
+	 * @tps3_supported:
+	 *
+	 * training pattern sequence 3 supported for equalization
+	 */
+	bool tps3_supported;
+
+	/**
+	 * @fast_training:
+	 *
+	 * AUX CH handshake not required for link training
+	 */
+	bool fast_training;
+
+	/**
+	 * @channel_coding:
+	 *
+	 * ANSI 8B/10B channel coding capability
+	 */
+	bool channel_coding;
+
+	/**
+	 * @alternate_scrambler_reset:
+	 *
+	 * eDP alternate scrambler reset capability
+	 */
+	bool alternate_scrambler_reset;
+};
+
+void drm_dp_link_caps_copy(struct drm_dp_link_caps *dest,
+			   const struct drm_dp_link_caps *src);
+
+/**
+ * struct drm_dp_link_ops - DP link operations
+ */
+struct drm_dp_link_ops {
+	/**
+	 * @apply_training:
+	 */
+	int (*apply_training)(struct drm_dp_link *link);
+
+	/**
+	 * @configure:
+	 */
+	int (*configure)(struct drm_dp_link *link);
+};
+
+#define DP_TRAIN_VOLTAGE_SWING_LEVEL(x) ((x) << 0)
+#define DP_TRAIN_PRE_EMPHASIS_LEVEL(x) ((x) << 3)
+#define DP_LANE_POST_CURSOR(i, x) (((x) & 0x3) << (((i) & 1) << 2))
+
+/**
+ * struct drm_dp_link_train_set - link training settings
+ * @voltage_swing: per-lane voltage swing
+ * @pre_emphasis: per-lane pre-emphasis
+ * @post_cursor: per-lane post-cursor
+ */
+struct drm_dp_link_train_set {
+	unsigned int voltage_swing[4];
+	unsigned int pre_emphasis[4];
+	unsigned int post_cursor[4];
+};
+
+/**
+ * struct drm_dp_link_train - link training state information
+ * @request: currently requested settings
+ * @adjust: adjustments requested by sink
+ * @pattern: currently requested training pattern
+ * @clock_recovered: flag to track if clock recovery has completed
+ * @channel_equalized: flag to track if channel equalization has completed
+ */
+struct drm_dp_link_train {
+	struct drm_dp_link_train_set request;
+	struct drm_dp_link_train_set adjust;
+
+	unsigned int pattern;
+
+	bool clock_recovered;
+	bool channel_equalized;
+};
+
+/**
+ * struct drm_dp_link - DP link capabilities and configuration
+ * @revision: DP specification revision supported on the link
+ * @max_rate: maximum clock rate supported on the link
+ * @max_lanes: maximum number of lanes supported on the link
+ * @caps: capabilities supported on the link (see &drm_dp_link_caps)
+ * @aux_rd_interval: AUX read interval to use for training (in microseconds)
+ * @edp: eDP revision (0x11: eDP 1.1, 0x12: eDP 1.2, ...)
+ * @rate: currently configured link rate
+ * @lanes: currently configured number of lanes
+ * @rates: additional supported link rates in kHz (eDP 1.4)
+ * @num_rates: number of additional supported link rates (eDP 1.4)
+ */
+struct drm_dp_link {
+	unsigned char revision;
+	unsigned int max_rate;
+	unsigned int max_lanes;
+
+	struct drm_dp_link_caps caps;
+
+	/**
+	 * @cr: clock recovery read interval
+	 * @ce: channel equalization read interval
+	 */
+	struct {
+		unsigned int cr;
+		unsigned int ce;
+	} aux_rd_interval;
+
+	unsigned char edp;
+
+	unsigned int rate;
+	unsigned int lanes;
+
+	unsigned long rates[DP_MAX_SUPPORTED_RATES];
+	unsigned int num_rates;
+
+	/**
+	 * @ops: DP link operations
+	 */
+	const struct drm_dp_link_ops *ops;
+
+	/**
+	 * @aux: DP AUX channel
+	 */
+	struct drm_dp_aux *aux;
+
+	/**
+	 * @train: DP link training state
+	 */
+	struct drm_dp_link_train train;
+};
+
+int drm_dp_link_add_rate(struct drm_dp_link *link, unsigned long rate);
+int drm_dp_link_remove_rate(struct drm_dp_link *link, unsigned long rate);
+void drm_dp_link_update_rates(struct drm_dp_link *link);
+
+int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link);
+int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link);
+int drm_dp_link_choose(struct drm_dp_link *link,
+		       const struct drm_display_mode *mode,
+		       const struct drm_display_info *info);
+
+void drm_dp_link_train_init(struct drm_dp_link_train *train);
+int drm_dp_link_train(struct drm_dp_link *link);
+
+#endif
diff --git a/drivers/gpu/drm/tegra/dpaux.c b/drivers/gpu/drm/tegra/dpaux.c
index ee4180d8db14..e5297ac5c0fc 100644
--- a/drivers/gpu/drm/tegra/dpaux.c
+++ b/drivers/gpu/drm/tegra/dpaux.c
@@ -1,29 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2013 NVIDIA Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
 #include <linux/delay.h>
-#include <linux/gpio.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
-#include <linux/of_gpio.h>
+#include <linux/module.h>
+#include <linux/of.h>
 #include <linux/pinctrl/pinconf-generic.h>
 #include <linux/pinctrl/pinctrl.h>
 #include <linux/pinctrl/pinmux.h>
-#include <linux/pm_runtime.h>
 #include <linux/platform_device.h>
-#include <linux/reset.h>
+#include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
+#include <linux/reset.h>
 #include <linux/workqueue.h>
 
-#include <drm/drm_dp_helper.h>
+#include <drm/display/drm_dp_helper.h>
+#include <drm/display/drm_dp_aux_bus.h>
 #include <drm/drm_panel.h>
 
+#include "dp.h"
 #include "dpaux.h"
 #include "drm.h"
 #include "trace.h"
@@ -31,10 +30,18 @@
 static DEFINE_MUTEX(dpaux_lock);
 static LIST_HEAD(dpaux_list);
 
+struct tegra_dpaux_soc {
+	unsigned int cmh;
+	unsigned int drvz;
+	unsigned int drvi;
+};
+
 struct tegra_dpaux {
 	struct drm_dp_aux aux;
 	struct device *dev;
 
+	const struct tegra_dpaux_soc *soc;
+
 	void __iomem *regs;
 	int irq;
 
@@ -122,6 +129,7 @@ static ssize_t tegra_dpaux_transfer(struct drm_dp_aux *aux,
 	struct tegra_dpaux *dpaux = to_dpaux(aux);
 	unsigned long status;
 	ssize_t ret = 0;
+	u8 reply = 0;
 	u32 value;
 
 	/* Tegra has 4x4 byte DP AUX transmit and receive FIFOs. */
@@ -216,23 +224,23 @@ static ssize_t tegra_dpaux_transfer(struct drm_dp_aux *aux,
 
 	switch ((value & DPAUX_DP_AUXSTAT_REPLY_TYPE_MASK) >> 16) {
 	case 0x00:
-		msg->reply = DP_AUX_NATIVE_REPLY_ACK;
+		reply = DP_AUX_NATIVE_REPLY_ACK;
 		break;
 
 	case 0x01:
-		msg->reply = DP_AUX_NATIVE_REPLY_NACK;
+		reply = DP_AUX_NATIVE_REPLY_NACK;
 		break;
 
 	case 0x02:
-		msg->reply = DP_AUX_NATIVE_REPLY_DEFER;
+		reply = DP_AUX_NATIVE_REPLY_DEFER;
 		break;
 
 	case 0x04:
-		msg->reply = DP_AUX_I2C_REPLY_NACK;
+		reply = DP_AUX_I2C_REPLY_NACK;
 		break;
 
 	case 0x08:
-		msg->reply = DP_AUX_I2C_REPLY_DEFER;
+		reply = DP_AUX_I2C_REPLY_DEFER;
 		break;
 	}
 
@@ -240,14 +248,24 @@ static ssize_t tegra_dpaux_transfer(struct drm_dp_aux *aux,
 		if (msg->request & DP_AUX_I2C_READ) {
 			size_t count = value & DPAUX_DP_AUXSTAT_REPLY_MASK;
 
-			if (WARN_ON(count != msg->size))
-				count = min_t(size_t, count, msg->size);
+			/*
+			 * There might be a smarter way to do this, but since
+			 * the DP helpers will already retry transactions for
+			 * an -EBUSY return value, simply reuse that instead.
+			 */
+			if (count != msg->size) {
+				ret = -EBUSY;
+				goto out;
+			}
 
 			tegra_dpaux_read_fifo(dpaux, msg->buffer, count);
 			ret = count;
 		}
 	}
 
+	msg->reply = reply;
+
+out:
 	return ret;
 }
 
@@ -262,7 +280,6 @@ static void tegra_dpaux_hotplug(struct work_struct *work)
 static irqreturn_t tegra_dpaux_irq(int irq, void *data)
 {
 	struct tegra_dpaux *dpaux = data;
-	irqreturn_t ret = IRQ_HANDLED;
 	u32 value;
 
 	/* clear interrupts */
@@ -279,7 +296,7 @@ static irqreturn_t tegra_dpaux_irq(int irq, void *data)
 	if (value & DPAUX_INTR_AUX_DONE)
 		complete(&dpaux->complete);
 
-	return ret;
+	return IRQ_HANDLED;
 }
 
 enum tegra_dpaux_functions {
@@ -312,9 +329,9 @@ static int tegra_dpaux_pad_config(struct tegra_dpaux *dpaux, unsigned function)
 
 	switch (function) {
 	case DPAUX_PADCTL_FUNC_AUX:
-		value = DPAUX_HYBRID_PADCTL_AUX_CMH(2) |
-			DPAUX_HYBRID_PADCTL_AUX_DRVZ(4) |
-			DPAUX_HYBRID_PADCTL_AUX_DRVI(0x18) |
+		value = DPAUX_HYBRID_PADCTL_AUX_CMH(dpaux->soc->cmh) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVZ(dpaux->soc->drvz) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVI(dpaux->soc->drvi) |
 			DPAUX_HYBRID_PADCTL_AUX_INPUT_RCV |
 			DPAUX_HYBRID_PADCTL_MODE_AUX;
 		break;
@@ -322,9 +339,9 @@ static int tegra_dpaux_pad_config(struct tegra_dpaux *dpaux, unsigned function)
 	case DPAUX_PADCTL_FUNC_I2C:
 		value = DPAUX_HYBRID_PADCTL_I2C_SDA_INPUT_RCV |
 			DPAUX_HYBRID_PADCTL_I2C_SCL_INPUT_RCV |
-			DPAUX_HYBRID_PADCTL_AUX_CMH(2) |
-			DPAUX_HYBRID_PADCTL_AUX_DRVZ(4) |
-			DPAUX_HYBRID_PADCTL_AUX_DRVI(0x18) |
+			DPAUX_HYBRID_PADCTL_AUX_CMH(dpaux->soc->cmh) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVZ(dpaux->soc->drvz) |
+			DPAUX_HYBRID_PADCTL_AUX_DRVI(dpaux->soc->drvi) |
 			DPAUX_HYBRID_PADCTL_MODE_I2C;
 		break;
 
@@ -430,7 +447,6 @@ static const struct pinmux_ops tegra_dpaux_pinmux_ops = {
 static int tegra_dpaux_probe(struct platform_device *pdev)
 {
 	struct tegra_dpaux *dpaux;
-	struct resource *regs;
 	u32 value;
 	int err;
 
@@ -438,21 +454,19 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 	if (!dpaux)
 		return -ENOMEM;
 
+	dpaux->soc = of_device_get_match_data(&pdev->dev);
 	INIT_WORK(&dpaux->work, tegra_dpaux_hotplug);
 	init_completion(&dpaux->complete);
 	INIT_LIST_HEAD(&dpaux->list);
 	dpaux->dev = &pdev->dev;
 
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	dpaux->regs = devm_ioremap_resource(&pdev->dev, regs);
+	dpaux->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(dpaux->regs))
 		return PTR_ERR(dpaux->regs);
 
 	dpaux->irq = platform_get_irq(pdev, 0);
-	if (dpaux->irq < 0) {
-		dev_err(&pdev->dev, "failed to get IRQ\n");
-		return -ENXIO;
-	}
+	if (dpaux->irq < 0)
+		return dpaux->irq;
 
 	if (!pdev->dev.pm_domain) {
 		dpaux->rst = devm_reset_control_get(&pdev->dev, "dpaux");
@@ -485,11 +499,13 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 		return err;
 	}
 
-	dpaux->vdd = devm_regulator_get(&pdev->dev, "vdd");
+	dpaux->vdd = devm_regulator_get_optional(&pdev->dev, "vdd");
 	if (IS_ERR(dpaux->vdd)) {
-		dev_err(&pdev->dev, "failed to get VDD supply: %ld\n",
-			PTR_ERR(dpaux->vdd));
-		return PTR_ERR(dpaux->vdd);
+		if (PTR_ERR(dpaux->vdd) != -ENODEV)
+			return dev_err_probe(&pdev->dev, PTR_ERR(dpaux->vdd),
+					     "failed to get VDD supply\n");
+
+		dpaux->vdd = NULL;
 	}
 
 	platform_set_drvdata(pdev, dpaux);
@@ -501,7 +517,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 	if (err < 0) {
 		dev_err(dpaux->dev, "failed to request IRQ#%u: %d\n",
 			dpaux->irq, err);
-		return err;
+		goto err_pm_disable;
 	}
 
 	disable_irq(dpaux->irq);
@@ -509,9 +525,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 	dpaux->aux.transfer = tegra_dpaux_transfer;
 	dpaux->aux.dev = &pdev->dev;
 
-	err = drm_dp_aux_register(&dpaux->aux);
-	if (err < 0)
-		return err;
+	drm_dp_aux_init(&dpaux->aux);
 
 	/*
 	 * Assume that by default the DPAUX/I2C pads will be used for HDMI,
@@ -523,7 +537,7 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 	 */
 	err = tegra_dpaux_pad_config(dpaux, DPAUX_PADCTL_FUNC_I2C);
 	if (err < 0)
-		return err;
+		goto err_pm_disable;
 
 #ifdef CONFIG_GENERIC_PINCONF
 	dpaux->desc.name = dev_name(&pdev->dev);
@@ -536,7 +550,8 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 	dpaux->pinctrl = devm_pinctrl_register(&pdev->dev, &dpaux->desc, dpaux);
 	if (IS_ERR(dpaux->pinctrl)) {
 		dev_err(&pdev->dev, "failed to register pincontrol\n");
-		return PTR_ERR(dpaux->pinctrl);
+		err = PTR_ERR(dpaux->pinctrl);
+		goto err_pm_disable;
 	}
 #endif
 	/* enable and clear all interrupts */
@@ -549,10 +564,21 @@ static int tegra_dpaux_probe(struct platform_device *pdev)
 	list_add_tail(&dpaux->list, &dpaux_list);
 	mutex_unlock(&dpaux_lock);
 
+	err = devm_of_dp_aux_populate_ep_devices(&dpaux->aux);
+	if (err < 0) {
+		dev_err(dpaux->dev, "failed to populate AUX bus: %d\n", err);
+		goto err_pm_disable;
+	}
+
 	return 0;
+
+err_pm_disable:
+	pm_runtime_put_sync(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	return err;
 }
 
-static int tegra_dpaux_remove(struct platform_device *pdev)
+static void tegra_dpaux_remove(struct platform_device *pdev)
 {
 	struct tegra_dpaux *dpaux = platform_get_drvdata(pdev);
 
@@ -561,19 +587,14 @@ static int tegra_dpaux_remove(struct platform_device *pdev)
 	/* make sure pads are powered down when not in use */
 	tegra_dpaux_pad_power_down(dpaux);
 
-	pm_runtime_put(&pdev->dev);
+	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
 
-	drm_dp_aux_unregister(&dpaux->aux);
-
 	mutex_lock(&dpaux_lock);
 	list_del(&dpaux->list);
 	mutex_unlock(&dpaux_lock);
-
-	return 0;
 }
 
-#ifdef CONFIG_PM
 static int tegra_dpaux_suspend(struct device *dev)
 {
 	struct tegra_dpaux *dpaux = dev_get_drvdata(dev);
@@ -632,17 +653,34 @@ disable_clk:
 	clk_disable_unprepare(dpaux->clk);
 	return err;
 }
-#endif
 
 static const struct dev_pm_ops tegra_dpaux_pm_ops = {
-	SET_RUNTIME_PM_OPS(tegra_dpaux_suspend, tegra_dpaux_resume, NULL)
+	RUNTIME_PM_OPS(tegra_dpaux_suspend, tegra_dpaux_resume, NULL)
+};
+
+static const struct tegra_dpaux_soc tegra124_dpaux_soc = {
+	.cmh = 0x02,
+	.drvz = 0x04,
+	.drvi = 0x18,
+};
+
+static const struct tegra_dpaux_soc tegra210_dpaux_soc = {
+	.cmh = 0x02,
+	.drvz = 0x04,
+	.drvi = 0x30,
+};
+
+static const struct tegra_dpaux_soc tegra194_dpaux_soc = {
+	.cmh = 0x02,
+	.drvz = 0x04,
+	.drvi = 0x2c,
 };
 
 static const struct of_device_id tegra_dpaux_of_match[] = {
-	{ .compatible = "nvidia,tegra194-dpaux", },
-	{ .compatible = "nvidia,tegra186-dpaux", },
-	{ .compatible = "nvidia,tegra210-dpaux", },
-	{ .compatible = "nvidia,tegra124-dpaux", },
+	{ .compatible = "nvidia,tegra194-dpaux", .data = &tegra194_dpaux_soc },
+	{ .compatible = "nvidia,tegra186-dpaux", .data = &tegra210_dpaux_soc },
+	{ .compatible = "nvidia,tegra210-dpaux", .data = &tegra210_dpaux_soc },
+	{ .compatible = "nvidia,tegra124-dpaux", .data = &tegra124_dpaux_soc },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, tegra_dpaux_of_match);
@@ -651,7 +689,7 @@ struct platform_driver tegra_dpaux_driver = {
 	.driver = {
 		.name = "tegra-dpaux",
 		.of_match_table = tegra_dpaux_of_match,
-		.pm = &tegra_dpaux_pm_ops,
+		.pm = pm_ptr(&tegra_dpaux_pm_ops),
 	},
 	.probe = tegra_dpaux_probe,
 	.remove = tegra_dpaux_remove,
@@ -680,28 +718,40 @@ int drm_dp_aux_attach(struct drm_dp_aux *aux, struct tegra_output *output)
 	unsigned long timeout;
 	int err;
 
-	output->connector.polled = DRM_CONNECTOR_POLL_HPD;
-	dpaux->output = output;
-
-	err = regulator_enable(dpaux->vdd);
+	aux->drm_dev = output->connector.dev;
+	err = drm_dp_aux_register(aux);
 	if (err < 0)
 		return err;
 
-	timeout = jiffies + msecs_to_jiffies(250);
+	output->connector.polled = DRM_CONNECTOR_POLL_HPD;
+	dpaux->output = output;
 
-	while (time_before(jiffies, timeout)) {
+	if (output->panel) {
-		enum drm_connector_status status;
+		enum drm_connector_status status = connector_status_unknown;
 
-		status = drm_dp_aux_detect(aux);
-		if (status == connector_status_connected) {
-			enable_irq(dpaux->irq);
-			return 0;
+		if (dpaux->vdd) {
+			err = regulator_enable(dpaux->vdd);
+			if (err < 0)
+				return err;
 		}
 
-		usleep_range(1000, 2000);
+		timeout = jiffies + msecs_to_jiffies(250);
+
+		while (time_before(jiffies, timeout)) {
+			status = drm_dp_aux_detect(aux);
+
+			if (status == connector_status_connected)
+				break;
+
+			usleep_range(1000, 2000);
+		}
+
+		if (status != connector_status_connected)
+			return -ETIMEDOUT;
 	}
 
-	return -ETIMEDOUT;
+	enable_irq(dpaux->irq);
+	return 0;
 }
 
 int drm_dp_aux_detach(struct drm_dp_aux *aux)
@@ -710,27 +760,36 @@ int drm_dp_aux_detach(struct drm_dp_aux *aux)
 	unsigned long timeout;
 	int err;
 
+	drm_dp_aux_unregister(aux);
 	disable_irq(dpaux->irq);
 
-	err = regulator_disable(dpaux->vdd);
-	if (err < 0)
-		return err;
+	if (dpaux->output->panel) {
+		enum drm_connector_status status = connector_status_unknown;
+
+		if (dpaux->vdd) {
+			err = regulator_disable(dpaux->vdd);
+			if (err < 0)
+				return err;
+		}
 
-	timeout = jiffies + msecs_to_jiffies(250);
+		timeout = jiffies + msecs_to_jiffies(250);
 
-	while (time_before(jiffies, timeout)) {
-		enum drm_connector_status status;
+		while (time_before(jiffies, timeout)) {
+			status = drm_dp_aux_detect(aux);
+
+			if (status == connector_status_disconnected)
+				break;
 
-		status = drm_dp_aux_detect(aux);
-		if (status == connector_status_disconnected) {
-			dpaux->output = NULL;
-			return 0;
+			usleep_range(1000, 2000);
 		}
 
-		usleep_range(1000, 2000);
+		if (status != connector_status_disconnected)
+			return -ETIMEDOUT;
+
+		dpaux->output = NULL;
 	}
 
-	return -ETIMEDOUT;
+	return 0;
 }
 
 enum drm_connector_status drm_dp_aux_detect(struct drm_dp_aux *aux)
@@ -761,72 +820,3 @@ int drm_dp_aux_disable(struct drm_dp_aux *aux)
 
 	return 0;
 }
-
-int drm_dp_aux_prepare(struct drm_dp_aux *aux, u8 encoding)
-{
-	int err;
-
-	err = drm_dp_dpcd_writeb(aux, DP_MAIN_LINK_CHANNEL_CODING_SET,
-				 encoding);
-	if (err < 0)
-		return err;
-
-	return 0;
-}
-
-int drm_dp_aux_train(struct drm_dp_aux *aux, struct drm_dp_link *link,
-		     u8 pattern)
-{
-	u8 tp = pattern & DP_TRAINING_PATTERN_MASK;
-	u8 status[DP_LINK_STATUS_SIZE], values[4];
-	unsigned int i;
-	int err;
-
-	err = drm_dp_dpcd_writeb(aux, DP_TRAINING_PATTERN_SET, pattern);
-	if (err < 0)
-		return err;
-
-	if (tp == DP_TRAINING_PATTERN_DISABLE)
-		return 0;
-
-	for (i = 0; i < link->num_lanes; i++)
-		values[i] = DP_TRAIN_MAX_PRE_EMPHASIS_REACHED |
-			    DP_TRAIN_PRE_EMPH_LEVEL_0 |
-			    DP_TRAIN_MAX_SWING_REACHED |
-			    DP_TRAIN_VOLTAGE_SWING_LEVEL_0;
-
-	err = drm_dp_dpcd_write(aux, DP_TRAINING_LANE0_SET, values,
-				link->num_lanes);
-	if (err < 0)
-		return err;
-
-	usleep_range(500, 1000);
-
-	err = drm_dp_dpcd_read_link_status(aux, status);
-	if (err < 0)
-		return err;
-
-	switch (tp) {
-	case DP_TRAINING_PATTERN_1:
-		if (!drm_dp_clock_recovery_ok(status, link->num_lanes))
-			return -EAGAIN;
-
-		break;
-
-	case DP_TRAINING_PATTERN_2:
-		if (!drm_dp_channel_eq_ok(status, link->num_lanes))
-			return -EAGAIN;
-
-		break;
-
-	default:
-		dev_err(aux->dev, "unsupported training pattern %u\n", tp);
-		return -EINVAL;
-	}
-
-	err = drm_dp_dpcd_writeb(aux, DP_EDP_CONFIGURATION_SET, 0);
-	if (err < 0)
-		return err;
-
-	return 0;
-}
diff --git a/drivers/gpu/drm/tegra/dpaux.h b/drivers/gpu/drm/tegra/dpaux.h
index 20783d9f4728..5eced10fad37 100644
--- a/drivers/gpu/drm/tegra/dpaux.h
+++ b/drivers/gpu/drm/tegra/dpaux.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2013 NVIDIA Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef DRM_TEGRA_DPAUX_H
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 4b70ce664c41..1d18d43292dc 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1,42 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2012 Avionic Design GmbH
  * Copyright (C) 2012-2016 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
+#include <linux/aperture.h>
 #include <linux/bitops.h>
 #include <linux/host1x.h>
 #include <linux/idr.h>
 #include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 
+#include <drm/clients/drm_client_setup.h>
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_framebuffer.h>
+#include <drm/drm_ioctl.h>
+#include <drm/drm_prime.h>
+#include <drm/drm_print.h>
+#include <drm/drm_vblank.h>
 
 #if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
 #include <asm/dma-iommu.h>
 #endif
 
+#include "dc.h"
 #include "drm.h"
 #include "gem.h"
+#include "uapi.h"
 
 #define DRIVER_NAME "tegra"
 #define DRIVER_DESC "NVIDIA Tegra graphics"
-#define DRIVER_DATE "20120330"
-#define DRIVER_MAJOR 0
+#define DRIVER_MAJOR 1
 #define DRIVER_MINOR 0
 #define DRIVER_PATCHLEVEL 0
 
 #define CARVEOUT_SZ SZ_64M
 #define CDMA_GATHER_FETCHES_MAX_NB 16383
 
-struct tegra_drm_file {
-	struct idr contexts;
-	struct mutex lock;
-};
-
 static int tegra_atomic_check(struct drm_device *drm,
 			      struct drm_atomic_state *state)
 {
@@ -51,29 +57,42 @@ static int tegra_atomic_check(struct drm_device *drm,
 
 static const struct drm_mode_config_funcs tegra_drm_mode_config_funcs = {
 	.fb_create = tegra_fb_create,
-#ifdef CONFIG_DRM_FBDEV_EMULATION
-	.output_poll_changed = drm_fb_helper_output_poll_changed,
-#endif
 	.atomic_check = tegra_atomic_check,
 	.atomic_commit = drm_atomic_helper_commit,
 };
 
+static void tegra_atomic_post_commit(struct drm_device *drm,
+				     struct drm_atomic_state *old_state)
+{
+	struct drm_crtc_state *old_crtc_state __maybe_unused;
+	struct drm_crtc *crtc;
+	unsigned int i;
+
+	for_each_old_crtc_in_state(old_state, crtc, old_crtc_state, i)
+		tegra_crtc_atomic_post_commit(crtc, old_state);
+}
+
 static void tegra_atomic_commit_tail(struct drm_atomic_state *old_state)
 {
 	struct drm_device *drm = old_state->dev;
 	struct tegra_drm *tegra = drm->dev_private;
 
 	if (tegra->hub) {
+		bool fence_cookie = dma_fence_begin_signalling();
+
 		drm_atomic_helper_commit_modeset_disables(drm, old_state);
 		tegra_display_hub_atomic_commit(drm, old_state);
 		drm_atomic_helper_commit_planes(drm, old_state, 0);
 		drm_atomic_helper_commit_modeset_enables(drm, old_state);
 		drm_atomic_helper_commit_hw_done(old_state);
+		dma_fence_end_signalling(fence_cookie);
 		drm_atomic_helper_wait_for_vblanks(drm, old_state);
 		drm_atomic_helper_cleanup_planes(drm, old_state);
 	} else {
 		drm_atomic_helper_commit_tail_rpm(old_state);
 	}
+
+	tegra_atomic_post_commit(drm, old_state);
 }
 
 static const struct drm_mode_config_helper_funcs
@@ -81,163 +100,6 @@ tegra_drm_mode_config_helpers = {
 	.atomic_commit_tail = tegra_atomic_commit_tail,
 };
 
-static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
-{
-	struct host1x_device *device = to_host1x_device(drm->dev);
-	struct tegra_drm *tegra;
-	int err;
-
-	tegra = kzalloc(sizeof(*tegra), GFP_KERNEL);
-	if (!tegra)
-		return -ENOMEM;
-
-	if (iommu_present(&platform_bus_type)) {
-		u64 carveout_start, carveout_end, gem_start, gem_end;
-		struct iommu_domain_geometry *geometry;
-		unsigned long order;
-
-		tegra->domain = iommu_domain_alloc(&platform_bus_type);
-		if (!tegra->domain) {
-			err = -ENOMEM;
-			goto free;
-		}
-
-		err = iova_cache_get();
-		if (err < 0)
-			goto domain;
-
-		geometry = &tegra->domain->geometry;
-		gem_start = geometry->aperture_start;
-		gem_end = geometry->aperture_end - CARVEOUT_SZ;
-		carveout_start = gem_end + 1;
-		carveout_end = geometry->aperture_end;
-
-		order = __ffs(tegra->domain->pgsize_bitmap);
-		init_iova_domain(&tegra->carveout.domain, 1UL << order,
-				 carveout_start >> order);
-
-		tegra->carveout.shift = iova_shift(&tegra->carveout.domain);
-		tegra->carveout.limit = carveout_end >> tegra->carveout.shift;
-
-		drm_mm_init(&tegra->mm, gem_start, gem_end - gem_start + 1);
-		mutex_init(&tegra->mm_lock);
-
-		DRM_DEBUG("IOMMU apertures:\n");
-		DRM_DEBUG("  GEM: %#llx-%#llx\n", gem_start, gem_end);
-		DRM_DEBUG("  Carveout: %#llx-%#llx\n", carveout_start,
-			  carveout_end);
-	}
-
-	mutex_init(&tegra->clients_lock);
-	INIT_LIST_HEAD(&tegra->clients);
-
-	drm->dev_private = tegra;
-	tegra->drm = drm;
-
-	drm_mode_config_init(drm);
-
-	drm->mode_config.min_width = 0;
-	drm->mode_config.min_height = 0;
-
-	drm->mode_config.max_width = 4096;
-	drm->mode_config.max_height = 4096;
-
-	drm->mode_config.allow_fb_modifiers = true;
-
-	drm->mode_config.normalize_zpos = true;
-
-	drm->mode_config.funcs = &tegra_drm_mode_config_funcs;
-	drm->mode_config.helper_private = &tegra_drm_mode_config_helpers;
-
-	err = tegra_drm_fb_prepare(drm);
-	if (err < 0)
-		goto config;
-
-	drm_kms_helper_poll_init(drm);
-
-	err = host1x_device_init(device);
-	if (err < 0)
-		goto fbdev;
-
-	if (tegra->hub) {
-		err = tegra_display_hub_prepare(tegra->hub);
-		if (err < 0)
-			goto device;
-	}
-
-	/*
-	 * We don't use the drm_irq_install() helpers provided by the DRM
-	 * core, so we need to set this manually in order to allow the
-	 * DRM_IOCTL_WAIT_VBLANK to operate correctly.
-	 */
-	drm->irq_enabled = true;
-
-	/* syncpoints are used for full 32-bit hardware VBLANK counters */
-	drm->max_vblank_count = 0xffffffff;
-
-	err = drm_vblank_init(drm, drm->mode_config.num_crtc);
-	if (err < 0)
-		goto hub;
-
-	drm_mode_config_reset(drm);
-
-	err = tegra_drm_fb_init(drm);
-	if (err < 0)
-		goto hub;
-
-	return 0;
-
-hub:
-	if (tegra->hub)
-		tegra_display_hub_cleanup(tegra->hub);
-device:
-	host1x_device_exit(device);
-fbdev:
-	drm_kms_helper_poll_fini(drm);
-	tegra_drm_fb_free(drm);
-config:
-	drm_mode_config_cleanup(drm);
-
-	if (tegra->domain) {
-		mutex_destroy(&tegra->mm_lock);
-		drm_mm_takedown(&tegra->mm);
-		put_iova_domain(&tegra->carveout.domain);
-		iova_cache_put();
-	}
-domain:
-	if (tegra->domain)
-		iommu_domain_free(tegra->domain);
-free:
-	kfree(tegra);
-	return err;
-}
-
-static void tegra_drm_unload(struct drm_device *drm)
-{
-	struct host1x_device *device = to_host1x_device(drm->dev);
-	struct tegra_drm *tegra = drm->dev_private;
-	int err;
-
-	drm_kms_helper_poll_fini(drm);
-	tegra_drm_fb_exit(drm);
-	drm_atomic_helper_shutdown(drm);
-	drm_mode_config_cleanup(drm);
-
-	err = host1x_device_exit(device);
-	if (err < 0)
-		return;
-
-	if (tegra->domain) {
-		mutex_destroy(&tegra->mm_lock);
-		drm_mm_takedown(&tegra->mm);
-		put_iova_domain(&tegra->carveout.domain);
-		iova_cache_put();
-		iommu_domain_free(tegra->domain);
-	}
-
-	kfree(tegra);
-}
-
 static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp)
 {
 	struct tegra_drm_file *fpriv;
@@ -246,7 +108,9 @@ static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp)
 	if (!fpriv)
 		return -ENOMEM;
 
-	idr_init(&fpriv->contexts);
+	idr_init_base(&fpriv->legacy_contexts, 1);
+	xa_init_flags(&fpriv->contexts, XA_FLAGS_ALLOC1);
+	xa_init(&fpriv->syncpoints);
 	mutex_init(&fpriv->lock);
 	filp->driver_priv = fpriv;
 
@@ -256,23 +120,10 @@ static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp)
 static void tegra_drm_context_free(struct tegra_drm_context *context)
 {
 	context->client->ops->close_channel(context);
+	pm_runtime_put(context->client->base.dev);
 	kfree(context);
 }
 
-static struct host1x_bo *
-host1x_bo_lookup(struct drm_file *file, u32 handle)
-{
-	struct drm_gem_object *gem;
-	struct tegra_bo *bo;
-
-	gem = drm_gem_object_lookup(file, handle);
-	if (!gem)
-		return NULL;
-
-	bo = to_tegra_bo(gem);
-	return &bo->base;
-}
-
 static int host1x_reloc_copy_from_user(struct host1x_reloc *dest,
 				       struct drm_tegra_reloc __user *src,
 				       struct drm_device *drm,
@@ -301,11 +152,13 @@ static int host1x_reloc_copy_from_user(struct host1x_reloc *dest,
 	if (err < 0)
 		return err;
 
-	dest->cmdbuf.bo = host1x_bo_lookup(file, cmdbuf);
+	dest->flags = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;
+
+	dest->cmdbuf.bo = tegra_gem_lookup(file, cmdbuf);
 	if (!dest->cmdbuf.bo)
 		return -ENOENT;
 
-	dest->target.bo = host1x_bo_lookup(file, target);
+	dest->target.bo = tegra_gem_lookup(file, target);
 	if (!dest->target.bo)
 		return -ENOENT;
 
@@ -325,7 +178,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 	struct drm_tegra_syncpt syncpt;
 	struct host1x *host1x = dev_get_drvdata(drm->dev->parent);
 	struct drm_gem_object **refs;
-	struct host1x_syncpt *sp;
+	struct host1x_syncpt *sp = NULL;
 	struct host1x_job *job;
 	unsigned int num_refs;
 	int err;
@@ -343,7 +196,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 		return -EINVAL;
 
 	job = host1x_job_alloc(context->channel, args->num_cmdbufs,
-			       args->num_relocs);
+			       args->num_relocs, false);
 	if (!job)
 		return -ENOMEM;
 
@@ -351,6 +204,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 	job->client = client;
 	job->class = client->class;
 	job->serialize = true;
+	job->syncpt_recovery = true;
 
 	/*
 	 * Track referenced BOs so that they can be unreferenced after the
@@ -387,7 +241,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 			goto fail;
 		}
 
-		bo = host1x_bo_lookup(file, cmdbuf.handle);
+		bo = tegra_gem_lookup(file, cmdbuf.handle);
 		if (!bo) {
 			err = -ENOENT;
 			goto fail;
@@ -452,8 +306,8 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 		goto fail;
 	}
 
-	/* check whether syncpoint ID is valid */
-	sp = host1x_syncpt_get(host1x, syncpt.id);
+	/* Syncpoint ref will be dropped on job release. */
+	sp = host1x_syncpt_get_by_id(host1x, syncpt.id);
 	if (!sp) {
 		err = -ENOENT;
 		goto fail;
@@ -462,7 +316,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 	job->is_addr_reg = context->client->ops->is_addr_reg;
 	job->is_valid_class = context->client->ops->is_valid_class;
 	job->syncpt_incrs = syncpt.incrs;
-	job->syncpt_id = syncpt.id;
+	job->syncpt = sp;
 	job->timeout = 10000;
 
 	if (args->timeout && args->timeout < 10000)
@@ -482,7 +336,7 @@ int tegra_drm_submit(struct tegra_drm_context *context,
 
 fail:
 	while (num_refs--)
-		drm_gem_object_put_unlocked(refs[num_refs]);
+		drm_gem_object_put(refs[num_refs]);
 
 	kfree(refs);
 
@@ -522,7 +376,7 @@ static int tegra_gem_mmap(struct drm_device *drm, void *data,
 
 	args->offset = drm_vma_node_offset_addr(&bo->gem.vma_node);
 
-	drm_gem_object_put_unlocked(gem);
+	drm_gem_object_put(gem);
 
 	return 0;
 }
@@ -534,7 +388,7 @@ static int tegra_syncpt_read(struct drm_device *drm, void *data,
 	struct drm_tegra_syncpt_read *args = data;
 	struct host1x_syncpt *sp;
 
-	sp = host1x_syncpt_get(host, args->id);
+	sp = host1x_syncpt_get_by_id_noref(host, args->id);
 	if (!sp)
 		return -EINVAL;
 
@@ -549,7 +403,7 @@ static int tegra_syncpt_incr(struct drm_device *drm, void *data,
 	struct drm_tegra_syncpt_incr *args = data;
 	struct host1x_syncpt *sp;
 
-	sp = host1x_syncpt_get(host1x, args->id);
+	sp = host1x_syncpt_get_by_id_noref(host1x, args->id);
 	if (!sp)
 		return -EINVAL;
 
@@ -563,7 +417,7 @@ static int tegra_syncpt_wait(struct drm_device *drm, void *data,
 	struct drm_tegra_syncpt_wait *args = data;
 	struct host1x_syncpt *sp;
 
-	sp = host1x_syncpt_get(host1x, args->id);
+	sp = host1x_syncpt_get_by_id_noref(host1x, args->id);
 	if (!sp)
 		return -EINVAL;
 
@@ -578,13 +432,20 @@ static int tegra_client_open(struct tegra_drm_file *fpriv,
 {
 	int err;
 
+	err = pm_runtime_resume_and_get(client->base.dev);
+	if (err)
+		return err;
+
 	err = client->ops->open_channel(client, context);
-	if (err < 0)
+	if (err < 0) {
+		pm_runtime_put(client->base.dev);
 		return err;
+	}
 
-	err = idr_alloc(&fpriv->contexts, context, 1, 0, GFP_KERNEL);
+	err = idr_alloc(&fpriv->legacy_contexts, context, 1, 0, GFP_KERNEL);
 	if (err < 0) {
 		client->ops->close_channel(context);
+		pm_runtime_put(client->base.dev);
 		return err;
 	}
 
@@ -637,13 +498,13 @@ static int tegra_close_channel(struct drm_device *drm, void *data,
 
 	mutex_lock(&fpriv->lock);
 
-	context = idr_find(&fpriv->contexts, args->context);
+	context = idr_find(&fpriv->legacy_contexts, args->context);
 	if (!context) {
 		err = -EINVAL;
 		goto unlock;
 	}
 
-	idr_remove(&fpriv->contexts, context->id);
+	idr_remove(&fpriv->legacy_contexts, context->id);
 	tegra_drm_context_free(context);
 
 unlock:
@@ -662,7 +523,7 @@ static int tegra_get_syncpt(struct drm_device *drm, void *data,
 
 	mutex_lock(&fpriv->lock);
 
-	context = idr_find(&fpriv->contexts, args->context);
+	context = idr_find(&fpriv->legacy_contexts, args->context);
 	if (!context) {
 		err = -ENODEV;
 		goto unlock;
@@ -691,7 +552,7 @@ static int tegra_submit(struct drm_device *drm, void *data,
 
 	mutex_lock(&fpriv->lock);
 
-	context = idr_find(&fpriv->contexts, args->context);
+	context = idr_find(&fpriv->legacy_contexts, args->context);
 	if (!context) {
 		err = -ENODEV;
 		goto unlock;
@@ -716,7 +577,7 @@ static int tegra_get_syncpt_base(struct drm_device *drm, void *data,
 
 	mutex_lock(&fpriv->lock);
 
-	context = idr_find(&fpriv->contexts, args->context);
+	context = idr_find(&fpriv->legacy_contexts, args->context);
 	if (!context) {
 		err = -ENODEV;
 		goto unlock;
@@ -790,7 +651,7 @@ static int tegra_gem_set_tiling(struct drm_device *drm, void *data,
 	bo->tiling.mode = mode;
 	bo->tiling.value = value;
 
-	drm_gem_object_put_unlocked(gem);
+	drm_gem_object_put(gem);
 
 	return 0;
 }
@@ -830,7 +691,7 @@ static int tegra_gem_get_tiling(struct drm_device *drm, void *data,
 		break;
 	}
 
-	drm_gem_object_put_unlocked(gem);
+	drm_gem_object_put(gem);
 
 	return err;
 }
@@ -855,7 +716,7 @@ static int tegra_gem_set_flags(struct drm_device *drm, void *data,
 	if (args->flags & DRM_TEGRA_GEM_BOTTOM_UP)
 		bo->flags |= TEGRA_BO_BOTTOM_UP;
 
-	drm_gem_object_put_unlocked(gem);
+	drm_gem_object_put(gem);
 
 	return 0;
 }
@@ -877,7 +738,7 @@ static int tegra_gem_get_flags(struct drm_device *drm, void *data,
 	if (bo->flags & TEGRA_BO_BOTTOM_UP)
 		args->flags |= DRM_TEGRA_GEM_BOTTOM_UP;
 
-	drm_gem_object_put_unlocked(gem);
+	drm_gem_object_put(gem);
 
 	return 0;
 }
@@ -885,34 +746,49 @@ static int tegra_gem_get_flags(struct drm_device *drm, void *data,
 
 static const struct drm_ioctl_desc tegra_drm_ioctls[] = {
 #ifdef CONFIG_DRM_TEGRA_STAGING
-	DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_gem_create,
-			  DRM_UNLOCKED | DRM_RENDER_ALLOW),
-	DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_gem_mmap,
-			  DRM_UNLOCKED | DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_OPEN, tegra_drm_ioctl_channel_open,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_CLOSE, tegra_drm_ioctl_channel_close,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_MAP, tegra_drm_ioctl_channel_map,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_UNMAP, tegra_drm_ioctl_channel_unmap,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_CHANNEL_SUBMIT, tegra_drm_ioctl_channel_submit,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_ALLOCATE, tegra_drm_ioctl_syncpoint_allocate,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_FREE, tegra_drm_ioctl_syncpoint_free,
+			  DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPOINT_WAIT, tegra_drm_ioctl_syncpoint_wait,
+			  DRM_RENDER_ALLOW),
+
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_gem_create, DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_gem_mmap, DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_READ, tegra_syncpt_read,
-			  DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_INCR, tegra_syncpt_incr,
-			  DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_WAIT, tegra_syncpt_wait,
-			  DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_OPEN_CHANNEL, tegra_open_channel,
-			  DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_CLOSE_CHANNEL, tegra_close_channel,
-			  DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_GET_SYNCPT, tegra_get_syncpt,
-			  DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_SUBMIT, tegra_submit,
-			  DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_GET_SYNCPT_BASE, tegra_get_syncpt_base,
-			  DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_GEM_SET_TILING, tegra_gem_set_tiling,
-			  DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_GEM_GET_TILING, tegra_gem_get_tiling,
-			  DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_GEM_SET_FLAGS, tegra_gem_set_flags,
-			  DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(TEGRA_GEM_GET_FLAGS, tegra_gem_get_flags,
-			  DRM_UNLOCKED | DRM_RENDER_ALLOW),
+			  DRM_RENDER_ALLOW),
 #endif
 };
 
@@ -926,6 +802,7 @@ static const struct file_operations tegra_drm_fops = {
 	.read = drm_read,
 	.compat_ioctl = drm_compat_ioctl,
 	.llseek = noop_llseek,
+	.fop_flags = FOP_UNSIGNED_OFFSET,
 };
 
 static int tegra_drm_context_cleanup(int id, void *p, void *data)
@@ -942,10 +819,11 @@ static void tegra_drm_postclose(struct drm_device *drm, struct drm_file *file)
 	struct tegra_drm_file *fpriv = file->driver_priv;
 
 	mutex_lock(&fpriv->lock);
-	idr_for_each(&fpriv->contexts, tegra_drm_context_cleanup, NULL);
+	idr_for_each(&fpriv->legacy_contexts, tegra_drm_context_cleanup, NULL);
+	tegra_drm_uapi_close_file(fpriv);
 	mutex_unlock(&fpriv->lock);
 
-	idr_destroy(&fpriv->contexts);
+	idr_destroy(&fpriv->legacy_contexts);
 	mutex_destroy(&fpriv->lock);
 	kfree(fpriv);
 }
@@ -993,44 +871,36 @@ static struct drm_info_list tegra_debugfs_list[] = {
 	{ "iova", tegra_debugfs_iova, 0 },
 };
 
-static int tegra_debugfs_init(struct drm_minor *minor)
+static void tegra_debugfs_init(struct drm_minor *minor)
 {
-	return drm_debugfs_create_files(tegra_debugfs_list,
-					ARRAY_SIZE(tegra_debugfs_list),
-					minor->debugfs_root, minor);
+	drm_debugfs_create_files(tegra_debugfs_list,
+				 ARRAY_SIZE(tegra_debugfs_list),
+				 minor->debugfs_root, minor);
 }
 #endif
 
-static struct drm_driver tegra_drm_driver = {
-	.driver_features = DRIVER_MODESET | DRIVER_GEM | DRIVER_PRIME |
-			   DRIVER_ATOMIC | DRIVER_RENDER,
-	.load = tegra_drm_load,
-	.unload = tegra_drm_unload,
+static const struct drm_driver tegra_drm_driver = {
+	.driver_features = DRIVER_MODESET | DRIVER_GEM |
+			   DRIVER_ATOMIC | DRIVER_RENDER | DRIVER_SYNCOBJ,
 	.open = tegra_drm_open,
 	.postclose = tegra_drm_postclose,
-	.lastclose = drm_fb_helper_lastclose,
 
 #if defined(CONFIG_DEBUG_FS)
 	.debugfs_init = tegra_debugfs_init,
 #endif
 
-	.gem_free_object_unlocked = tegra_bo_free_object,
-	.gem_vm_ops = &tegra_bo_vm_ops,
-
-	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
-	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
-	.gem_prime_export = tegra_gem_prime_export,
 	.gem_prime_import = tegra_gem_prime_import,
 
 	.dumb_create = tegra_bo_dumb_create,
 
+	TEGRA_FBDEV_DRIVER_OPS,
+
 	.ioctls = tegra_drm_ioctls,
 	.num_ioctls = ARRAY_SIZE(tegra_drm_ioctls),
 	.fops = &tegra_drm_fops,
 
 	.name = DRIVER_NAME,
 	.desc = DRIVER_DESC,
-	.date = DRIVER_DATE,
 	.major = DRIVER_MAJOR,
 	.minor = DRIVER_MINOR,
 	.patchlevel = DRIVER_PATCHLEVEL,
@@ -1039,8 +909,17 @@ static struct drm_driver tegra_drm_driver = {
 int tegra_drm_register_client(struct tegra_drm *tegra,
 			      struct tegra_drm_client *client)
 {
+	/*
+	 * When MLOCKs are implemented, change to allocate a shared channel
+	 * only when MLOCKs are disabled.
+	 */
+	client->shared_channel = host1x_channel_request(&client->base);
+	if (!client->shared_channel)
+		return -EBUSY;
+
 	mutex_lock(&tegra->clients_lock);
 	list_add_tail(&client->list, &tegra->clients);
+	client->drm = tegra;
 	mutex_unlock(&tegra->clients_lock);
 
 	return 0;
@@ -1051,62 +930,82 @@ int tegra_drm_unregister_client(struct tegra_drm *tegra,
 {
 	mutex_lock(&tegra->clients_lock);
 	list_del_init(&client->list);
+	client->drm = NULL;
 	mutex_unlock(&tegra->clients_lock);
 
+	if (client->shared_channel)
+		host1x_channel_put(client->shared_channel);
+
 	return 0;
 }
 
-struct iommu_group *host1x_client_iommu_attach(struct host1x_client *client,
-					       bool shared)
+int host1x_client_iommu_attach(struct host1x_client *client)
 {
-	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct iommu_domain *domain = iommu_get_domain_for_dev(client->dev);
+	struct drm_device *drm = dev_get_drvdata(client->host);
 	struct tegra_drm *tegra = drm->dev_private;
 	struct iommu_group *group = NULL;
 	int err;
 
+#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+	if (client->dev->archdata.mapping) {
+		struct dma_iommu_mapping *mapping =
+				to_dma_iommu_mapping(client->dev);
+		arm_iommu_detach_device(client->dev);
+		arm_iommu_release_mapping(mapping);
+
+		domain = iommu_get_domain_for_dev(client->dev);
+	}
+#endif
+
+	/*
+	 * If the host1x client is already attached to an IOMMU domain that is
+	 * not the shared IOMMU domain, don't try to attach it to a different
+	 * domain. This allows using the IOMMU-backed DMA API.
+	 */
+	if (domain && domain->type != IOMMU_DOMAIN_IDENTITY &&
+	    domain != tegra->domain)
+		return 0;
+
 	if (tegra->domain) {
 		group = iommu_group_get(client->dev);
-		if (!group) {
-			dev_err(client->dev, "failed to get IOMMU group\n");
-			return ERR_PTR(-ENODEV);
-		}
+		if (!group)
+			return -ENODEV;
 
-		if (!shared || (shared && (group != tegra->group))) {
-#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
-			if (client->dev->archdata.mapping) {
-				struct dma_iommu_mapping *mapping =
-					to_dma_iommu_mapping(client->dev);
-				arm_iommu_detach_device(client->dev);
-				arm_iommu_release_mapping(mapping);
-			}
-#endif
+		if (domain != tegra->domain) {
 			err = iommu_attach_group(tegra->domain, group);
 			if (err < 0) {
 				iommu_group_put(group);
-				return ERR_PTR(err);
+				return err;
 			}
-
-			if (shared && !tegra->group)
-				tegra->group = group;
 		}
+
+		tegra->use_explicit_iommu = true;
 	}
 
-	return group;
+	client->group = group;
+
+	return 0;
 }
 
-void host1x_client_iommu_detach(struct host1x_client *client,
-				struct iommu_group *group)
+void host1x_client_iommu_detach(struct host1x_client *client)
 {
-	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->host);
 	struct tegra_drm *tegra = drm->dev_private;
+	struct iommu_domain *domain;
 
-	if (group) {
-		if (group == tegra->group) {
-			iommu_detach_group(tegra->domain, group);
-			tegra->group = NULL;
-		}
+	if (client->group) {
+		/*
+		 * Devices that are part of the same group may no longer be
+		 * attached to a domain at this point because their group may
+		 * have been detached by an earlier client.
+		 */
+		domain = iommu_get_domain_for_dev(client->dev);
+		if (domain)
+			iommu_detach_group(tegra->domain, client->group);
 
-		iommu_group_put(group);
+		iommu_group_put(client->group);
+		client->group = NULL;
 	}
 }
 
@@ -1156,7 +1055,7 @@ void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *dma)
 
 	*dma = iova_dma_addr(&tegra->carveout.domain, alloc);
 	err = iommu_map(tegra->domain, *dma, virt_to_phys(virt),
-			size, IOMMU_READ | IOMMU_WRITE);
+			size, IOMMU_READ | IOMMU_WRITE, GFP_KERNEL);
 	if (err < 0)
 		goto free_iova;
 
@@ -1187,28 +1086,216 @@ void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt,
 	free_pages((unsigned long)virt, get_order(size));
 }
 
+static bool host1x_drm_wants_iommu(struct host1x_device *dev)
+{
+	struct host1x *host1x = dev_get_drvdata(dev->dev.parent);
+	struct iommu_domain *domain;
+
+	/* Our IOMMU usage policy doesn't currently play well with GART */
+	if (of_machine_is_compatible("nvidia,tegra20"))
+		return false;
+
+	/*
+	 * If the Tegra DRM clients are backed by an IOMMU, push buffers are
+	 * likely to be allocated beyond the 32-bit boundary if sufficient
+	 * system memory is available. This is problematic on earlier Tegra
+	 * generations where host1x supports a maximum of 32 address bits in
+	 * the GATHER opcode. In this case, unless host1x is behind an IOMMU
+	 * as well it won't be able to process buffers allocated beyond the
+	 * 32-bit boundary.
+	 *
+	 * The DMA API will use bounce buffers in this case, so that could
+	 * perhaps still be made to work, even if less efficient, but there
+	 * is another catch: in order to perform cache maintenance on pages
+	 * allocated for discontiguous buffers we need to map and unmap the
+	 * SG table representing these buffers. This is fine for something
+	 * small like a push buffer, but it exhausts the bounce buffer pool
+	 * (typically on the order of a few MiB) for framebuffers (many MiB
+	 * for any modern resolution).
+	 *
+	 * Work around this by making sure that Tegra DRM clients only use
+	 * an IOMMU if the parent host1x also uses an IOMMU.
+	 *
+	 * Note that there's still a small gap here that we don't cover: if
+	 * the DMA API is backed by an IOMMU there's no way to control which
+	 * device is attached to an IOMMU and which isn't, except via wiring
+	 * up the device tree appropriately. This is considered a problem
+	 * of integration, so care must be taken for the DT to be consistent.
+	 */
+	domain = iommu_get_domain_for_dev(dev->dev.parent);
+
+	/*
+	 * Tegra20 and Tegra30 don't support addressing memory beyond the
+	 * 32-bit boundary, so the regular GATHER opcodes will always be
+	 * sufficient and whether or not the host1x is attached to an IOMMU
+	 * doesn't matter.
+	 */
+	if (!domain && host1x_get_dma_mask(host1x) <= DMA_BIT_MASK(32))
+		return true;
+
+	return domain != NULL;
+}
+
 static int host1x_drm_probe(struct host1x_device *dev)
 {
-	struct drm_driver *driver = &tegra_drm_driver;
+	struct device *dma_dev = dev->dev.parent;
+	struct tegra_drm *tegra;
 	struct drm_device *drm;
 	int err;
 
-	drm = drm_dev_alloc(driver, &dev->dev);
+	drm = drm_dev_alloc(&tegra_drm_driver, &dev->dev);
 	if (IS_ERR(drm))
 		return PTR_ERR(drm);
 
+	tegra = kzalloc(sizeof(*tegra), GFP_KERNEL);
+	if (!tegra) {
+		err = -ENOMEM;
+		goto put;
+	}
+
+	if (host1x_drm_wants_iommu(dev) && device_iommu_mapped(dma_dev)) {
+		tegra->domain = iommu_paging_domain_alloc(dma_dev);
+		if (IS_ERR(tegra->domain)) {
+			err = PTR_ERR(tegra->domain);
+			goto free;
+		}
+
+		err = iova_cache_get();
+		if (err < 0)
+			goto domain;
+	}
+
+	mutex_init(&tegra->clients_lock);
+	INIT_LIST_HEAD(&tegra->clients);
+
 	dev_set_drvdata(&dev->dev, drm);
+	drm->dev_private = tegra;
+	tegra->drm = drm;
+
+	drm_mode_config_init(drm);
+
+	drm->mode_config.min_width = 0;
+	drm->mode_config.min_height = 0;
+	drm->mode_config.max_width = 0;
+	drm->mode_config.max_height = 0;
+
+	drm->mode_config.normalize_zpos = true;
+
+	drm->mode_config.funcs = &tegra_drm_mode_config_funcs;
+	drm->mode_config.helper_private = &tegra_drm_mode_config_helpers;
 
-	err = drm_fb_helper_remove_conflicting_framebuffers(NULL, "tegradrmfb", false);
+	drm_kms_helper_poll_init(drm);
+
+	err = host1x_device_init(dev);
 	if (err < 0)
-		goto put;
+		goto poll;
+
+	/*
+	 * Now that all display controllers have been initialized, the maximum
+	 * supported resolution is known and the bitmask for horizontal and
+	 * vertical bitfields can be computed.
+	 */
+	tegra->hmask = drm->mode_config.max_width - 1;
+	tegra->vmask = drm->mode_config.max_height - 1;
+
+	if (tegra->use_explicit_iommu) {
+		u64 carveout_start, carveout_end, gem_start, gem_end;
+		u64 dma_mask = dma_get_mask(&dev->dev);
+		dma_addr_t start, end;
+		unsigned long order;
+
+		start = tegra->domain->geometry.aperture_start & dma_mask;
+		end = tegra->domain->geometry.aperture_end & dma_mask;
+
+		gem_start = start;
+		gem_end = end - CARVEOUT_SZ;
+		carveout_start = gem_end + 1;
+		carveout_end = end;
+
+		order = __ffs(tegra->domain->pgsize_bitmap);
+		init_iova_domain(&tegra->carveout.domain, 1UL << order,
+				 carveout_start >> order);
+
+		tegra->carveout.shift = iova_shift(&tegra->carveout.domain);
+		tegra->carveout.limit = carveout_end >> tegra->carveout.shift;
+
+		drm_mm_init(&tegra->mm, gem_start, gem_end - gem_start + 1);
+		mutex_init(&tegra->mm_lock);
+
+		DRM_DEBUG_DRIVER("IOMMU apertures:\n");
+		DRM_DEBUG_DRIVER("  GEM: %#llx-%#llx\n", gem_start, gem_end);
+		DRM_DEBUG_DRIVER("  Carveout: %#llx-%#llx\n", carveout_start,
+				 carveout_end);
+	} else if (tegra->domain) {
+		iommu_domain_free(tegra->domain);
+		tegra->domain = NULL;
+		iova_cache_put();
+	}
+
+	if (tegra->hub) {
+		err = tegra_display_hub_prepare(tegra->hub);
+		if (err < 0)
+			goto device;
+	}
+
+	/* syncpoints are used for full 32-bit hardware VBLANK counters */
+	drm->max_vblank_count = 0xffffffff;
+
+	err = drm_vblank_init(drm, drm->mode_config.num_crtc);
+	if (err < 0)
+		goto hub;
+
+	drm_mode_config_reset(drm);
+
+	/*
+	 * Only take over from a potential firmware framebuffer if any CRTCs
+	 * have been registered. This must not be a fatal error because there
+	 * are other accelerators that are exposed via this driver.
+	 *
+	 * Another case where this happens is on Tegra234 where the display
+	 * hardware is no longer part of the host1x complex, so this driver
+	 * will not expose any modesetting features.
+	 */
+	if (drm->mode_config.num_crtc > 0) {
+		err = aperture_remove_all_conflicting_devices(tegra_drm_driver.name);
+		if (err < 0)
+			goto hub;
+	} else {
+		/*
+		 * Indicate to userspace that this doesn't expose any display
+		 * capabilities.
+		 */
+		drm->driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC);
+	}
 
 	err = drm_dev_register(drm, 0);
 	if (err < 0)
-		goto put;
+		goto hub;
+
+	drm_client_setup(drm, NULL);
 
 	return 0;
 
+hub:
+	if (tegra->hub)
+		tegra_display_hub_cleanup(tegra->hub);
+device:
+	if (tegra->domain) {
+		mutex_destroy(&tegra->mm_lock);
+		drm_mm_takedown(&tegra->mm);
+		put_iova_domain(&tegra->carveout.domain);
+		iova_cache_put();
+	}
+
+	host1x_device_exit(dev);
+poll:
+	drm_kms_helper_poll_fini(drm);
+	drm_mode_config_cleanup(drm);
+domain:
+	if (tegra->domain)
+		iommu_domain_free(tegra->domain);
+free:
+	kfree(tegra);
 put:
 	drm_dev_put(drm);
 	return err;
@@ -1217,13 +1304,41 @@ put:
 static int host1x_drm_remove(struct host1x_device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(&dev->dev);
+	struct tegra_drm *tegra = drm->dev_private;
+	int err;
 
 	drm_dev_unregister(drm);
+
+	drm_kms_helper_poll_fini(drm);
+	drm_atomic_helper_shutdown(drm);
+	drm_mode_config_cleanup(drm);
+
+	if (tegra->hub)
+		tegra_display_hub_cleanup(tegra->hub);
+
+	err = host1x_device_exit(dev);
+	if (err < 0)
+		dev_err(&dev->dev, "host1x device cleanup failed: %d\n", err);
+
+	if (tegra->domain) {
+		mutex_destroy(&tegra->mm_lock);
+		drm_mm_takedown(&tegra->mm);
+		put_iova_domain(&tegra->carveout.domain);
+		iova_cache_put();
+		iommu_domain_free(tegra->domain);
+	}
+
+	kfree(tegra);
 	drm_dev_put(drm);
 
 	return 0;
 }
 
+static void host1x_drm_shutdown(struct host1x_device *dev)
+{
+	drm_atomic_helper_shutdown(dev_get_drvdata(&dev->dev));
+}
+
 #ifdef CONFIG_PM_SLEEP
 static int host1x_drm_suspend(struct device *dev)
 {
@@ -1252,8 +1367,10 @@ static const struct of_device_id host1x_drm_subdevs[] = {
 	{ .compatible = "nvidia,tegra30-hdmi", },
 	{ .compatible = "nvidia,tegra30-gr2d", },
 	{ .compatible = "nvidia,tegra30-gr3d", },
+	{ .compatible = "nvidia,tegra114-dc", },
 	{ .compatible = "nvidia,tegra114-dsi", },
 	{ .compatible = "nvidia,tegra114-hdmi", },
+	{ .compatible = "nvidia,tegra114-gr2d", },
 	{ .compatible = "nvidia,tegra114-gr3d", },
 	{ .compatible = "nvidia,tegra124-dc", },
 	{ .compatible = "nvidia,tegra124-sor", },
@@ -1266,15 +1383,21 @@ static const struct of_device_id host1x_drm_subdevs[] = {
 	{ .compatible = "nvidia,tegra210-sor", },
 	{ .compatible = "nvidia,tegra210-sor1", },
 	{ .compatible = "nvidia,tegra210-vic", },
+	{ .compatible = "nvidia,tegra210-nvdec", },
+	{ .compatible = "nvidia,tegra210-nvjpg", },
 	{ .compatible = "nvidia,tegra186-display", },
 	{ .compatible = "nvidia,tegra186-dc", },
 	{ .compatible = "nvidia,tegra186-sor", },
 	{ .compatible = "nvidia,tegra186-sor1", },
 	{ .compatible = "nvidia,tegra186-vic", },
+	{ .compatible = "nvidia,tegra186-nvdec", },
 	{ .compatible = "nvidia,tegra194-display", },
 	{ .compatible = "nvidia,tegra194-dc", },
 	{ .compatible = "nvidia,tegra194-sor", },
 	{ .compatible = "nvidia,tegra194-vic", },
+	{ .compatible = "nvidia,tegra194-nvdec", },
+	{ .compatible = "nvidia,tegra234-vic", },
+	{ .compatible = "nvidia,tegra234-nvdec", },
 	{ /* sentinel */ }
 };
 
@@ -1285,6 +1408,7 @@ static struct host1x_driver host1x_drm_driver = {
 	},
 	.probe = host1x_drm_probe,
 	.remove = host1x_drm_remove,
+	.shutdown = host1x_drm_shutdown,
 	.subdevs = host1x_drm_subdevs,
 };
 
@@ -1298,12 +1422,17 @@ static struct platform_driver * const drivers[] = {
 	&tegra_gr2d_driver,
 	&tegra_gr3d_driver,
 	&tegra_vic_driver,
+	&tegra_nvdec_driver,
+	&tegra_nvjpg_driver,
 };
 
 static int __init host1x_drm_init(void)
 {
 	int err;
 
+	if (drm_firmware_drivers_only())
+		return -ENODEV;
+
 	err = host1x_driver_register(&host1x_drm_driver);
 	if (err < 0)
 		return err;
diff --git a/drivers/gpu/drm/tegra/drm.h b/drivers/gpu/drm/tegra/drm.h
index 1012335bb489..ae68b03d8483 100644
--- a/drivers/gpu/drm/tegra/drm.h
+++ b/drivers/gpu/drm/tegra/drm.h
@@ -1,46 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2012 Avionic Design GmbH
  * Copyright (C) 2012-2013 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef HOST1X_DRM_H
 #define HOST1X_DRM_H 1
 
-#include <uapi/drm/tegra_drm.h>
 #include <linux/host1x.h>
 #include <linux/iova.h>
-#include <linux/of_gpio.h>
+#include <linux/gpio/consumer.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic.h>
-#include <drm/drm_crtc_helper.h>
-#include <drm/drm_edid.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_encoder.h>
-#include <drm/drm_fb_helper.h>
 #include <drm/drm_fixed.h>
+#include <drm/drm_probe_helper.h>
+#include <uapi/drm/tegra_drm.h>
 
 #include "gem.h"
 #include "hub.h"
 #include "trace.h"
 
-struct reset_control;
+/* XXX move to include/uapi/drm/drm_fourcc.h? */
+#define DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT BIT_ULL(22)
 
-#ifdef CONFIG_DRM_FBDEV_EMULATION
-struct tegra_fbdev {
-	struct drm_fb_helper base;
-	struct drm_framebuffer *fb;
-};
-#endif
+struct drm_fb_helper;
+struct drm_fb_helper_surface_size;
+
+struct edid;
+struct reset_control;
 
 struct tegra_drm {
 	struct drm_device *drm;
 
 	struct iommu_domain *domain;
-	struct iommu_group *group;
+	bool use_explicit_iommu;
 	struct mutex mm_lock;
 	struct drm_mm mm;
 
@@ -53,21 +48,30 @@ struct tegra_drm {
 	struct mutex clients_lock;
 	struct list_head clients;
 
-#ifdef CONFIG_DRM_FBDEV_EMULATION
-	struct tegra_fbdev *fbdev;
-#endif
-
+	unsigned int hmask, vmask;
 	unsigned int pitch_align;
+	unsigned int num_crtcs;
 
 	struct tegra_display_hub *hub;
 };
 
+static inline struct host1x *tegra_drm_to_host1x(struct tegra_drm *tegra)
+{
+	return dev_get_drvdata(tegra->drm->dev->parent);
+}
+
 struct tegra_drm_client;
 
 struct tegra_drm_context {
 	struct tegra_drm_client *client;
 	struct host1x_channel *channel;
+
+	/* Only used by legacy UAPI. */
 	unsigned int id;
+
+	/* Only used by new UAPI. */
+	struct xarray mappings;
+	struct host1x_memory_context *memory_context;
 };
 
 struct tegra_drm_client_ops {
@@ -79,16 +83,29 @@ struct tegra_drm_client_ops {
 	int (*submit)(struct tegra_drm_context *context,
 		      struct drm_tegra_submit *args, struct drm_device *drm,
 		      struct drm_file *file);
+	int (*get_streamid_offset)(struct tegra_drm_client *client, u32 *offset);
+	int (*can_use_memory_ctx)(struct tegra_drm_client *client, bool *supported);
 };
 
 int tegra_drm_submit(struct tegra_drm_context *context,
 		     struct drm_tegra_submit *args, struct drm_device *drm,
 		     struct drm_file *file);
 
+static inline int
+tegra_drm_get_streamid_offset_thi(struct tegra_drm_client *client, u32 *offset)
+{
+	*offset = 0x30;
+
+	return 0;
+}
+
 struct tegra_drm_client {
 	struct host1x_client base;
 	struct list_head list;
+	struct tegra_drm *drm;
+	struct host1x_channel *shared_channel;
 
+	/* Set by driver */
 	unsigned int version;
 	const struct tegra_drm_client_ops *ops;
 };
@@ -103,13 +120,8 @@ int tegra_drm_register_client(struct tegra_drm *tegra,
 			      struct tegra_drm_client *client);
 int tegra_drm_unregister_client(struct tegra_drm *tegra,
 				struct tegra_drm_client *client);
-struct iommu_group *host1x_client_iommu_attach(struct host1x_client *client,
-					       bool shared);
-void host1x_client_iommu_detach(struct host1x_client *client,
-				struct iommu_group *group);
-
-int tegra_drm_init(struct tegra_drm *tegra, struct drm_device *drm);
-int tegra_drm_exit(struct tegra_drm *tegra);
+int host1x_client_iommu_attach(struct host1x_client *client);
+void host1x_client_iommu_detach(struct host1x_client *client);
 
 void *tegra_drm_alloc(struct tegra_drm *tegra, size_t size, dma_addr_t *iova);
 void tegra_drm_free(struct tegra_drm *tegra, size_t size, void *virt,
@@ -121,13 +133,13 @@ struct tegra_output {
 	struct device_node *of_node;
 	struct device *dev;
 
+	struct drm_bridge *bridge;
 	struct drm_panel *panel;
 	struct i2c_adapter *ddc;
-	const struct edid *edid;
-	struct cec_notifier *notifier;
+	const struct drm_edid *drm_edid;
+	struct cec_notifier *cec;
 	unsigned int hpd_irq;
-	int hpd_gpio;
-	enum of_gpio_flags hpd_gpio_flags;
+	struct gpio_desc *hpd_gpio;
 
 	struct drm_encoder encoder;
 	struct drm_connector connector;
@@ -150,26 +162,21 @@ int tegra_output_init(struct drm_device *drm, struct tegra_output *output);
 void tegra_output_exit(struct tegra_output *output);
 void tegra_output_find_possible_crtcs(struct tegra_output *output,
 				      struct drm_device *drm);
+int tegra_output_suspend(struct tegra_output *output);
+int tegra_output_resume(struct tegra_output *output);
 
 int tegra_output_connector_get_modes(struct drm_connector *connector);
 enum drm_connector_status
 tegra_output_connector_detect(struct drm_connector *connector, bool force);
 void tegra_output_connector_destroy(struct drm_connector *connector);
 
-void tegra_output_encoder_destroy(struct drm_encoder *encoder);
-
 /* from dpaux.c */
-struct drm_dp_link;
-
 struct drm_dp_aux *drm_dp_aux_find_by_of_node(struct device_node *np);
 enum drm_connector_status drm_dp_aux_detect(struct drm_dp_aux *aux);
 int drm_dp_aux_attach(struct drm_dp_aux *aux, struct tegra_output *output);
 int drm_dp_aux_detach(struct drm_dp_aux *aux);
 int drm_dp_aux_enable(struct drm_dp_aux *aux);
 int drm_dp_aux_disable(struct drm_dp_aux *aux);
-int drm_dp_aux_prepare(struct drm_dp_aux *aux, u8 encoding);
-int drm_dp_aux_train(struct drm_dp_aux *aux, struct drm_dp_link *link,
-		     u8 pattern);
 
 /* from fb.c */
 struct tegra_bo *tegra_fb_get_plane(struct drm_framebuffer *framebuffer,
@@ -177,13 +184,25 @@ struct tegra_bo *tegra_fb_get_plane(struct drm_framebuffer *framebuffer,
 bool tegra_fb_is_bottom_up(struct drm_framebuffer *framebuffer);
 int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer,
 			struct tegra_bo_tiling *tiling);
+struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm,
+				       const struct drm_format_info *info,
+				       const struct drm_mode_fb_cmd2 *mode_cmd,
+				       struct tegra_bo **planes,
+				       unsigned int num_planes);
 struct drm_framebuffer *tegra_fb_create(struct drm_device *drm,
 					struct drm_file *file,
+					const struct drm_format_info *info,
 					const struct drm_mode_fb_cmd2 *cmd);
-int tegra_drm_fb_prepare(struct drm_device *drm);
-void tegra_drm_fb_free(struct drm_device *drm);
-int tegra_drm_fb_init(struct drm_device *drm);
-void tegra_drm_fb_exit(struct drm_device *drm);
+
+#ifdef CONFIG_DRM_FBDEV_EMULATION
+int tegra_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper,
+				   struct drm_fb_helper_surface_size *sizes);
+#define TEGRA_FBDEV_DRIVER_OPS \
+	.fbdev_probe = tegra_fbdev_driver_fbdev_probe
+#else
+#define TEGRA_FBDEV_DRIVER_OPS \
+	.fbdev_probe = NULL
+#endif
 
 extern struct platform_driver tegra_display_hub_driver;
 extern struct platform_driver tegra_dc_driver;
@@ -194,5 +213,7 @@ extern struct platform_driver tegra_sor_driver;
 extern struct platform_driver tegra_gr2d_driver;
 extern struct platform_driver tegra_gr3d_driver;
 extern struct platform_driver tegra_vic_driver;
+extern struct platform_driver tegra_nvdec_driver;
+extern struct platform_driver tegra_nvjpg_driver;
 
 #endif /* HOST1X_DRM_H */
diff --git a/drivers/gpu/drm/tegra/dsi.c b/drivers/gpu/drm/tegra/dsi.c
index ee6ca8fa1c65..175f5f9937b0 100644
--- a/drivers/gpu/drm/tegra/dsi.c
+++ b/drivers/gpu/drm/tegra/dsi.c
@@ -1,28 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2013 NVIDIA Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
 #include <linux/debugfs.h>
+#include <linux/delay.h>
 #include <linux/host1x.h>
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
+#include <linux/regulator/consumer.h>
 #include <linux/reset.h>
 
-#include <linux/regulator/consumer.h>
+#include <video/mipi_display.h>
 
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_file.h>
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_panel.h>
-
-#include <video/mipi_display.h>
+#include <drm/drm_print.h>
+#include <drm/drm_simple_kms_helper.h>
 
 #include "dc.h"
 #include "drm.h"
@@ -235,7 +236,6 @@ static int tegra_dsi_late_register(struct drm_connector *connector)
 	struct drm_minor *minor = connector->dev->primary;
 	struct dentry *root = connector->debugfs_entry;
 	struct tegra_dsi *dsi = to_dsi(output);
-	int err;
 
 	dsi->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
 				     GFP_KERNEL);
@@ -245,17 +245,9 @@ static int tegra_dsi_late_register(struct drm_connector *connector)
 	for (i = 0; i < count; i++)
 		dsi->debugfs_files[i].data = dsi;
 
-	err = drm_debugfs_create_files(dsi->debugfs_files, count, root, minor);
-	if (err < 0)
-		goto free;
+	drm_debugfs_create_files(dsi->debugfs_files, count, root, minor);
 
 	return 0;
-
-free:
-	kfree(dsi->debugfs_files);
-	dsi->debugfs_files = NULL;
-
-	return err;
 }
 
 static void tegra_dsi_early_unregister(struct drm_connector *connector)
@@ -265,6 +257,7 @@ static void tegra_dsi_early_unregister(struct drm_connector *connector)
 	struct tegra_dsi *dsi = to_dsi(output);
 
 	drm_debugfs_remove_files(dsi->debugfs_files, count,
+				 connector->debugfs_entry,
 				 connector->dev->primary);
 	kfree(dsi->debugfs_files);
 	dsi->debugfs_files = NULL;
@@ -553,12 +546,19 @@ static void tegra_dsi_configure(struct tegra_dsi *dsi, unsigned int pipe,
 		/* horizontal back porch */
 		hbp = (mode->htotal - mode->hsync_end) * mul / div;
 
-		if ((dsi->flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) == 0)
-			hbp += hsw;
-
 		/* horizontal front porch */
 		hfp = (mode->hsync_start - mode->hdisplay) * mul / div;
 
+		if (dsi->master || dsi->slave) {
+			hact /= 2;
+			hsw /= 2;
+			hbp /= 2;
+			hfp /= 2;
+		}
+
+		if ((dsi->flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE) == 0)
+			hbp += hsw;
+
 		/* subtract packet overhead */
 		hsw -= 10;
 		hbp -= 14;
@@ -568,11 +568,6 @@ static void tegra_dsi_configure(struct tegra_dsi *dsi, unsigned int pipe,
 		tegra_dsi_writel(dsi, hact << 16 | hbp, DSI_PKT_LEN_2_3);
 		tegra_dsi_writel(dsi, hfp, DSI_PKT_LEN_4_5);
 		tegra_dsi_writel(dsi, 0x0f0f << 16, DSI_PKT_LEN_6_7);
-
-		/* set SOL delay (for non-burst mode only) */
-		tegra_dsi_writel(dsi, 8 * mul / div, DSI_SOL_DELAY);
-
-		/* TODO: implement ganged mode */
 	} else {
 		u16 bytes;
 
@@ -594,29 +589,28 @@ static void tegra_dsi_configure(struct tegra_dsi *dsi, unsigned int pipe,
 		value = MIPI_DCS_WRITE_MEMORY_START << 8 |
 			MIPI_DCS_WRITE_MEMORY_CONTINUE;
 		tegra_dsi_writel(dsi, value, DSI_DCS_CMDS);
+	}
 
-		/* set SOL delay */
-		if (dsi->master || dsi->slave) {
-			unsigned long delay, bclk, bclk_ganged;
-			unsigned int lanes = state->lanes;
-
-			/* SOL to valid, valid to FIFO and FIFO write delay */
-			delay = 4 + 4 + 2;
-			delay = DIV_ROUND_UP(delay * mul, div * lanes);
-			/* FIFO read delay */
-			delay = delay + 6;
-
-			bclk = DIV_ROUND_UP(mode->htotal * mul, div * lanes);
-			bclk_ganged = DIV_ROUND_UP(bclk * lanes / 2, lanes);
-			value = bclk - bclk_ganged + delay + 20;
-		} else {
-			/* TODO: revisit for non-ganged mode */
-			value = 8 * mul / div;
-		}
+	/* set SOL delay */
+	if (dsi->master || dsi->slave) {
+		unsigned long delay, bclk, bclk_ganged;
+		unsigned int lanes = state->lanes;
+
+		/* SOL to valid, valid to FIFO and FIFO write delay */
+		delay = 4 + 4 + 2;
+		delay = DIV_ROUND_UP(delay * mul, div * lanes);
+		/* FIFO read delay */
+		delay = delay + 6;
 
-		tegra_dsi_writel(dsi, value, DSI_SOL_DELAY);
+		bclk = DIV_ROUND_UP(mode->htotal * mul, div * lanes);
+		bclk_ganged = DIV_ROUND_UP(bclk * lanes / 2, lanes);
+		value = bclk - bclk_ganged + delay + 20;
+	} else {
+		value = 8 * mul / div;
 	}
 
+	tegra_dsi_writel(dsi, value, DSI_SOL_DELAY);
+
 	if (dsi->slave) {
 		tegra_dsi_configure(dsi->slave, pipe, mode);
 
@@ -679,6 +673,7 @@ static int tegra_dsi_pad_enable(struct tegra_dsi *dsi)
 static int tegra_dsi_pad_calibrate(struct tegra_dsi *dsi)
 {
 	u32 value;
+	int err;
 
 	/*
 	 * XXX Is this still needed? The module reset is deasserted right
@@ -702,7 +697,11 @@ static int tegra_dsi_pad_calibrate(struct tegra_dsi *dsi)
 		DSI_PAD_PREEMP_PD(0x03) | DSI_PAD_PREEMP_PU(0x3);
 	tegra_dsi_writel(dsi, value, DSI_PAD_CONTROL_3);
 
-	return tegra_mipi_calibrate(dsi->mipi);
+	err = tegra_mipi_start_calibration(dsi->mipi);
+	if (err < 0)
+		return err;
+
+	return tegra_mipi_finish_calibration(dsi->mipi);
 }
 
 static void tegra_dsi_set_timeout(struct tegra_dsi *dsi, unsigned long bclk,
@@ -815,7 +814,7 @@ static const struct drm_connector_funcs tegra_dsi_connector_funcs = {
 
 static enum drm_mode_status
 tegra_dsi_connector_mode_valid(struct drm_connector *connector,
-			       struct drm_display_mode *mode)
+			       const struct drm_display_mode *mode)
 {
 	return MODE_OK;
 }
@@ -825,10 +824,6 @@ static const struct drm_connector_helper_funcs tegra_dsi_connector_helper_funcs
 	.mode_valid = tegra_dsi_connector_mode_valid,
 };
 
-static const struct drm_encoder_funcs tegra_dsi_encoder_funcs = {
-	.destroy = tegra_output_encoder_destroy,
-};
-
 static void tegra_dsi_unprepare(struct tegra_dsi *dsi)
 {
 	int err;
@@ -841,7 +836,9 @@ static void tegra_dsi_unprepare(struct tegra_dsi *dsi)
 		dev_err(dsi->dev, "failed to disable MIPI calibration: %d\n",
 			err);
 
-	pm_runtime_put(dsi->dev);
+	err = host1x_client_suspend(&dsi->client);
+	if (err < 0)
+		dev_err(dsi->dev, "failed to suspend: %d\n", err);
 }
 
 static void tegra_dsi_encoder_disable(struct drm_encoder *encoder)
@@ -883,11 +880,15 @@ static void tegra_dsi_encoder_disable(struct drm_encoder *encoder)
 	tegra_dsi_unprepare(dsi);
 }
 
-static void tegra_dsi_prepare(struct tegra_dsi *dsi)
+static int tegra_dsi_prepare(struct tegra_dsi *dsi)
 {
 	int err;
 
-	pm_runtime_get_sync(dsi->dev);
+	err = host1x_client_resume(&dsi->client);
+	if (err < 0) {
+		dev_err(dsi->dev, "failed to resume: %d\n", err);
+		return err;
+	}
 
 	err = tegra_mipi_enable(dsi->mipi);
 	if (err < 0)
@@ -900,6 +901,8 @@ static void tegra_dsi_prepare(struct tegra_dsi *dsi)
 
 	if (dsi->slave)
 		tegra_dsi_prepare(dsi->slave);
+
+	return 0;
 }
 
 static void tegra_dsi_encoder_enable(struct drm_encoder *encoder)
@@ -910,8 +913,13 @@ static void tegra_dsi_encoder_enable(struct drm_encoder *encoder)
 	struct tegra_dsi *dsi = to_dsi(output);
 	struct tegra_dsi_state *state;
 	u32 value;
+	int err;
 
-	tegra_dsi_prepare(dsi);
+	err = tegra_dsi_prepare(dsi);
+	if (err < 0) {
+		dev_err(dsi->dev, "failed to prepare: %d\n", err);
+		return;
+	}
 
 	state = tegra_dsi_get_state(dsi);
 
@@ -1031,7 +1039,7 @@ static const struct drm_encoder_helper_funcs tegra_dsi_encoder_helper_funcs = {
 
 static int tegra_dsi_init(struct host1x_client *client)
 {
-	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->host);
 	struct tegra_dsi *dsi = host1x_client_to_dsi(client);
 	int err;
 
@@ -1046,9 +1054,8 @@ static int tegra_dsi_init(struct host1x_client *client)
 					 &tegra_dsi_connector_helper_funcs);
 		dsi->output.connector.dpms = DRM_MODE_DPMS_OFF;
 
-		drm_encoder_init(drm, &dsi->output.encoder,
-				 &tegra_dsi_encoder_funcs,
-				 DRM_MODE_ENCODER_DSI, NULL);
+		drm_simple_encoder_init(drm, &dsi->output.encoder,
+					DRM_MODE_ENCODER_DSI);
 		drm_encoder_helper_add(&dsi->output.encoder,
 				       &tegra_dsi_encoder_helper_funcs);
 
@@ -1076,9 +1083,89 @@ static int tegra_dsi_exit(struct host1x_client *client)
 	return 0;
 }
 
+static int tegra_dsi_runtime_suspend(struct host1x_client *client)
+{
+	struct tegra_dsi *dsi = host1x_client_to_dsi(client);
+	struct device *dev = client->dev;
+	int err;
+
+	if (dsi->rst) {
+		err = reset_control_assert(dsi->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to assert reset: %d\n", err);
+			return err;
+		}
+	}
+
+	usleep_range(1000, 2000);
+
+	clk_disable_unprepare(dsi->clk_lp);
+	clk_disable_unprepare(dsi->clk);
+
+	regulator_disable(dsi->vdd);
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+
+static int tegra_dsi_runtime_resume(struct host1x_client *client)
+{
+	struct tegra_dsi *dsi = host1x_client_to_dsi(client);
+	struct device *dev = client->dev;
+	int err;
+
+	err = pm_runtime_resume_and_get(dev);
+	if (err < 0) {
+		dev_err(dev, "failed to get runtime PM: %d\n", err);
+		return err;
+	}
+
+	err = regulator_enable(dsi->vdd);
+	if (err < 0) {
+		dev_err(dev, "failed to enable VDD supply: %d\n", err);
+		goto put_rpm;
+	}
+
+	err = clk_prepare_enable(dsi->clk);
+	if (err < 0) {
+		dev_err(dev, "cannot enable DSI clock: %d\n", err);
+		goto disable_vdd;
+	}
+
+	err = clk_prepare_enable(dsi->clk_lp);
+	if (err < 0) {
+		dev_err(dev, "cannot enable low-power clock: %d\n", err);
+		goto disable_clk;
+	}
+
+	usleep_range(1000, 2000);
+
+	if (dsi->rst) {
+		err = reset_control_deassert(dsi->rst);
+		if (err < 0) {
+			dev_err(dev, "cannot deassert reset: %d\n", err);
+			goto disable_clk_lp;
+		}
+	}
+
+	return 0;
+
+disable_clk_lp:
+	clk_disable_unprepare(dsi->clk_lp);
+disable_clk:
+	clk_disable_unprepare(dsi->clk);
+disable_vdd:
+	regulator_disable(dsi->vdd);
+put_rpm:
+	pm_runtime_put_sync(dev);
+	return err;
+}
+
 static const struct host1x_client_ops dsi_client_ops = {
 	.init = tegra_dsi_init,
 	.exit = tegra_dsi_exit,
+	.suspend = tegra_dsi_runtime_suspend,
+	.resume = tegra_dsi_runtime_resume,
 };
 
 static int tegra_dsi_setup_clocks(struct tegra_dsi *dsi)
@@ -1414,10 +1501,8 @@ static int tegra_dsi_host_attach(struct mipi_dsi_host *host,
 		if (IS_ERR(output->panel))
 			output->panel = NULL;
 
-		if (output->panel && output->connector.dev) {
-			drm_panel_attach(output->panel, &output->connector);
+		if (output->panel && output->connector.dev)
 			drm_helper_hpd_irq_event(output->connector.dev);
-		}
 	}
 
 	return 0;
@@ -1452,12 +1537,16 @@ static int tegra_dsi_ganged_probe(struct tegra_dsi *dsi)
 	np = of_parse_phandle(dsi->dev->of_node, "nvidia,ganged-mode", 0);
 	if (np) {
 		struct platform_device *gangster = of_find_device_by_node(np);
+		of_node_put(np);
+		if (!gangster)
+			return -EPROBE_DEFER;
 
 		dsi->slave = platform_get_drvdata(gangster);
-		of_node_put(np);
 
-		if (!dsi->slave)
+		if (!dsi->slave) {
+			put_device(&gangster->dev);
 			return -EPROBE_DEFER;
+		}
 
 		dsi->slave->master = dsi;
 	}
@@ -1468,7 +1557,6 @@ static int tegra_dsi_ganged_probe(struct tegra_dsi *dsi)
 static int tegra_dsi_probe(struct platform_device *pdev)
 {
 	struct tegra_dsi *dsi;
-	struct resource *regs;
 	int err;
 
 	dsi = devm_kzalloc(&pdev->dev, sizeof(*dsi), GFP_KERNEL);
@@ -1500,48 +1588,57 @@ static int tegra_dsi_probe(struct platform_device *pdev)
 
 	if (!pdev->dev.pm_domain) {
 		dsi->rst = devm_reset_control_get(&pdev->dev, "dsi");
-		if (IS_ERR(dsi->rst))
-			return PTR_ERR(dsi->rst);
+		if (IS_ERR(dsi->rst)) {
+			err = PTR_ERR(dsi->rst);
+			goto remove;
+		}
 	}
 
 	dsi->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(dsi->clk)) {
-		dev_err(&pdev->dev, "cannot get DSI clock\n");
-		return PTR_ERR(dsi->clk);
+		err = dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk),
+				    "cannot get DSI clock\n");
+		goto remove;
 	}
 
 	dsi->clk_lp = devm_clk_get(&pdev->dev, "lp");
 	if (IS_ERR(dsi->clk_lp)) {
-		dev_err(&pdev->dev, "cannot get low-power clock\n");
-		return PTR_ERR(dsi->clk_lp);
+		err = dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk_lp),
+				    "cannot get low-power clock\n");
+		goto remove;
 	}
 
 	dsi->clk_parent = devm_clk_get(&pdev->dev, "parent");
 	if (IS_ERR(dsi->clk_parent)) {
-		dev_err(&pdev->dev, "cannot get parent clock\n");
-		return PTR_ERR(dsi->clk_parent);
+		err = dev_err_probe(&pdev->dev, PTR_ERR(dsi->clk_parent),
+				    "cannot get parent clock\n");
+		goto remove;
 	}
 
 	dsi->vdd = devm_regulator_get(&pdev->dev, "avdd-dsi-csi");
 	if (IS_ERR(dsi->vdd)) {
-		dev_err(&pdev->dev, "cannot get VDD supply\n");
-		return PTR_ERR(dsi->vdd);
+		err = dev_err_probe(&pdev->dev, PTR_ERR(dsi->vdd),
+				    "cannot get VDD supply\n");
+		goto remove;
 	}
 
 	err = tegra_dsi_setup_clocks(dsi);
 	if (err < 0) {
 		dev_err(&pdev->dev, "cannot setup clocks\n");
-		return err;
+		goto remove;
 	}
 
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	dsi->regs = devm_ioremap_resource(&pdev->dev, regs);
-	if (IS_ERR(dsi->regs))
-		return PTR_ERR(dsi->regs);
+	dsi->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(dsi->regs)) {
+		err = PTR_ERR(dsi->regs);
+		goto remove;
+	}
 
-	dsi->mipi = tegra_mipi_request(&pdev->dev);
-	if (IS_ERR(dsi->mipi))
-		return PTR_ERR(dsi->mipi);
+	dsi->mipi = tegra_mipi_request(&pdev->dev, pdev->dev.of_node);
+	if (IS_ERR(dsi->mipi)) {
+		err = PTR_ERR(dsi->mipi);
+		goto remove;
+	}
 
 	dsi->host.ops = &tegra_dsi_host_ops;
 	dsi->host.dev = &pdev->dev;
@@ -1569,107 +1666,29 @@ static int tegra_dsi_probe(struct platform_device *pdev)
 	return 0;
 
 unregister:
+	pm_runtime_disable(&pdev->dev);
 	mipi_dsi_host_unregister(&dsi->host);
 mipi_free:
 	tegra_mipi_free(dsi->mipi);
+remove:
+	tegra_output_remove(&dsi->output);
 	return err;
 }
 
-static int tegra_dsi_remove(struct platform_device *pdev)
+static void tegra_dsi_remove(struct platform_device *pdev)
 {
 	struct tegra_dsi *dsi = platform_get_drvdata(pdev);
-	int err;
 
 	pm_runtime_disable(&pdev->dev);
 
-	err = host1x_client_unregister(&dsi->client);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
-			err);
-		return err;
-	}
+	host1x_client_unregister(&dsi->client);
 
 	tegra_output_remove(&dsi->output);
 
 	mipi_dsi_host_unregister(&dsi->host);
 	tegra_mipi_free(dsi->mipi);
-
-	return 0;
 }
 
-#ifdef CONFIG_PM
-static int tegra_dsi_suspend(struct device *dev)
-{
-	struct tegra_dsi *dsi = dev_get_drvdata(dev);
-	int err;
-
-	if (dsi->rst) {
-		err = reset_control_assert(dsi->rst);
-		if (err < 0) {
-			dev_err(dev, "failed to assert reset: %d\n", err);
-			return err;
-		}
-	}
-
-	usleep_range(1000, 2000);
-
-	clk_disable_unprepare(dsi->clk_lp);
-	clk_disable_unprepare(dsi->clk);
-
-	regulator_disable(dsi->vdd);
-
-	return 0;
-}
-
-static int tegra_dsi_resume(struct device *dev)
-{
-	struct tegra_dsi *dsi = dev_get_drvdata(dev);
-	int err;
-
-	err = regulator_enable(dsi->vdd);
-	if (err < 0) {
-		dev_err(dsi->dev, "failed to enable VDD supply: %d\n", err);
-		return err;
-	}
-
-	err = clk_prepare_enable(dsi->clk);
-	if (err < 0) {
-		dev_err(dev, "cannot enable DSI clock: %d\n", err);
-		goto disable_vdd;
-	}
-
-	err = clk_prepare_enable(dsi->clk_lp);
-	if (err < 0) {
-		dev_err(dev, "cannot enable low-power clock: %d\n", err);
-		goto disable_clk;
-	}
-
-	usleep_range(1000, 2000);
-
-	if (dsi->rst) {
-		err = reset_control_deassert(dsi->rst);
-		if (err < 0) {
-			dev_err(dev, "cannot assert reset: %d\n", err);
-			goto disable_clk_lp;
-		}
-	}
-
-	return 0;
-
-disable_clk_lp:
-	clk_disable_unprepare(dsi->clk_lp);
-disable_clk:
-	clk_disable_unprepare(dsi->clk);
-disable_vdd:
-	regulator_disable(dsi->vdd);
-	return err;
-}
-#endif
-
-static const struct dev_pm_ops tegra_dsi_pm_ops = {
-	SET_RUNTIME_PM_OPS(tegra_dsi_suspend, tegra_dsi_resume, NULL)
-};
-
 static const struct of_device_id tegra_dsi_of_match[] = {
 	{ .compatible = "nvidia,tegra210-dsi", },
 	{ .compatible = "nvidia,tegra132-dsi", },
@@ -1683,7 +1702,6 @@ struct platform_driver tegra_dsi_driver = {
 	.driver = {
 		.name = "tegra-dsi",
 		.of_match_table = tegra_dsi_of_match,
-		.pm = &tegra_dsi_pm_ops,
 	},
 	.probe = tegra_dsi_probe,
 	.remove = tegra_dsi_remove,
diff --git a/drivers/gpu/drm/tegra/dsi.h b/drivers/gpu/drm/tegra/dsi.h
index 219263615399..f39594e65e97 100644
--- a/drivers/gpu/drm/tegra/dsi.h
+++ b/drivers/gpu/drm/tegra/dsi.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2013 NVIDIA Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef DRM_TEGRA_DSI_H
diff --git a/drivers/gpu/drm/tegra/falcon.c b/drivers/gpu/drm/tegra/falcon.c
index 352d05feabb0..17f616bbcb45 100644
--- a/drivers/gpu/drm/tegra/falcon.c
+++ b/drivers/gpu/drm/tegra/falcon.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2015, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/platform_device.h>
@@ -33,6 +30,14 @@ int falcon_wait_idle(struct falcon *falcon)
 				  (value == 0), 10, 100000);
 }
 
+static int falcon_dma_wait_not_full(struct falcon *falcon)
+{
+	u32 value;
+	/* poll DMATRFCMD until FULL reads clear; 10 and 100000 are the poll sleep and timeout (us) */
+	return readl_poll_timeout(falcon->regs + FALCON_DMATRFCMD, value,
+				  !(value & FALCON_DMATRFCMD_FULL), 10, 100000);
+}
+
 static int falcon_dma_wait_idle(struct falcon *falcon)
 {
 	u32 value;
@@ -47,50 +52,48 @@ static int falcon_copy_chunk(struct falcon *falcon,
 			     enum falcon_memory target)
 {
 	u32 cmd = FALCON_DMATRFCMD_SIZE_256B;
+	int err;
 
 	if (target == FALCON_MEMORY_IMEM)
 		cmd |= FALCON_DMATRFCMD_IMEM;
 
+	/*
+	 * Use second DMA context (i.e. the one for firmware). Strictly
+	 * speaking, at this point both DMA contexts point to the firmware
+	 * stream ID, but this register's value will be reused by the firmware
+	 * for later DMA transactions, so we need to use the correct value.
+	 */
+	cmd |= FALCON_DMATRFCMD_DMACTX(1);
+
+	err = falcon_dma_wait_not_full(falcon);
+	if (err < 0)
+		return err;
+
 	falcon_writel(falcon, offset, FALCON_DMATRFMOFFS);
 	falcon_writel(falcon, base, FALCON_DMATRFFBOFFS);
 	falcon_writel(falcon, cmd, FALCON_DMATRFCMD);
 
-	return falcon_dma_wait_idle(falcon);
+	return 0;
 }
 
 static void falcon_copy_firmware_image(struct falcon *falcon,
 				       const struct firmware *firmware)
 {
-	u32 *firmware_vaddr = falcon->firmware.vaddr;
-	dma_addr_t daddr;
+	u32 *virt = falcon->firmware.virt;
 	size_t i;
-	int err;
 
 	/* copy the whole thing taking into account endianness */
 	for (i = 0; i < firmware->size / sizeof(u32); i++)
-		firmware_vaddr[i] = le32_to_cpu(((u32 *)firmware->data)[i]);
-
-	/* ensure that caches are flushed and falcon can see the firmware */
-	daddr = dma_map_single(falcon->dev, firmware_vaddr,
-			       falcon->firmware.size, DMA_TO_DEVICE);
-	err = dma_mapping_error(falcon->dev, daddr);
-	if (err) {
-		dev_err(falcon->dev, "failed to map firmware: %d\n", err);
-		return;
-	}
-	dma_sync_single_for_device(falcon->dev, daddr,
-				   falcon->firmware.size, DMA_TO_DEVICE);
-	dma_unmap_single(falcon->dev, daddr, falcon->firmware.size,
-			 DMA_TO_DEVICE);
+		virt[i] = le32_to_cpu(((__le32 *)firmware->data)[i]);
 }
 
 static int falcon_parse_firmware_image(struct falcon *falcon)
 {
-	struct falcon_fw_bin_header_v1 *bin = (void *)falcon->firmware.vaddr;
+	struct falcon_fw_bin_header_v1 *bin = (void *)falcon->firmware.virt;
 	struct falcon_fw_os_header_v1 *os;
 
 	/* endian problems would show up right here */
-	if (bin->magic != PCI_VENDOR_ID_NVIDIA) {
+	if (bin->magic != PCI_VENDOR_ID_NVIDIA && bin->magic != 0x10fe) {
 		dev_err(falcon->dev, "incorrect firmware magic\n");
 		return -EINVAL;
 	}
@@ -107,7 +110,7 @@ static int falcon_parse_firmware_image(struct falcon *falcon)
 		return -EINVAL;
 	}
 
-	os = falcon->firmware.vaddr + bin->os_header_offset;
+	os = falcon->firmware.virt + bin->os_header_offset;
 
 	falcon->firmware.bin_data.size = bin->os_size;
 	falcon->firmware.bin_data.offset = bin->os_data_offset;
@@ -128,6 +131,8 @@ int falcon_read_firmware(struct falcon *falcon, const char *name)
 	if (err < 0)
 		return err;
 
+	falcon->firmware.size = falcon->firmware.firmware->size;
+
 	return 0;
 }
 
@@ -136,16 +141,6 @@ int falcon_load_firmware(struct falcon *falcon)
 	const struct firmware *firmware = falcon->firmware.firmware;
 	int err;
 
-	falcon->firmware.size = firmware->size;
-
-	/* allocate iova space for the firmware */
-	falcon->firmware.vaddr = falcon->ops->alloc(falcon, firmware->size,
-						    &falcon->firmware.paddr);
-	if (IS_ERR(falcon->firmware.vaddr)) {
-		dev_err(falcon->dev, "DMA memory mapping failed\n");
-		return PTR_ERR(falcon->firmware.vaddr);
-	}
-
 	/* copy firmware image into local area. this also ensures endianness */
 	falcon_copy_firmware_image(falcon, firmware);
 
@@ -153,45 +148,26 @@ int falcon_load_firmware(struct falcon *falcon)
 	err = falcon_parse_firmware_image(falcon);
 	if (err < 0) {
 		dev_err(falcon->dev, "failed to parse firmware image\n");
-		goto err_setup_firmware_image;
+		return err;
 	}
 
 	release_firmware(firmware);
 	falcon->firmware.firmware = NULL;
 
 	return 0;
-
-err_setup_firmware_image:
-	falcon->ops->free(falcon, falcon->firmware.size,
-			  falcon->firmware.paddr, falcon->firmware.vaddr);
-
-	return err;
 }
 
 int falcon_init(struct falcon *falcon)
 {
-	/* check mandatory ops */
-	if (!falcon->ops || !falcon->ops->alloc || !falcon->ops->free)
-		return -EINVAL;
-
-	falcon->firmware.vaddr = NULL;
+	falcon->firmware.virt = NULL;
 
 	return 0;
 }
 
 void falcon_exit(struct falcon *falcon)
 {
-	if (falcon->firmware.firmware) {
+	if (falcon->firmware.firmware)
 		release_firmware(falcon->firmware.firmware);
-		falcon->firmware.firmware = NULL;
-	}
-
-	if (falcon->firmware.vaddr) {
-		falcon->ops->free(falcon, falcon->firmware.size,
-				  falcon->firmware.paddr,
-				  falcon->firmware.vaddr);
-		falcon->firmware.vaddr = NULL;
-	}
 }
 
 int falcon_boot(struct falcon *falcon)
@@ -200,7 +176,7 @@ int falcon_boot(struct falcon *falcon)
 	u32 value;
 	int err;
 
-	if (!falcon->firmware.vaddr)
+	if (!falcon->firmware.virt)
 		return -EINVAL;
 
 	err = readl_poll_timeout(falcon->regs + FALCON_DMACTL, value,
@@ -213,7 +189,7 @@ int falcon_boot(struct falcon *falcon)
 	falcon_writel(falcon, 0, FALCON_DMACTL);
 
 	/* setup the address of the binary data so Falcon can access it later */
-	falcon_writel(falcon, (falcon->firmware.paddr +
+	falcon_writel(falcon, (falcon->firmware.iova +
 			       falcon->firmware.bin_data.offset) >> 8,
 		      FALCON_DMATRFBASE);
 
@@ -223,9 +199,15 @@ int falcon_boot(struct falcon *falcon)
 				  falcon->firmware.data.offset + offset,
 				  offset, FALCON_MEMORY_DATA);
 
-	/* copy the first code segment into Falcon internal memory */
-	falcon_copy_chunk(falcon, falcon->firmware.code.offset,
-			  0, FALCON_MEMORY_IMEM);
+	/* copy the code segment into Falcon internal memory */
+	for (offset = 0; offset < falcon->firmware.code.size; offset += 256)
+		falcon_copy_chunk(falcon, falcon->firmware.code.offset + offset,
+				  offset, FALCON_MEMORY_IMEM);
+
+	/* wait for DMA to complete */
+	err = falcon_dma_wait_idle(falcon);
+	if (err < 0)
+		return err;
 
 	/* setup falcon interrupts */
 	falcon_writel(falcon, FALCON_IRQMSET_EXT(0xff) |
diff --git a/drivers/gpu/drm/tegra/falcon.h b/drivers/gpu/drm/tegra/falcon.h
index 4504ed5a199e..902bb7e4fd0f 100644
--- a/drivers/gpu/drm/tegra/falcon.h
+++ b/drivers/gpu/drm/tegra/falcon.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2015, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef _FALCON_H_
@@ -50,9 +47,11 @@
 #define FALCON_DMATRFMOFFS			0x00001114
 
 #define FALCON_DMATRFCMD			0x00001118
+#define FALCON_DMATRFCMD_FULL			(1 << 0)
 #define FALCON_DMATRFCMD_IDLE			(1 << 1)
 #define FALCON_DMATRFCMD_IMEM			(1 << 4)
 #define FALCON_DMATRFCMD_SIZE_256B		(6 << 8)
+#define FALCON_DMATRFCMD_DMACTX(v)		(((v) & 0x7) << 12)
 
 #define FALCON_DMATRFFBOFFS			0x0000111c
 
@@ -77,15 +76,6 @@ struct falcon_fw_os_header_v1 {
 	u32 data_size;
 };
 
-struct falcon;
-
-struct falcon_ops {
-	void *(*alloc)(struct falcon *falcon, size_t size,
-		       dma_addr_t *paddr);
-	void (*free)(struct falcon *falcon, size_t size,
-		     dma_addr_t paddr, void *vaddr);
-};
-
 struct falcon_firmware_section {
 	unsigned long offset;
 	size_t size;
@@ -96,8 +86,9 @@ struct falcon_firmware {
 	const struct firmware *firmware;
 
 	/* Raw firmware data */
-	dma_addr_t paddr;
-	void *vaddr;
+	dma_addr_t iova;
+	dma_addr_t phys;
+	void *virt;
 	size_t size;
 
 	/* Parsed firmware information */
@@ -110,8 +101,6 @@ struct falcon {
 	/* Set by falcon client */
 	struct device *dev;
 	void __iomem *regs;
-	const struct falcon_ops *ops;
-	void *data;
 
 	struct falcon_firmware firmware;
 };
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index b947e82bbeb1..1cef8c5cac50 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -1,27 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2012-2013 Avionic Design GmbH
  * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
  *
- * Based on the KMS/FB CMA helpers
- *   Copyright (C) 2012 Analog Device Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
+ * Based on the KMS/FB DMA helpers
+ *   Copyright (C) 2012 Analog Devices Inc.
  */
 
 #include <linux/console.h>
 
-#include "drm.h"
-#include "gem.h"
+#include <drm/drm_fourcc.h>
+#include <drm/drm_framebuffer.h>
 #include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_modeset_helper.h>
+#include <drm/drm_print.h>
 
-#ifdef CONFIG_DRM_FBDEV_EMULATION
-static inline struct tegra_fbdev *to_tegra_fbdev(struct drm_fb_helper *helper)
-{
-	return container_of(helper, struct tegra_fbdev, base);
-}
-#endif
+#include "drm.h"
+#include "gem.h"
 
 struct tegra_bo *tegra_fb_get_plane(struct drm_framebuffer *framebuffer,
 				    unsigned int index)
@@ -44,6 +39,15 @@ int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer,
 {
 	uint64_t modifier = framebuffer->modifier;
 
+	if (fourcc_mod_is_vendor(modifier, NVIDIA)) {
+		if ((modifier & DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT) == 0)
+			tiling->sector_layout = TEGRA_BO_SECTOR_LAYOUT_TEGRA;
+		else
+			tiling->sector_layout = TEGRA_BO_SECTOR_LAYOUT_GPU;
+
+		modifier &= ~DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT;
+	}
+
 	switch (modifier) {
 	case DRM_FORMAT_MOD_LINEAR:
 		tiling->mode = TEGRA_BO_TILING_MODE_PITCH;
@@ -86,6 +90,7 @@ int tegra_fb_get_tiling(struct drm_framebuffer *framebuffer,
 		break;
 
 	default:
+		DRM_DEBUG_KMS("unknown format modifier: %llx\n", modifier);
 		return -EINVAL;
 	}
 
@@ -97,10 +102,11 @@ static const struct drm_framebuffer_funcs tegra_fb_funcs = {
 	.create_handle = drm_gem_fb_create_handle,
 };
 
-static struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm,
-					      const struct drm_mode_fb_cmd2 *mode_cmd,
-					      struct tegra_bo **planes,
-					      unsigned int num_planes)
+struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm,
+				       const struct drm_format_info *info,
+				       const struct drm_mode_fb_cmd2 *mode_cmd,
+				       struct tegra_bo **planes,
+				       unsigned int num_planes)
 {
 	struct drm_framebuffer *fb;
 	unsigned int i;
@@ -110,7 +116,7 @@ static struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm,
 	if (!fb)
 		return ERR_PTR(-ENOMEM);
 
-	drm_helper_mode_fill_fb_struct(drm, fb, mode_cmd);
+	drm_helper_mode_fill_fb_struct(drm, fb, info, mode_cmd);
 
 	for (i = 0; i < fb->format->num_planes; i++)
 		fb->obj[i] = &planes[i]->gem;
@@ -128,20 +134,18 @@ static struct drm_framebuffer *tegra_fb_alloc(struct drm_device *drm,
 
 struct drm_framebuffer *tegra_fb_create(struct drm_device *drm,
 					struct drm_file *file,
+					const struct drm_format_info *info,
 					const struct drm_mode_fb_cmd2 *cmd)
 {
-	unsigned int hsub, vsub, i;
 	struct tegra_bo *planes[4];
 	struct drm_gem_object *gem;
 	struct drm_framebuffer *fb;
+	unsigned int i;
 	int err;
 
-	hsub = drm_format_horz_chroma_subsampling(cmd->pixel_format);
-	vsub = drm_format_vert_chroma_subsampling(cmd->pixel_format);
-
-	for (i = 0; i < drm_format_num_planes(cmd->pixel_format); i++) {
-		unsigned int width = cmd->width / (i ? hsub : 1);
-		unsigned int height = cmd->height / (i ? vsub : 1);
+	for (i = 0; i < info->num_planes; i++) {
+		unsigned int width = cmd->width / (i ? info->hsub : 1);
+		unsigned int height = cmd->height / (i ? info->vsub : 1);
 		unsigned int size, bpp;
 
 		gem = drm_gem_object_lookup(file, cmd->handles[i]);
@@ -150,20 +154,21 @@ struct drm_framebuffer *tegra_fb_create(struct drm_device *drm,
 			goto unreference;
 		}
 
-		bpp = drm_format_plane_cpp(cmd->pixel_format, i);
+		bpp = info->cpp[i];
 
 		size = (height - 1) * cmd->pitches[i] +
 		       width * bpp + cmd->offsets[i];
 
 		if (gem->size < size) {
 			err = -EINVAL;
+			drm_gem_object_put(gem);
 			goto unreference;
 		}
 
 		planes[i] = to_tegra_bo(gem);
 	}
 
-	fb = tegra_fb_alloc(drm, cmd, planes, i);
+	fb = tegra_fb_alloc(drm, info, cmd, planes, i);
 	if (IS_ERR(fb)) {
 		err = PTR_ERR(fb);
 		goto unreference;
@@ -173,242 +178,7 @@ struct drm_framebuffer *tegra_fb_create(struct drm_device *drm,
 
 unreference:
 	while (i--)
-		drm_gem_object_put_unlocked(&planes[i]->gem);
+		drm_gem_object_put(&planes[i]->gem);
 
 	return ERR_PTR(err);
 }
-
-#ifdef CONFIG_DRM_FBDEV_EMULATION
-static int tegra_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
-{
-	struct drm_fb_helper *helper = info->par;
-	struct tegra_bo *bo;
-	int err;
-
-	bo = tegra_fb_get_plane(helper->fb, 0);
-
-	err = drm_gem_mmap_obj(&bo->gem, bo->gem.size, vma);
-	if (err < 0)
-		return err;
-
-	return __tegra_gem_mmap(&bo->gem, vma);
-}
-
-static struct fb_ops tegra_fb_ops = {
-	.owner = THIS_MODULE,
-	DRM_FB_HELPER_DEFAULT_OPS,
-	.fb_fillrect = drm_fb_helper_sys_fillrect,
-	.fb_copyarea = drm_fb_helper_sys_copyarea,
-	.fb_imageblit = drm_fb_helper_sys_imageblit,
-	.fb_mmap = tegra_fb_mmap,
-};
-
-static int tegra_fbdev_probe(struct drm_fb_helper *helper,
-			     struct drm_fb_helper_surface_size *sizes)
-{
-	struct tegra_fbdev *fbdev = to_tegra_fbdev(helper);
-	struct tegra_drm *tegra = helper->dev->dev_private;
-	struct drm_device *drm = helper->dev;
-	struct drm_mode_fb_cmd2 cmd = { 0 };
-	unsigned int bytes_per_pixel;
-	struct drm_framebuffer *fb;
-	unsigned long offset;
-	struct fb_info *info;
-	struct tegra_bo *bo;
-	size_t size;
-	int err;
-
-	bytes_per_pixel = DIV_ROUND_UP(sizes->surface_bpp, 8);
-
-	cmd.width = sizes->surface_width;
-	cmd.height = sizes->surface_height;
-	cmd.pitches[0] = round_up(sizes->surface_width * bytes_per_pixel,
-				  tegra->pitch_align);
-
-	cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
-						     sizes->surface_depth);
-
-	size = cmd.pitches[0] * cmd.height;
-
-	bo = tegra_bo_create(drm, size, 0);
-	if (IS_ERR(bo))
-		return PTR_ERR(bo);
-
-	info = drm_fb_helper_alloc_fbi(helper);
-	if (IS_ERR(info)) {
-		dev_err(drm->dev, "failed to allocate framebuffer info\n");
-		drm_gem_object_put_unlocked(&bo->gem);
-		return PTR_ERR(info);
-	}
-
-	fbdev->fb = tegra_fb_alloc(drm, &cmd, &bo, 1);
-	if (IS_ERR(fbdev->fb)) {
-		err = PTR_ERR(fbdev->fb);
-		dev_err(drm->dev, "failed to allocate DRM framebuffer: %d\n",
-			err);
-		drm_gem_object_put_unlocked(&bo->gem);
-		return PTR_ERR(fbdev->fb);
-	}
-
-	fb = fbdev->fb;
-	helper->fb = fb;
-	helper->fbdev = info;
-
-	info->par = helper;
-	info->flags = FBINFO_FLAG_DEFAULT;
-	info->fbops = &tegra_fb_ops;
-
-	drm_fb_helper_fill_fix(info, fb->pitches[0], fb->format->depth);
-	drm_fb_helper_fill_var(info, helper, fb->width, fb->height);
-
-	offset = info->var.xoffset * bytes_per_pixel +
-		 info->var.yoffset * fb->pitches[0];
-
-	if (bo->pages) {
-		bo->vaddr = vmap(bo->pages, bo->num_pages, VM_MAP,
-				 pgprot_writecombine(PAGE_KERNEL));
-		if (!bo->vaddr) {
-			dev_err(drm->dev, "failed to vmap() framebuffer\n");
-			err = -ENOMEM;
-			goto destroy;
-		}
-	}
-
-	drm->mode_config.fb_base = (resource_size_t)bo->paddr;
-	info->screen_base = (void __iomem *)bo->vaddr + offset;
-	info->screen_size = size;
-	info->fix.smem_start = (unsigned long)(bo->paddr + offset);
-	info->fix.smem_len = size;
-
-	return 0;
-
-destroy:
-	drm_framebuffer_remove(fb);
-	return err;
-}
-
-static const struct drm_fb_helper_funcs tegra_fb_helper_funcs = {
-	.fb_probe = tegra_fbdev_probe,
-};
-
-static struct tegra_fbdev *tegra_fbdev_create(struct drm_device *drm)
-{
-	struct tegra_fbdev *fbdev;
-
-	fbdev = kzalloc(sizeof(*fbdev), GFP_KERNEL);
-	if (!fbdev) {
-		dev_err(drm->dev, "failed to allocate DRM fbdev\n");
-		return ERR_PTR(-ENOMEM);
-	}
-
-	drm_fb_helper_prepare(drm, &fbdev->base, &tegra_fb_helper_funcs);
-
-	return fbdev;
-}
-
-static void tegra_fbdev_free(struct tegra_fbdev *fbdev)
-{
-	kfree(fbdev);
-}
-
-static int tegra_fbdev_init(struct tegra_fbdev *fbdev,
-			    unsigned int preferred_bpp,
-			    unsigned int num_crtc,
-			    unsigned int max_connectors)
-{
-	struct drm_device *drm = fbdev->base.dev;
-	int err;
-
-	err = drm_fb_helper_init(drm, &fbdev->base, max_connectors);
-	if (err < 0) {
-		dev_err(drm->dev, "failed to initialize DRM FB helper: %d\n",
-			err);
-		return err;
-	}
-
-	err = drm_fb_helper_single_add_all_connectors(&fbdev->base);
-	if (err < 0) {
-		dev_err(drm->dev, "failed to add connectors: %d\n", err);
-		goto fini;
-	}
-
-	err = drm_fb_helper_initial_config(&fbdev->base, preferred_bpp);
-	if (err < 0) {
-		dev_err(drm->dev, "failed to set initial configuration: %d\n",
-			err);
-		goto fini;
-	}
-
-	return 0;
-
-fini:
-	drm_fb_helper_fini(&fbdev->base);
-	return err;
-}
-
-static void tegra_fbdev_exit(struct tegra_fbdev *fbdev)
-{
-	drm_fb_helper_unregister_fbi(&fbdev->base);
-
-	if (fbdev->fb) {
-		struct tegra_bo *bo = tegra_fb_get_plane(fbdev->fb, 0);
-
-		/* Undo the special mapping we made in fbdev probe. */
-		if (bo && bo->pages) {
-			vunmap(bo->vaddr);
-			bo->vaddr = NULL;
-		}
-
-		drm_framebuffer_remove(fbdev->fb);
-	}
-
-	drm_fb_helper_fini(&fbdev->base);
-	tegra_fbdev_free(fbdev);
-}
-#endif
-
-int tegra_drm_fb_prepare(struct drm_device *drm)
-{
-#ifdef CONFIG_DRM_FBDEV_EMULATION
-	struct tegra_drm *tegra = drm->dev_private;
-
-	tegra->fbdev = tegra_fbdev_create(drm);
-	if (IS_ERR(tegra->fbdev))
-		return PTR_ERR(tegra->fbdev);
-#endif
-
-	return 0;
-}
-
-void tegra_drm_fb_free(struct drm_device *drm)
-{
-#ifdef CONFIG_DRM_FBDEV_EMULATION
-	struct tegra_drm *tegra = drm->dev_private;
-
-	tegra_fbdev_free(tegra->fbdev);
-#endif
-}
-
-int tegra_drm_fb_init(struct drm_device *drm)
-{
-#ifdef CONFIG_DRM_FBDEV_EMULATION
-	struct tegra_drm *tegra = drm->dev_private;
-	int err;
-
-	err = tegra_fbdev_init(tegra->fbdev, 32, drm->mode_config.num_crtc,
-			       drm->mode_config.num_connector);
-	if (err < 0)
-		return err;
-#endif
-
-	return 0;
-}
-
-void tegra_drm_fb_exit(struct drm_device *drm)
-{
-#ifdef CONFIG_DRM_FBDEV_EMULATION
-	struct tegra_drm *tegra = drm->dev_private;
-
-	tegra_fbdev_exit(tegra->fbdev);
-#endif
-}
diff --git a/drivers/gpu/drm/tegra/fbdev.c b/drivers/gpu/drm/tegra/fbdev.c
new file mode 100644
index 000000000000..8f40882aa76e
--- /dev/null
+++ b/drivers/gpu/drm/tegra/fbdev.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012-2013 Avionic Design GmbH
+ * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
+ *
+ * Based on the KMS/FB DMA helpers
+ *   Copyright (C) 2012 Analog Devices Inc.
+ */
+
+#include <linux/console.h>
+#include <linux/fb.h>
+#include <linux/vmalloc.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_fb_helper.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_framebuffer.h>
+#include <drm/drm_gem_framebuffer_helper.h>
+#include <drm/drm_modeset_helper.h>
+
+#include "drm.h"
+#include "gem.h"
+
+static int tegra_fb_mmap(struct fb_info *info, struct vm_area_struct *vma)
+{
+	struct drm_fb_helper *fb_helper = info->par;
+	struct tegra_bo *obj = tegra_fb_get_plane(fb_helper->fb, 0);
+	int ret;
+
+	/* First run the generic GEM mmap setup over the whole plane-0 object ... */
+	ret = drm_gem_mmap_obj(&obj->gem, obj->gem.size, vma);
+	if (ret < 0)
+		return ret;
+
+	/* ... then hand the VMA to the driver's own GEM mmap helper. */
+	return __tegra_gem_mmap(&obj->gem, vma);
+}
+
+static void tegra_fbdev_fb_destroy(struct fb_info *info)
+{
+	struct drm_fb_helper *helper = info->par;
+	struct drm_framebuffer *fb = helper->fb;
+	struct tegra_bo *bo = tegra_fb_get_plane(fb, 0);
+	/* .fb_destroy hook: finalize the helper, then drop the mapping, the fb and the client */
+	drm_fb_helper_fini(helper);
+
+	/* Undo the vmap() that fbdev probe made for BOs with a pages array. */
+	if (bo->pages) {
+		vunmap(bo->vaddr);
+		bo->vaddr = NULL;
+	}
+	drm_framebuffer_remove(fb);
+	/* release the DRM client embedded in the helper last */
+	drm_client_release(&helper->client);
+}
+
+static const struct fb_ops tegra_fb_ops = {
+	.owner = THIS_MODULE,
+	__FB_DEFAULT_DMAMEM_OPS_RDWR,
+	DRM_FB_HELPER_DEFAULT_OPS,
+	__FB_DEFAULT_DMAMEM_OPS_DRAW,
+	.fb_mmap = tegra_fb_mmap,		/* maps the BO behind plane 0 of the helper's fb */
+	.fb_destroy = tegra_fbdev_fb_destroy,	/* tears down helper, mapping, fb and client */
+};
+
+static const struct drm_fb_helper_funcs tegra_fbdev_helper_funcs = {
+};	/* no callbacks set; installed as helper->funcs by tegra_fbdev_driver_fbdev_probe() */
+
+int tegra_fbdev_driver_fbdev_probe(struct drm_fb_helper *helper,
+				   struct drm_fb_helper_surface_size *sizes)
+{
+	struct tegra_drm *tegra = helper->dev->dev_private;
+	struct drm_device *drm = helper->dev;
+	struct drm_mode_fb_cmd2 cmd = { 0 };
+	struct fb_info *info = helper->info;
+	unsigned int bytes_per_pixel;
+	struct drm_framebuffer *fb;
+	unsigned long offset;
+	struct tegra_bo *bo;
+	size_t size;
+	int err;
+	/* Build the fbdev framebuffer: one BO, one DRM fb on top of it, then fill in the fb_info. */
+	bytes_per_pixel = DIV_ROUND_UP(sizes->surface_bpp, 8);
+	/* the pitch is the line size in bytes rounded up to tegra->pitch_align */
+	cmd.width = sizes->surface_width;
+	cmd.height = sizes->surface_height;
+	cmd.pitches[0] = round_up(sizes->surface_width * bytes_per_pixel,
+				  tegra->pitch_align);
+
+	cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
+						     sizes->surface_depth);
+
+	size = cmd.pitches[0] * cmd.height;
+	/* a single BO of pitch * height bytes is passed as the only plane of the framebuffer */
+	bo = tegra_bo_create(drm, size, 0);
+	if (IS_ERR(bo))
+		return PTR_ERR(bo);
+
+	fb = tegra_fb_alloc(drm,
+			    drm_get_format_info(drm, cmd.pixel_format, cmd.modifier[0]),
+			    &cmd, &bo, 1);
+	if (IS_ERR(fb)) {
+		err = PTR_ERR(fb);
+		dev_err(drm->dev, "failed to allocate DRM framebuffer: %d\n",
+			err);
+		drm_gem_object_put(&bo->gem);
+		return PTR_ERR(fb);
+	}
+
+	helper->funcs = &tegra_fbdev_helper_funcs;
+	helper->fb = fb;
+	helper->info = info;	/* NOTE(review): info was read from helper->info above, so this is a no-op */
+
+	info->fbops = &tegra_fb_ops;
+
+	drm_fb_helper_fill_info(info, helper, sizes);
+
+	offset = info->var.xoffset * bytes_per_pixel +
+		 info->var.yoffset * fb->pitches[0];
+	/* BOs that carry a pages array get a write-combined kernel mapping here */
+	if (bo->pages) {
+		bo->vaddr = vmap(bo->pages, bo->num_pages, VM_MAP,
+				 pgprot_writecombine(PAGE_KERNEL));
+		if (!bo->vaddr) {
+			dev_err(drm->dev, "failed to vmap() framebuffer\n");
+			err = -ENOMEM;
+			goto destroy;
+		}
+	}
+	/* describe the buffer to fbdev: CPU pointer, size, and bo->iova as smem_start */
+	info->flags |= FBINFO_VIRTFB;
+	info->screen_buffer = bo->vaddr + offset;
+	info->screen_size = size;
+	info->fix.smem_start = (unsigned long)(bo->iova + offset);
+	info->fix.smem_len = size;
+
+	return 0;
+
+destroy:
+	drm_framebuffer_remove(fb);
+	return err;
+}
diff --git a/drivers/gpu/drm/tegra/firewall.c b/drivers/gpu/drm/tegra/firewall.c
new file mode 100644
index 000000000000..d53f890fa689
--- /dev/null
+++ b/drivers/gpu/drm/tegra/firewall.c
@@ -0,0 +1,257 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2010-2020 NVIDIA Corporation */
+
+#include "drm.h"
+#include "submit.h"
+#include "uapi.h"
+
+struct tegra_drm_firewall {
+	struct tegra_drm_submit_data *submit;	/* provides the used_mappings that addresses are checked against */
+	struct tegra_drm_client *client;	/* provides ops (is_addr_reg, is_valid_class) and base.class/base.dev */
+	u32 *data;	/* command words being validated */
+	u32 pos;	/* index into data of the next word to read */
+	u32 end;	/* index at which reading stops (start + words) */
+	u32 class;	/* class currently in effect; updated by SETCLASS */
+};
+
+static int fw_next(struct tegra_drm_firewall *fw, u32 *word)
+{
+	if (fw->pos == fw->end)
+		return -EINVAL;
+	/* words remain: hand out the one at the cursor and advance past it */
+	*word = fw->data[fw->pos++];
+
+	return 0;
+}
+
+static bool fw_check_addr_valid(struct tegra_drm_firewall *fw, u32 offset)
+{
+	u32 i;
+	/* true if offset lies within [iova, iova_end] of any mapping used by the submit */
+	for (i = 0; i < fw->submit->num_used_mappings; i++) {
+		struct tegra_drm_mapping *m = fw->submit->used_mappings[i].mapping;
+
+		if (offset >= m->iova && offset <= m->iova_end)
+			return true;
+	}
+	/* NOTE(review): iova_end is treated as inclusive above; confirm how it is computed */
+	return false;
+}
+
+static int fw_check_reg(struct tegra_drm_firewall *fw, u32 offset)
+{
+	struct tegra_drm_client *client = fw->client;
+	u32 value;
+	int ret;
+
+	/*
+	 * Consume the data word first, whether or not it ends up being checked.
+	 */
+	ret = fw_next(fw, &value);
+	if (ret)
+		return ret;
+
+	/* Only values written to address registers need to be looked at. */
+	if (!client->ops->is_addr_reg ||
+	    !client->ops->is_addr_reg(client->base.dev, fw->class, offset))
+		return 0;
+
+	/* The written address must fall inside one of the used mappings. */
+	if (!fw_check_addr_valid(fw, value))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int fw_check_regs_seq(struct tegra_drm_firewall *fw, u32 offset,
+			     u32 count, bool incr)
+{
+	const u32 stride = incr ? 1 : 0;
+	u32 left;
+
+	/* Check one data word per register write; offset only moves when incr is set. */
+	for (left = count; left; left--) {
+		if (fw_check_reg(fw, offset))
+			return -EINVAL;
+		offset += stride;
+	}
+
+	return 0;
+}
+
+static int fw_check_regs_mask(struct tegra_drm_firewall *fw, u32 offset,
+			      u16 mask)
+{
+	unsigned int pos;
+
+	/* Every set bit of the mask selects register offset + bit, lowest bit first. */
+	for (pos = 0; pos < 16; pos++) {
+		if (((mask >> pos) & 1) && fw_check_reg(fw, offset + pos))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int fw_check_regs_imm(struct tegra_drm_firewall *fw, u32 offset)
+{
+	struct tegra_drm_client *client = fw->client;
+
+	/* Without an is_addr_reg hook there is nothing to reject. */
+	if (!client->ops->is_addr_reg)
+		return 0;
+
+	/* An immediate write must not target an address register. */
+	if (client->ops->is_addr_reg(client->base.dev, fw->class, offset))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int fw_check_class(struct tegra_drm_firewall *fw, u32 class)
+{
+	struct tegra_drm_client *client = fw->client;
+
+	/* A client-provided hook, when present, decides on its own. */
+	if (client->ops->is_valid_class)
+		return client->ops->is_valid_class(class) ? 0 : -EINVAL;
+
+	/* Otherwise only the client's own class is accepted. */
+	if (class != client->base.class)
+		return -EINVAL;
+
+	return 0;
+}
+
+enum {	/* opcode values as decoded from bits 31:28 of a command word in tegra_drm_fw_validate() */
+	HOST1X_OPCODE_SETCLASS  = 0x00,
+	HOST1X_OPCODE_INCR      = 0x01,
+	HOST1X_OPCODE_NONINCR   = 0x02,
+	HOST1X_OPCODE_MASK      = 0x03,
+	HOST1X_OPCODE_IMM       = 0x04,
+	HOST1X_OPCODE_RESTART   = 0x05,
+	HOST1X_OPCODE_GATHER    = 0x06,
+	HOST1X_OPCODE_SETSTRMID = 0x07,
+	HOST1X_OPCODE_SETAPPID  = 0x08,
+	HOST1X_OPCODE_SETPYLD   = 0x09,
+	HOST1X_OPCODE_INCR_W    = 0x0a,
+	HOST1X_OPCODE_NONINCR_W = 0x0b,
+	HOST1X_OPCODE_GATHER_W  = 0x0c,
+	HOST1X_OPCODE_RESTART_W = 0x0d,
+	HOST1X_OPCODE_EXTEND    = 0x0e,
+};
+
+int tegra_drm_fw_validate(struct tegra_drm_client *client, u32 *data, u32 start,
+			  u32 words, struct tegra_drm_submit_data *submit,
+			  u32 *job_class)
+{
+	struct tegra_drm_firewall fw = {
+		.submit = submit,
+		.client = client,
+		.data = data,
+		.pos = start,
+		.end = start+words,
+		.class = *job_class,
+	};
+	bool payload_valid = false;
+	u32 payload;
+	int err;
+	/* Validate words [start, start + words) of data; returns 0 or a negative errno on the first violation. */
+	while (fw.pos != fw.end) {
+		u32 word, opcode, offset, count, mask, class;
+
+		err = fw_next(&fw, &word);
+		if (err)
+			return err;
+		/* the opcode is held in bits 31:28 of the command word */
+		opcode = (word & 0xf0000000) >> 28;
+
+		switch (opcode) {
+		case HOST1X_OPCODE_SETCLASS:
+			offset = word >> 16 & 0xfff;
+			mask = word & 0x3f;
+			class = (word >> 6) & 0x3ff;
+			err = fw_check_class(&fw, class);
+			fw.class = class;
+			*job_class = class;	/* the caller's class is updated even if the check failed */
+			if (!err)
+				err = fw_check_regs_mask(&fw, offset, mask);
+			if (err)
+				dev_warn(client->base.dev,
+					 "illegal SETCLASS(offset=0x%x, mask=0x%x, class=0x%x) at word %u",
+					 offset, mask, class, fw.pos-1);
+			break;
+		case HOST1X_OPCODE_INCR:
+			offset = (word >> 16) & 0xfff;
+			count = word & 0xffff;
+			err = fw_check_regs_seq(&fw, offset, count, true);
+			if (err)
+				dev_warn(client->base.dev,
+					 "illegal INCR(offset=0x%x, count=%u) in class 0x%x at word %u",
+					 offset, count, fw.class, fw.pos-1);
+			break;
+		case HOST1X_OPCODE_NONINCR:
+			offset = (word >> 16) & 0xfff;
+			count = word & 0xffff;
+			err = fw_check_regs_seq(&fw, offset, count, false);
+			if (err)
+				dev_warn(client->base.dev,
+					 "illegal NONINCR(offset=0x%x, count=%u) in class 0x%x at word %u",
+					 offset, count, fw.class, fw.pos-1);
+			break;
+		case HOST1X_OPCODE_MASK:
+			offset = (word >> 16) & 0xfff;
+			mask = word & 0xffff;
+			err = fw_check_regs_mask(&fw, offset, mask);
+			if (err)
+				dev_warn(client->base.dev,
+					 "illegal MASK(offset=0x%x, mask=0x%x) in class 0x%x at word %u",
+					 offset, mask, fw.class, fw.pos-1);
+			break;
+		case HOST1X_OPCODE_IMM:
+			/* IMM cannot reasonably be used to write a pointer */
+			offset = (word >> 16) & 0xfff;
+			err = fw_check_regs_imm(&fw, offset);
+			if (err)
+				dev_warn(client->base.dev,
+					 "illegal IMM(offset=0x%x) in class 0x%x at word %u",
+					 offset, fw.class, fw.pos-1);
+			break;
+		case HOST1X_OPCODE_SETPYLD:
+			payload = word & 0xffff;	/* used as the word count of later *_W opcodes */
+			payload_valid = true;
+			break;
+		case HOST1X_OPCODE_INCR_W:
+			if (!payload_valid)
+				return -EINVAL;
+
+			offset = word & 0x3fffff;
+			err = fw_check_regs_seq(&fw, offset, payload, true);
+			if (err)
+				dev_warn(client->base.dev,
+					 "illegal INCR_W(offset=0x%x) in class 0x%x at word %u",
+					 offset, fw.class, fw.pos-1);
+			break;
+		case HOST1X_OPCODE_NONINCR_W:
+			if (!payload_valid)
+				return -EINVAL;
+
+			offset = word & 0x3fffff;
+			err = fw_check_regs_seq(&fw, offset, payload, false);
+			if (err)
+				dev_warn(client->base.dev,
+					 "illegal NONINCR_W(offset=0x%x) in class 0x%x at word %u",
+					 offset, fw.class, fw.pos-1);
+			break;
+		default:
+			dev_warn(client->base.dev, "illegal opcode at word %u",
+				 fw.pos-1);
+			return -EINVAL;
+		}
+
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 4f80100ff5f3..6b14f1e919eb 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * NVIDIA Tegra DRM GEM helper functions
  *
@@ -7,88 +8,210 @@
  * Based on the GEM/CMA helpers
  *
  * Copyright (c) 2011 Samsung Electronics Co., Ltd.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/dma-buf.h>
 #include <linux/iommu.h>
-#include <drm/tegra_drm.h>
+#include <linux/module.h>
+#include <linux/vmalloc.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_dumb_buffers.h>
+#include <drm/drm_prime.h>
 
 #include "drm.h"
 #include "gem.h"
 
+MODULE_IMPORT_NS("DMA_BUF");
+
+/*
+ * Count the DMA-contiguous chunks described by a scatterlist.
+ *
+ * @sgl: first entry of the scatterlist
+ * @nents: number of entries to walk
+ *
+ * Entries whose DMA length is zero are skipped. An entry starts a new chunk
+ * when its DMA address differs from the end address of the previous
+ * non-empty entry.
+ *
+ * Returns the number of chunks, or 0 if no entry has a non-zero DMA length.
+ */
+static unsigned int sg_dma_count_chunks(struct scatterlist *sgl, unsigned int nents)
+{
+	dma_addr_t next = ~(dma_addr_t)0;
+	unsigned int count = 0, i;
+	struct scatterlist *s;
+
+	for_each_sg(sgl, s, nents, i) {
+		/* sg_dma_address(s) is only valid for entries that have sg_dma_len(s) != 0. */
+		if (!sg_dma_len(s))
+			continue;
+
+		if (sg_dma_address(s) != next)
+			count++;
+
+		/*
+		 * Advance the expected address for every non-empty entry, not
+		 * only when a new chunk starts. Otherwise the third entry of a
+		 * contiguous run is compared against the end of the first one
+		 * and wrongly counted as a new chunk.
+		 */
+		next = sg_dma_address(s) + sg_dma_len(s);
+	}
+
+	return count;
+}
+
+/* Count the DMA-contiguous chunks in the first sgt->nents entries of @sgt. */
+static inline unsigned int sgt_dma_count_chunks(struct sg_table *sgt)
+{
+	struct scatterlist *first = sgt->sgl;
+	unsigned int entries = sgt->nents;
+
+	return sg_dma_count_chunks(first, entries);
+}
+
+/* Drop a reference on the GEM object that embeds @bo. */
 static void tegra_bo_put(struct host1x_bo *bo)
 {
 	struct tegra_bo *obj = host1x_to_tegra_bo(bo);
 
-	drm_gem_object_put_unlocked(&obj->gem);
+	drm_gem_object_put(&obj->gem);
 }
 
-static dma_addr_t tegra_bo_pin(struct host1x_bo *bo, struct sg_table **sgt)
+/*
+ * Map a buffer object for DMA by @dev.
+ *
+ * @dev: device that will access the buffer
+ * @bo: host1x buffer object to map
+ * @direction: DMA data direction of the mapping
+ *
+ * Imported buffers (obj->dma_buf set) are attached to @dev and mapped through
+ * the DMA-BUF API. All other buffers get a newly allocated SG table, built
+ * either from obj->pages or with dma_get_sgtable(), which is then mapped with
+ * dma_map_sgtable().
+ *
+ * On success the mapping holds a reference to @bo; tegra_bo_unpin() drops it.
+ * On failure everything acquired here, including that reference, is released.
+ *
+ * Returns the new mapping or an ERR_PTR()-encoded negative error code.
+ */
+static struct host1x_bo_mapping *tegra_bo_pin(struct device *dev, struct host1x_bo *bo,
+					      enum dma_data_direction direction)
 {
 	struct tegra_bo *obj = host1x_to_tegra_bo(bo);
+	struct drm_gem_object *gem = &obj->gem;
+	struct host1x_bo_mapping *map;
+	int err;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&map->ref);
+	map->bo = host1x_bo_get(bo);
+	map->direction = direction;
+	map->dev = dev;
+
+	/*
+	 * Imported buffers need special treatment to satisfy the semantics of DMA-BUF.
+	 */
+	if (obj->dma_buf) {
+		struct dma_buf *buf = obj->dma_buf;
+
+		map->attach = dma_buf_attach(buf, dev);
+		if (IS_ERR(map->attach)) {
+			err = PTR_ERR(map->attach);
+			goto free;
+		}
+
+		map->sgt = dma_buf_map_attachment_unlocked(map->attach, direction);
+		if (IS_ERR(map->sgt)) {
+			dma_buf_detach(buf, map->attach);
+			err = PTR_ERR(map->sgt);
+			/* the exporter owns nothing here, keep kfree() below a no-op */
+			map->sgt = NULL;
+			goto free;
+		}
+
+		goto out;
+	}
 
-	*sgt = obj->sgt;
+	/*
+	 * If we don't have a mapping for this buffer yet, return an SG table
+	 * so that host1x can do the mapping for us via the DMA API.
+	 */
+	map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL);
+	if (!map->sgt) {
+		err = -ENOMEM;
+		goto free;
+	}
 
-	return obj->paddr;
+	if (obj->pages) {
+		/*
+		 * If the buffer object was allocated from the explicit IOMMU
+		 * API code paths, construct an SG table from the pages.
+		 */
+		err = sg_alloc_table_from_pages(map->sgt, obj->pages, obj->num_pages, 0, gem->size,
+						GFP_KERNEL);
+		if (err < 0)
+			goto free;
+	} else {
+		/*
+		 * If the buffer object had no pages allocated and if it was
+		 * not imported, it had to be allocated with the DMA API, so
+		 * the DMA API helper can be used.
+		 */
+		err = dma_get_sgtable(dev, map->sgt, obj->vaddr, obj->iova, gem->size);
+		if (err < 0)
+			goto free;
+	}
+
+	err = dma_map_sgtable(dev, map->sgt, direction, 0);
+	if (err)
+		goto free_sgt;
+
+out:
+	/*
+	 * If we've manually mapped the buffer object through the IOMMU, make sure to return the
+	 * existing IOVA address of our mapping.
+	 */
+	if (!obj->mm) {
+		map->phys = sg_dma_address(map->sgt->sgl);
+		/*
+		 * Count the chunks of the table that was actually mapped.
+		 * dma_map_sgtable() returns 0 on success, so its return value
+		 * cannot be used as the chunk count.
+		 */
+		map->chunks = sgt_dma_count_chunks(map->sgt);
+	} else {
+		map->phys = obj->iova;
+		map->chunks = 1;
+	}
+
+	map->size = gem->size;
+
+	return map;
+
+free_sgt:
+	sg_free_table(map->sgt);
+free:
+	kfree(map->sgt);
+	/* drop the reference taken with host1x_bo_get() above */
+	host1x_bo_put(map->bo);
+	kfree(map);
+	return ERR_PTR(err);
 }
 
-static void tegra_bo_unpin(struct host1x_bo *bo, struct sg_table *sgt)
+/*
+ * Tear down a mapping and free it.
+ *
+ * Mappings that carry a DMA-BUF attachment are unmapped and detached through
+ * the DMA-BUF API. All others are unmapped with dma_unmap_sgtable() and their
+ * SG table is freed. The buffer object reference stored in map->bo is
+ * dropped and @map itself is freed, so @map must not be used afterwards.
+ */
+static void tegra_bo_unpin(struct host1x_bo_mapping *map)
 {
+	if (map->attach) {
+		dma_buf_unmap_attachment_unlocked(map->attach, map->sgt,
+						  map->direction);
+		dma_buf_detach(map->attach->dmabuf, map->attach);
+	} else {
+		dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0);
+		sg_free_table(map->sgt);
+		kfree(map->sgt);
+	}
+
+	host1x_bo_put(map->bo);
+	kfree(map);
 }
 
+/*
+ * Return a kernel virtual address for the whole buffer object.
+ *
+ * An existing obj->vaddr is returned as is. Imported buffers are mapped with
+ * dma_buf_vmap_unlocked(). Everything else is mapped write-combined from
+ * obj->pages with vmap().
+ *
+ * Returns the address or an ERR_PTR()-encoded error; never NULL on failure.
+ */
 static void *tegra_bo_mmap(struct host1x_bo *bo)
 {
 	struct tegra_bo *obj = host1x_to_tegra_bo(bo);
+	struct iosys_map map = { 0 };
+	void *vaddr;
+	int ret;
 
 	if (obj->vaddr)
 		return obj->vaddr;
-	else if (obj->gem.import_attach)
-		return dma_buf_vmap(obj->gem.import_attach->dmabuf);
-	else
-		return vmap(obj->pages, obj->num_pages, VM_MAP,
-			    pgprot_writecombine(PAGE_KERNEL));
-}
 
-static void tegra_bo_munmap(struct host1x_bo *bo, void *addr)
-{
-	struct tegra_bo *obj = host1x_to_tegra_bo(bo);
+	if (obj->dma_buf) {
+		ret = dma_buf_vmap_unlocked(obj->dma_buf, &map);
+		if (ret < 0)
+			return ERR_PTR(ret);
 
-	if (obj->vaddr)
-		return;
-	else if (obj->gem.import_attach)
-		dma_buf_vunmap(obj->gem.import_attach->dmabuf, addr);
-	else
-		vunmap(addr);
-}
+		return map.vaddr;
+	}
 
-static void *tegra_bo_kmap(struct host1x_bo *bo, unsigned int page)
-{
-	struct tegra_bo *obj = host1x_to_tegra_bo(bo);
+	vaddr = vmap(obj->pages, obj->num_pages, VM_MAP,
+		     pgprot_writecombine(PAGE_KERNEL));
+	/* vmap() reports failure as NULL; convert it to an error pointer */
+	if (!vaddr)
+		return ERR_PTR(-ENOMEM);
 
-	if (obj->vaddr)
-		return obj->vaddr + page * PAGE_SIZE;
-	else if (obj->gem.import_attach)
-		return dma_buf_kmap(obj->gem.import_attach->dmabuf, page);
-	else
-		return vmap(obj->pages + page, 1, VM_MAP,
-			    pgprot_writecombine(PAGE_KERNEL));
+	return vaddr;
 }
 
-static void tegra_bo_kunmap(struct host1x_bo *bo, unsigned int page,
-			    void *addr)
+/*
+ * Undo tegra_bo_mmap() for the address @addr.
+ *
+ * Nothing is done when the buffer object has its own obj->vaddr. Imported
+ * buffers are unmapped with dma_buf_vunmap_unlocked(), all others with
+ * vunmap().
+ */
+static void tegra_bo_munmap(struct host1x_bo *bo, void *addr)
 {
 	struct tegra_bo *obj = host1x_to_tegra_bo(bo);
+	struct iosys_map map = IOSYS_MAP_INIT_VADDR(addr);
 
 	if (obj->vaddr)
 		return;
-	else if (obj->gem.import_attach)
-		dma_buf_kunmap(obj->gem.import_attach->dmabuf, page, addr);
-	else
-		vunmap(addr);
+
+	if (obj->dma_buf)
+		return dma_buf_vunmap_unlocked(obj->dma_buf, &map);
+
+	vunmap(addr);
 }
 
 static struct host1x_bo *tegra_bo_get(struct host1x_bo *bo)
@@ -107,8 +230,6 @@ static const struct host1x_bo_ops tegra_bo_ops = {
 	.unpin = tegra_bo_unpin,
 	.mmap = tegra_bo_mmap,
 	.munmap = tegra_bo_munmap,
-	.kmap = tegra_bo_kmap,
-	.kunmap = tegra_bo_kunmap,
 };
 
 static int tegra_bo_iommu_map(struct tegra_drm *tegra, struct tegra_bo *bo)
@@ -133,10 +254,9 @@ static int tegra_bo_iommu_map(struct tegra_drm *tegra, struct tegra_bo *bo)
 		goto unlock;
 	}
 
-	bo->paddr = bo->mm->start;
+	bo->iova = bo->mm->start;
 
-	bo->size = iommu_map_sg(tegra->domain, bo->paddr, bo->sgt->sgl,
-				bo->sgt->nents, prot);
+	bo->size = iommu_map_sgtable(tegra->domain, bo->iova, bo->sgt, prot);
 	if (!bo->size) {
 		dev_err(tegra->drm->dev, "failed to map buffer\n");
 		err = -ENOMEM;
@@ -161,7 +281,7 @@ static int tegra_bo_iommu_unmap(struct tegra_drm *tegra, struct tegra_bo *bo)
 		return 0;
 
 	mutex_lock(&tegra->mm_lock);
-	iommu_unmap(tegra->domain, bo->paddr, bo->size);
+	iommu_unmap(tegra->domain, bo->iova, bo->size);
 	drm_mm_remove_node(bo->mm);
 	mutex_unlock(&tegra->mm_lock);
 
@@ -170,6 +290,12 @@ static int tegra_bo_iommu_unmap(struct tegra_drm *tegra, struct tegra_bo *bo)
 	return 0;
 }
 
+/* GEM object callbacks installed on every buffer object in tegra_bo_alloc_object(). */
+static const struct drm_gem_object_funcs tegra_gem_object_funcs = {
+	.vm_ops = &tegra_bo_vm_ops,
+	.export = tegra_gem_prime_export,
+	.free = tegra_bo_free_object,
+};
+
 static struct tegra_bo *tegra_bo_alloc_object(struct drm_device *drm,
 					      size_t size)
 {
@@ -180,6 +306,8 @@ static struct tegra_bo *tegra_bo_alloc_object(struct drm_device *drm,
 	if (!bo)
 		return ERR_PTR(-ENOMEM);
 
+	bo->gem.funcs = &tegra_gem_object_funcs;
+
 	host1x_bo_init(&bo->base, &tegra_bo_ops);
 	size = round_up(size, PAGE_SIZE);
 
@@ -203,13 +331,12 @@ free:
+/*
+ * Release the backing storage of a buffer object.
+ *
+ * Page-backed objects are DMA-unmapped, their pages are put and the SG table
+ * is freed. Objects with only a vaddr are freed with dma_free_wc(). Objects
+ * with neither pages nor vaddr are left untouched.
+ */
 static void tegra_bo_free(struct drm_device *drm, struct tegra_bo *bo)
 {
 	if (bo->pages) {
-		dma_unmap_sg(drm->dev, bo->sgt->sgl, bo->sgt->nents,
-			     DMA_BIDIRECTIONAL);
+		dma_unmap_sgtable(drm->dev, bo->sgt, DMA_FROM_DEVICE, 0);
 		drm_gem_put_pages(&bo->gem, bo->pages, true, true);
 		sg_free_table(bo->sgt);
 		kfree(bo->sgt);
 	} else if (bo->vaddr) {
-		dma_free_wc(drm->dev, bo->gem.size, bo->vaddr, bo->paddr);
+		dma_free_wc(drm->dev, bo->gem.size, bo->vaddr, bo->iova);
 	}
 }
 
@@ -223,18 +350,15 @@ static int tegra_bo_get_pages(struct drm_device *drm, struct tegra_bo *bo)
 
 	bo->num_pages = bo->gem.size >> PAGE_SHIFT;
 
-	bo->sgt = drm_prime_pages_to_sg(bo->pages, bo->num_pages);
+	bo->sgt = drm_prime_pages_to_sg(bo->gem.dev, bo->pages, bo->num_pages);
 	if (IS_ERR(bo->sgt)) {
 		err = PTR_ERR(bo->sgt);
 		goto put_pages;
 	}
 
-	err = dma_map_sg(drm->dev, bo->sgt->sgl, bo->sgt->nents,
-			 DMA_BIDIRECTIONAL);
-	if (err == 0) {
-		err = -EFAULT;
+	err = dma_map_sgtable(drm->dev, bo->sgt, DMA_FROM_DEVICE, 0);
+	if (err)
 		goto free_sgt;
-	}
 
 	return 0;
 
@@ -264,7 +388,7 @@ static int tegra_bo_alloc(struct drm_device *drm, struct tegra_bo *bo)
 	} else {
 		size_t size = bo->gem.size;
 
-		bo->vaddr = dma_alloc_wc(drm->dev, size, &bo->paddr,
+		bo->vaddr = dma_alloc_wc(drm->dev, size, &bo->iova,
 					 GFP_KERNEL | __GFP_NOWARN);
 		if (!bo->vaddr) {
 			dev_err(drm->dev,
@@ -324,7 +448,7 @@ struct tegra_bo *tegra_bo_create_with_handle(struct drm_file *file,
 		return ERR_PTR(err);
 	}
 
-	drm_gem_object_put_unlocked(&bo->gem);
+	drm_gem_object_put(&bo->gem);
 
 	return bo;
 }
@@ -341,40 +465,38 @@ static struct tegra_bo *tegra_bo_import(struct drm_device *drm,
 	if (IS_ERR(bo))
 		return bo;
 
-	attach = dma_buf_attach(buf, drm->dev);
-	if (IS_ERR(attach)) {
-		err = PTR_ERR(attach);
-		goto free;
-	}
-
-	get_dma_buf(buf);
+	/*
+	 * If we need to use IOMMU API to map the dma-buf into the internally managed
+	 * domain, map it first to the DRM device to get an sgt.
+	 */
+	if (tegra->domain) {
+		attach = dma_buf_attach(buf, drm->dev);
+		if (IS_ERR(attach)) {
+			err = PTR_ERR(attach);
+			goto free;
+		}
 
-	bo->sgt = dma_buf_map_attachment(attach, DMA_TO_DEVICE);
-	if (IS_ERR(bo->sgt)) {
-		err = PTR_ERR(bo->sgt);
-		goto detach;
-	}
+		bo->sgt = dma_buf_map_attachment_unlocked(attach, DMA_TO_DEVICE);
+		if (IS_ERR(bo->sgt)) {
+			err = PTR_ERR(bo->sgt);
+			goto detach;
+		}
 
-	if (tegra->domain) {
 		err = tegra_bo_iommu_map(tegra, bo);
 		if (err < 0)
 			goto detach;
-	} else {
-		if (bo->sgt->nents > 1) {
-			err = -EINVAL;
-			goto detach;
-		}
 
-		bo->paddr = sg_dma_address(bo->sgt->sgl);
+		bo->gem.import_attach = attach;
 	}
 
-	bo->gem.import_attach = attach;
+	get_dma_buf(buf);
+	bo->dma_buf = buf;
 
 	return bo;
 
 detach:
 	if (!IS_ERR_OR_NULL(bo->sgt))
-		dma_buf_unmap_attachment(attach, bo->sgt, DMA_TO_DEVICE);
+		dma_buf_unmap_attachment_unlocked(attach, bo->sgt, DMA_TO_DEVICE);
 
 	dma_buf_detach(buf, attach);
 	dma_buf_put(buf);
@@ -387,19 +509,33 @@ free:
+/*
+ * Destroy a buffer object.
+ *
+ * Cached mappings still on the object's mapping list are unpinned; mappings
+ * that are not cached are reported as stale. With a driver-managed IOMMU
+ * domain, the IOVA mapping is removed and, for imported objects, the import
+ * attachment is unmapped and detached. Finally the backing storage, the
+ * dma_buf reference (if any), the GEM state and the object are released.
+ */
 void tegra_bo_free_object(struct drm_gem_object *gem)
 {
 	struct tegra_drm *tegra = gem->dev->dev_private;
+	struct host1x_bo_mapping *mapping, *tmp;
 	struct tegra_bo *bo = to_tegra_bo(gem);
 
-	if (tegra->domain)
+	/* remove all mappings of this buffer object from any caches */
+	list_for_each_entry_safe(mapping, tmp, &bo->base.mappings, list) {
+		if (mapping->cache)
+			host1x_bo_unpin(mapping);
+		else
+			dev_err(gem->dev->dev, "mapping %p stale for device %s\n", mapping,
+				dev_name(mapping->dev));
+	}
+
+	if (tegra->domain) {
 		tegra_bo_iommu_unmap(tegra, bo);
 
-	if (gem->import_attach) {
-		dma_buf_unmap_attachment(gem->import_attach, bo->sgt,
-					 DMA_TO_DEVICE);
-		drm_prime_gem_destroy(gem, NULL);
-	} else {
-		tegra_bo_free(gem->dev, bo);
+		if (drm_gem_is_imported(gem)) {
+			dma_buf_unmap_attachment_unlocked(gem->import_attach, bo->sgt,
+							  DMA_TO_DEVICE);
+			dma_buf_detach(gem->import_attach->dmabuf, gem->import_attach);
+		}
 	}
 
+	/* does nothing for objects that have neither pages nor a vaddr */
+	tegra_bo_free(gem->dev, bo);
+
+	if (bo->dma_buf)
+		dma_buf_put(bo->dma_buf);
+
 	drm_gem_object_release(gem);
 	kfree(bo);
 }
@@ -407,12 +543,13 @@ void tegra_bo_free_object(struct drm_gem_object *gem)
 int tegra_bo_dumb_create(struct drm_file *file, struct drm_device *drm,
 			 struct drm_mode_create_dumb *args)
 {
-	unsigned int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8);
 	struct tegra_drm *tegra = drm->dev_private;
 	struct tegra_bo *bo;
+	int ret;
 
-	args->pitch = round_up(min_pitch, tegra->pitch_align);
-	args->size = args->pitch * args->height;
+	ret = drm_mode_size_dumb(drm, args, tegra->pitch_align, 0);
+	if (ret)
+		return ret;
 
 	bo = tegra_bo_create_with_handle(file, drm, args->size, 0,
 					 &args->handle);
@@ -458,10 +595,10 @@ int __tegra_gem_mmap(struct drm_gem_object *gem, struct vm_area_struct *vma)
 		 * and set the vm_pgoff (used as a fake buffer offset by DRM)
 		 * to 0 as we want to map the whole buffer.
 		 */
-		vma->vm_flags &= ~VM_PFNMAP;
+		vm_flags_clear(vma, VM_PFNMAP);
 		vma->vm_pgoff = 0;
 
-		err = dma_mmap_wc(gem->dev->dev, vma, bo->vaddr, bo->paddr,
+		err = dma_mmap_wc(gem->dev->dev, vma, bo->vaddr, bo->iova,
 				  gem->size);
 		if (err < 0) {
 			drm_gem_vm_close(vma);
@@ -472,8 +609,7 @@ int __tegra_gem_mmap(struct drm_gem_object *gem, struct vm_area_struct *vma)
 	} else {
 		pgprot_t prot = vm_get_page_prot(vma->vm_flags);
 
-		vma->vm_flags |= VM_MIXEDMAP;
-		vma->vm_flags &= ~VM_PFNMAP;
+		vm_flags_mod(vma, VM_MIXEDMAP, VM_PFNMAP);
 
 		vma->vm_page_prot = pgprot_writecombine(prot);
 	}
@@ -508,25 +644,18 @@ tegra_gem_prime_map_dma_buf(struct dma_buf_attachment *attach,
 		return NULL;
 
 	if (bo->pages) {
-		struct scatterlist *sg;
-		unsigned int i;
-
-		if (sg_alloc_table(sgt, bo->num_pages, GFP_KERNEL))
-			goto free;
-
-		for_each_sg(sgt->sgl, sg, bo->num_pages, i)
-			sg_set_page(sg, bo->pages[i], PAGE_SIZE, 0);
-
-		if (dma_map_sg(attach->dev, sgt->sgl, sgt->nents, dir) == 0)
+		if (sg_alloc_table_from_pages(sgt, bo->pages, bo->num_pages,
+					      0, gem->size, GFP_KERNEL) < 0)
 			goto free;
 	} else {
-		if (sg_alloc_table(sgt, 1, GFP_KERNEL))
+		if (dma_get_sgtable(attach->dev, sgt, bo->vaddr, bo->iova,
+				    gem->size) < 0)
 			goto free;
-
-		sg_dma_address(sgt->sgl) = bo->paddr;
-		sg_dma_len(sgt->sgl) = gem->size;
 	}
 
+	if (dma_map_sgtable(attach->dev, sgt, dir, 0))
+		goto free;
+
 	return sgt;
 
 free:
@@ -543,7 +672,7 @@ static void tegra_gem_prime_unmap_dma_buf(struct dma_buf_attachment *attach,
 	struct tegra_bo *bo = to_tegra_bo(gem);
 
 	if (bo->pages)
-		dma_unmap_sg(attach->dev, sgt->sgl, sgt->nents, dir);
+		dma_unmap_sgtable(attach->dev, sgt, dir, 0);
 
 	sg_free_table(sgt);
 	kfree(sgt);
@@ -562,8 +691,7 @@ static int tegra_gem_prime_begin_cpu_access(struct dma_buf *buf,
 	struct drm_device *drm = gem->dev;
 
 	if (bo->pages)
-		dma_sync_sg_for_cpu(drm->dev, bo->sgt->sgl, bo->sgt->nents,
-				    DMA_FROM_DEVICE);
+		dma_sync_sgtable_for_cpu(drm->dev, bo->sgt, DMA_FROM_DEVICE);
 
 	return 0;
 }
@@ -576,22 +704,11 @@ static int tegra_gem_prime_end_cpu_access(struct dma_buf *buf,
 	struct drm_device *drm = gem->dev;
 
 	if (bo->pages)
-		dma_sync_sg_for_device(drm->dev, bo->sgt->sgl, bo->sgt->nents,
-				       DMA_TO_DEVICE);
+		dma_sync_sgtable_for_device(drm->dev, bo->sgt, DMA_TO_DEVICE);
 
 	return 0;
 }
 
-static void *tegra_gem_prime_kmap(struct dma_buf *buf, unsigned long page)
-{
-	return NULL;
-}
-
-static void tegra_gem_prime_kunmap(struct dma_buf *buf, unsigned long page,
-				   void *addr)
-{
-}
-
 static int tegra_gem_prime_mmap(struct dma_buf *buf, struct vm_area_struct *vma)
 {
 	struct drm_gem_object *gem = buf->priv;
@@ -604,16 +721,27 @@ static int tegra_gem_prime_mmap(struct dma_buf *buf, struct vm_area_struct *vma)
 	return __tegra_gem_mmap(gem, vma);
 }
 
-static void *tegra_gem_prime_vmap(struct dma_buf *buf)
+/*
+ * dma_buf vmap callback: map the exported buffer object with tegra_bo_mmap()
+ * and store the resulting address in @map.
+ *
+ * Returns 0 on success or the negative error code from tegra_bo_mmap().
+ */
+static int tegra_gem_prime_vmap(struct dma_buf *buf, struct iosys_map *map)
 {
 	struct drm_gem_object *gem = buf->priv;
 	struct tegra_bo *bo = to_tegra_bo(gem);
+	void *vaddr;
+
+	vaddr = tegra_bo_mmap(&bo->base);
+	if (IS_ERR(vaddr))
+		return PTR_ERR(vaddr);
 
-	return bo->vaddr;
+	iosys_map_set_vaddr(map, vaddr);
+
+	return 0;
 }
 
-static void tegra_gem_prime_vunmap(struct dma_buf *buf, void *vaddr)
+/* dma_buf vunmap callback: release the address in @map with tegra_bo_munmap(). */
+static void tegra_gem_prime_vunmap(struct dma_buf *buf, struct iosys_map *map)
 {
+	struct drm_gem_object *gem = buf->priv;
+	struct tegra_bo *bo = to_tegra_bo(gem);
+
+	tegra_bo_munmap(&bo->base, map->vaddr);
 }
 
 static const struct dma_buf_ops tegra_gem_prime_dmabuf_ops = {
@@ -622,27 +750,24 @@ static const struct dma_buf_ops tegra_gem_prime_dmabuf_ops = {
 	.release = tegra_gem_prime_release,
 	.begin_cpu_access = tegra_gem_prime_begin_cpu_access,
 	.end_cpu_access = tegra_gem_prime_end_cpu_access,
-	.map = tegra_gem_prime_kmap,
-	.unmap = tegra_gem_prime_kunmap,
 	.mmap = tegra_gem_prime_mmap,
 	.vmap = tegra_gem_prime_vmap,
 	.vunmap = tegra_gem_prime_vunmap,
 };
 
-struct dma_buf *tegra_gem_prime_export(struct drm_device *drm,
-				       struct drm_gem_object *gem,
+/*
+ * Export @gem as a dma_buf that uses tegra_gem_prime_dmabuf_ops.
+ *
+ * @gem: GEM object to export; it becomes the private data of the dma_buf
+ * @flags: flags passed through to the export info
+ *
+ * Returns whatever drm_gem_dmabuf_export() returns.
+ */
+struct dma_buf *tegra_gem_prime_export(struct drm_gem_object *gem,
 				       int flags)
 {
 	DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
 
 	exp_info.exp_name = KBUILD_MODNAME;
-	exp_info.owner = drm->driver->fops->owner;
+	exp_info.owner = gem->dev->driver->fops->owner;
 	exp_info.ops = &tegra_gem_prime_dmabuf_ops;
 	exp_info.size = gem->size;
 	exp_info.flags = flags;
 	exp_info.priv = gem;
 
-	return drm_gem_dmabuf_export(drm, &exp_info);
+	return drm_gem_dmabuf_export(gem->dev, &exp_info);
 }
 
 struct drm_gem_object *tegra_gem_prime_import(struct drm_device *drm,
@@ -665,3 +790,16 @@ struct drm_gem_object *tegra_gem_prime_import(struct drm_device *drm,
 
 	return &bo->gem;
 }
+
+/*
+ * Resolve the GEM handle @handle of @file to the host1x buffer object that is
+ * embedded in the matching tegra_bo.
+ *
+ * Returns the host1x_bo, or NULL if the handle lookup fails.
+ */
+struct host1x_bo *tegra_gem_lookup(struct drm_file *file, u32 handle)
+{
+	struct drm_gem_object *gem = drm_gem_object_lookup(file, handle);
+
+	return gem ? &to_tegra_bo(gem)->base : NULL;
+}
diff --git a/drivers/gpu/drm/tegra/gem.h b/drivers/gpu/drm/tegra/gem.h
index 6bd7dd7e55b4..bf2cbd48eb3f 100644
--- a/drivers/gpu/drm/tegra/gem.h
+++ b/drivers/gpu/drm/tegra/gem.h
@@ -1,11 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Tegra host1x GEM implementation
  *
  * Copyright (c) 2012-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef __HOST1X_GEM_H
@@ -14,7 +11,6 @@
 #include <linux/host1x.h>
 
 #include <drm/drm.h>
-#include <drm/drmP.h>
 #include <drm/drm_gem.h>
 
 #define TEGRA_BO_BOTTOM_UP (1 << 0)
@@ -25,18 +21,45 @@ enum tegra_bo_tiling_mode {
 	TEGRA_BO_TILING_MODE_BLOCK,
 };
 
+/* Sector layout of a buffer object; stored in struct tegra_bo_tiling. */
+enum tegra_bo_sector_layout {
+	TEGRA_BO_SECTOR_LAYOUT_TEGRA,
+	TEGRA_BO_SECTOR_LAYOUT_GPU,
+};
+
+/* Tiling description of a buffer object: mode, mode parameter and sector layout. */
 struct tegra_bo_tiling {
 	enum tegra_bo_tiling_mode mode;
 	unsigned long value;
+	enum tegra_bo_sector_layout sector_layout;
 };
 
+/*
+ * How memory is referenced within a tegra_bo:
+ *
+ * Buffer source  | Mapping API(*)  | Fields
+ * ---------------+-----------------+---------------
+ * Allocated here | DMA API         | iova (IOVA mapped to drm->dev), vaddr (CPU VA)
+ *
+ * Allocated here | IOMMU API       | pages/num_pages (Phys. memory), sgt (Mapped to drm->dev),
+ *                                  | iova/size (Mapped to domain)
+ *
+ * Imported       | DMA API         | dma_buf (Imported dma_buf)
+ *
+ * Imported       | IOMMU API       | dma_buf (Imported dma_buf),
+ *                                  | gem->import_attach (Attachment on drm->dev),
+ *                                  | sgt (Mapped to drm->dev)
+ *                                  | iova/size (Mapped to domain)
+ *
+ * (*) If tegra->domain is set, i.e. TegraDRM IOMMU domain is directly managed through IOMMU API,
+ *     this is IOMMU API. Otherwise DMA API.
+ */
 struct tegra_bo {
 	struct drm_gem_object gem;
 	struct host1x_bo base;
 	unsigned long flags;
 	struct sg_table *sgt;
-	dma_addr_t paddr;
+	dma_addr_t iova;
 	void *vaddr;
+	struct dma_buf *dma_buf;
 
 	struct drm_mm_node *mm;
 	unsigned long num_pages;
@@ -73,10 +96,11 @@ extern const struct vm_operations_struct tegra_bo_vm_ops;
 int __tegra_gem_mmap(struct drm_gem_object *gem, struct vm_area_struct *vma);
 int tegra_drm_mmap(struct file *file, struct vm_area_struct *vma);
 
-struct dma_buf *tegra_gem_prime_export(struct drm_device *drm,
-				       struct drm_gem_object *gem,
+struct dma_buf *tegra_gem_prime_export(struct drm_gem_object *gem,
 				       int flags);
 struct drm_gem_object *tegra_gem_prime_import(struct drm_device *drm,
 					      struct dma_buf *buf);
 
+struct host1x_bo *tegra_gem_lookup(struct drm_file *file, u32 handle);
+
 #endif
diff --git a/drivers/gpu/drm/tegra/gr2d.c b/drivers/gpu/drm/tegra/gr2d.c
index 673059fd2fcb..21f4dd0fa6af 100644
--- a/drivers/gpu/drm/tegra/gr2d.c
+++ b/drivers/gpu/drm/tegra/gr2d.c
@@ -1,29 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2012-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/iommu.h>
-#include <linux/of_device.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+
+#include <soc/tegra/common.h>
 
 #include "drm.h"
 #include "gem.h"
 #include "gr2d.h"
 
+/* Indices into gr2d->resets[]; RST_GR2D_MAX is the size of that array. */
+enum {
+	RST_MC,
+	RST_GR2D,
+	RST_GR2D_MAX,
+};
+
 struct gr2d_soc {
 	unsigned int version;
 };
 
 struct gr2d {
-	struct iommu_group *group;
 	struct tegra_drm_client client;
 	struct host1x_channel *channel;
 	struct clk *clk;
 
+	struct reset_control_bulk_data resets[RST_GR2D_MAX];
+	unsigned int nresets;
+
 	const struct gr2d_soc *soc;
 
 	DECLARE_BITMAP(addr_regs, GR2D_NUM_REGS);
@@ -37,12 +49,12 @@ static inline struct gr2d *to_gr2d(struct tegra_drm_client *client)
 static int gr2d_init(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct drm_device *dev = dev_get_drvdata(client->host);
 	unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
 	struct gr2d *gr2d = to_gr2d(drm);
 	int err;
 
-	gr2d->channel = host1x_channel_request(client->dev);
+	gr2d->channel = host1x_channel_request(client);
 	if (!gr2d->channel)
 		return -ENOMEM;
 
@@ -53,9 +65,8 @@ static int gr2d_init(struct host1x_client *client)
 		goto put;
 	}
 
-	gr2d->group = host1x_client_iommu_attach(client, false);
-	if (IS_ERR(gr2d->group)) {
-		err = PTR_ERR(gr2d->group);
+	err = host1x_client_iommu_attach(client);
+	if (err < 0) {
 		dev_err(client->dev, "failed to attach to domain: %d\n", err);
 		goto free;
 	}
@@ -63,15 +74,15 @@ static int gr2d_init(struct host1x_client *client)
 	err = tegra_drm_register_client(dev->dev_private, drm);
 	if (err < 0) {
 		dev_err(client->dev, "failed to register client: %d\n", err);
-		goto detach;
+		goto detach_iommu;
 	}
 
 	return 0;
 
-detach:
-	host1x_client_iommu_detach(client, gr2d->group);
+detach_iommu:
+	host1x_client_iommu_detach(client);
 free:
-	host1x_syncpt_free(client->syncpts[0]);
+	host1x_syncpt_put(client->syncpts[0]);
 put:
 	host1x_channel_put(gr2d->channel);
 	return err;
@@ -80,7 +91,7 @@ put:
 static int gr2d_exit(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct drm_device *dev = dev_get_drvdata(client->host);
 	struct tegra_drm *tegra = dev->dev_private;
 	struct gr2d *gr2d = to_gr2d(drm);
 	int err;
@@ -89,10 +100,15 @@ static int gr2d_exit(struct host1x_client *client)
 	if (err < 0)
 		return err;
 
-	host1x_client_iommu_detach(client, gr2d->group);
-	host1x_syncpt_free(client->syncpts[0]);
+	pm_runtime_dont_use_autosuspend(client->dev);
+	pm_runtime_force_suspend(client->dev);
+
+	host1x_client_iommu_detach(client);
+	host1x_syncpt_put(client->syncpts[0]);
 	host1x_channel_put(gr2d->channel);
 
+	gr2d->channel = NULL;
+
 	return 0;
 }
 
@@ -165,9 +181,14 @@ static const struct gr2d_soc tegra30_gr2d_soc = {
 	.version = 0x30,
 };
 
+static const struct gr2d_soc tegra114_gr2d_soc = {
+	.version = 0x35,
+};
+
+/*
+ * Device tree match table. Each entry's .data is the gr2d_soc description
+ * that gr2d_probe() reads back with of_device_get_match_data().
+ */
 static const struct of_device_id gr2d_match[] = {
-	{ .compatible = "nvidia,tegra30-gr2d", .data = &tegra20_gr2d_soc },
-	{ .compatible = "nvidia,tegra20-gr2d", .data = &tegra30_gr2d_soc },
+	{ .compatible = "nvidia,tegra114-gr2d", .data = &tegra114_gr2d_soc },
+	{ .compatible = "nvidia,tegra30-gr2d", .data = &tegra30_gr2d_soc },
+	{ .compatible = "nvidia,tegra20-gr2d", .data = &tegra20_gr2d_soc },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, gr2d_match);
@@ -181,6 +202,7 @@ static const u32 gr2d_addr_regs[] = {
 	GR2D_DSTC_BASE_ADDR,
 	GR2D_SRCA_BASE_ADDR,
 	GR2D_SRCB_BASE_ADDR,
+	GR2D_PATBASE_ADDR,
 	GR2D_SRC_BASE_ADDR_SB,
 	GR2D_DSTA_BASE_ADDR_SB,
 	GR2D_DSTB_BASE_ADDR_SB,
@@ -188,6 +210,27 @@ static const u32 gr2d_addr_regs[] = {
 	GR2D_VA_BASE_ADDR_SB,
 };
 
+/*
+ * Look up the "mc" and "2d" reset controls of the device and store them in
+ * gr2d->resets[].
+ *
+ * Returns 0 on success, the error from the bulk lookup, or -ENOENT (with a
+ * warning) when the "2d" reset control is absent after the lookup.
+ */
+static int gr2d_get_resets(struct device *dev, struct gr2d *gr2d)
+{
+	struct reset_control_bulk_data *resets = gr2d->resets;
+	int ret;
+
+	resets[RST_MC].id = "mc";
+	resets[RST_GR2D].id = "2d";
+	gr2d->nresets = RST_GR2D_MAX;
+
+	ret = devm_reset_control_bulk_get_optional_exclusive_released(dev, gr2d->nresets,
+								      resets);
+	if (ret) {
+		dev_err(dev, "failed to get reset: %d\n", ret);
+		return ret;
+	}
+
+	/* the lookup treats resets as optional, but the "2d" one must exist */
+	return WARN_ON(!resets[RST_GR2D].rstc) ? -ENOENT : 0;
+}
+
 static int gr2d_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -200,6 +243,8 @@ static int gr2d_probe(struct platform_device *pdev)
 	if (!gr2d)
 		return -ENOMEM;
 
+	platform_set_drvdata(pdev, gr2d);
+
 	gr2d->soc = of_device_get_match_data(dev);
 
 	syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL);
@@ -212,11 +257,9 @@ static int gr2d_probe(struct platform_device *pdev)
 		return PTR_ERR(gr2d->clk);
 	}
 
-	err = clk_prepare_enable(gr2d->clk);
-	if (err) {
-		dev_err(dev, "cannot turn on clock\n");
+	err = gr2d_get_resets(dev, gr2d);
+	if (err)
 		return err;
-	}
 
 	INIT_LIST_HEAD(&gr2d->client.base.list);
 	gr2d->client.base.ops = &gr2d_client_ops;
@@ -229,10 +272,13 @@ static int gr2d_probe(struct platform_device *pdev)
 	gr2d->client.version = gr2d->soc->version;
 	gr2d->client.ops = &gr2d_ops;
 
+	err = devm_tegra_core_dev_init_opp_table_common(dev);
+	if (err)
+		return err;
+
 	err = host1x_client_register(&gr2d->client.base);
 	if (err < 0) {
 		dev_err(dev, "failed to register host1x client: %d\n", err);
-		clk_disable_unprepare(gr2d->clk);
 		return err;
 	}
 
@@ -240,32 +286,112 @@ static int gr2d_probe(struct platform_device *pdev)
 	for (i = 0; i < ARRAY_SIZE(gr2d_addr_regs); i++)
 		set_bit(gr2d_addr_regs[i], gr2d->addr_regs);
 
-	platform_set_drvdata(pdev, gr2d);
-
 	return 0;
 }
 
-static int gr2d_remove(struct platform_device *pdev)
+/* Platform remove: disable runtime PM, then unregister the host1x client. */
+static void gr2d_remove(struct platform_device *pdev)
 {
 	struct gr2d *gr2d = platform_get_drvdata(pdev);
+
+	pm_runtime_disable(&pdev->dev);
+	host1x_client_unregister(&gr2d->client.base);
+}
+
+/*
+ * Runtime suspend: stop the channel, release the bulk resets, assert only the
+ * MC reset and disable the clock.
+ *
+ * If acquiring or asserting the MC reset fails, the bulk resets are acquired
+ * and deasserted again and the error is returned; the clock stays enabled.
+ */
+static int __maybe_unused gr2d_runtime_suspend(struct device *dev)
+{
+	struct gr2d *gr2d = dev_get_drvdata(dev);
 	int err;
 
-	err = host1x_client_unregister(&gr2d->client.base);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
-			err);
-		return err;
+	host1x_channel_stop(gr2d->channel);
+	reset_control_bulk_release(gr2d->nresets, gr2d->resets);
+
+	/*
+	 * GR2D module shouldn't be reset while hardware is idling, otherwise
+	 * host1x's cmdproc will get stuck trying to access any G2 register
+	 * after reset. GR2D module could be either hot-reset or reset after
+	 * power-gating of the HEG partition. Hence we will put in reset only
+	 * the memory client part of the module, the HEG GENPD will take care
+	 * of resetting GR2D module across power-gating.
+	 *
+	 * On Tegra20 there is no HEG partition, but it's okay to have
+	 * undetermined h/w state since userspace is expected to reprogram
+	 * the state on each job submission anyways.
+	 */
+	err = reset_control_acquire(gr2d->resets[RST_MC].rstc);
+	if (err) {
+		dev_err(dev, "failed to acquire MC reset: %d\n", err);
+		goto acquire_reset;
+	}
+
+	err = reset_control_assert(gr2d->resets[RST_MC].rstc);
+	/* release before checking err so the MC reset is released either way */
+	reset_control_release(gr2d->resets[RST_MC].rstc);
+	if (err) {
+		dev_err(dev, "failed to assert MC reset: %d\n", err);
+		goto acquire_reset;
 	}
 
 	clk_disable_unprepare(gr2d->clk);
 
 	return 0;
+
+acquire_reset:
+	reset_control_bulk_acquire(gr2d->nresets, gr2d->resets);
+	reset_control_bulk_deassert(gr2d->nresets, gr2d->resets);
+
+	return err;
 }
 
+/*
+ * Runtime resume: acquire the bulk resets, enable the clock, wait, then
+ * deassert the resets.
+ *
+ * On failure the steps already taken are undone in reverse order and the
+ * error is returned.
+ */
+static int __maybe_unused gr2d_runtime_resume(struct device *dev)
+{
+	struct gr2d *gr2d = dev_get_drvdata(dev);
+	int err;
+
+	err = reset_control_bulk_acquire(gr2d->nresets, gr2d->resets);
+	if (err) {
+		dev_err(dev, "failed to acquire reset: %d\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(gr2d->clk);
+	if (err) {
+		dev_err(dev, "failed to enable clock: %d\n", err);
+		goto release_reset;
+	}
+
+	usleep_range(2000, 4000);
+
+	/* this is a reset array which deasserts both 2D MC and 2D itself */
+	err = reset_control_bulk_deassert(gr2d->nresets, gr2d->resets);
+	if (err) {
+		dev_err(dev, "failed to deassert reset: %d\n", err);
+		goto disable_clk;
+	}
+
+	/*
+	 * NOTE(review): runtime PM is enabled and configured from inside the
+	 * runtime-resume callback itself - confirm this is intentional.
+	 */
+	pm_runtime_enable(dev);
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, 500);
+
+	return 0;
+
+disable_clk:
+	clk_disable_unprepare(gr2d->clk);
+release_reset:
+	reset_control_bulk_release(gr2d->nresets, gr2d->resets);
+
+	return err;
+}
+
+/*
+ * PM callbacks: runtime suspend/resume are handled by the driver's own
+ * callbacks; system sleep uses pm_runtime_force_suspend/resume.
+ */
+static const struct dev_pm_ops tegra_gr2d_pm = {
+	SET_RUNTIME_PM_OPS(gr2d_runtime_suspend, gr2d_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
+};
+
 struct platform_driver tegra_gr2d_driver = {
 	.driver = {
 		.name = "tegra-gr2d",
 		.of_match_table = gr2d_match,
+		.pm = &tegra_gr2d_pm,
 	},
 	.probe = gr2d_probe,
 	.remove = gr2d_remove,
diff --git a/drivers/gpu/drm/tegra/gr2d.h b/drivers/gpu/drm/tegra/gr2d.h
index 4d7304fb015e..9b7d66e15b9f 100644
--- a/drivers/gpu/drm/tegra/gr2d.h
+++ b/drivers/gpu/drm/tegra/gr2d.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2013 NVIDIA Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef TEGRA_GR2D_H
@@ -17,6 +14,7 @@
 #define GR2D_DSTC_BASE_ADDR		0x2d
 #define GR2D_SRCA_BASE_ADDR		0x31
 #define GR2D_SRCB_BASE_ADDR		0x32
+#define GR2D_PATBASE_ADDR		0x47
 #define GR2D_SRC_BASE_ADDR_SB		0x48
 #define GR2D_DSTA_BASE_ADDR_SB		0x49
 #define GR2D_DSTB_BASE_ADDR_SB		0x4a
diff --git a/drivers/gpu/drm/tegra/gr3d.c b/drivers/gpu/drm/tegra/gr3d.c
index 4778ae999668..42e9656ab80c 100644
--- a/drivers/gpu/drm/tegra/gr3d.c
+++ b/drivers/gpu/drm/tegra/gr3d.c
@@ -1,40 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2013 Avionic Design GmbH
  * Copyright (C) 2013 NVIDIA Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
+#include <linux/delay.h>
 #include <linux/host1x.h>
 #include <linux/iommu.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_opp.h>
+#include <linux/pm_runtime.h>
 #include <linux/reset.h>
 
+#include <soc/tegra/common.h>
 #include <soc/tegra/pmc.h>
 
 #include "drm.h"
 #include "gem.h"
 #include "gr3d.h"
 
+enum {
+	RST_MC,
+	RST_GR3D,
+	RST_MC2,
+	RST_GR3D2,
+	RST_GR3D_MAX,
+};
+
 struct gr3d_soc {
 	unsigned int version;
+	unsigned int num_clocks;
+	unsigned int num_resets;
 };
 
 struct gr3d {
-	struct iommu_group *group;
 	struct tegra_drm_client client;
 	struct host1x_channel *channel;
-	struct clk *clk_secondary;
-	struct clk *clk;
-	struct reset_control *rst_secondary;
-	struct reset_control *rst;
 
 	const struct gr3d_soc *soc;
+	struct clk_bulk_data *clocks;
+	unsigned int nclocks;
+	struct reset_control_bulk_data resets[RST_GR3D_MAX];
+	unsigned int nresets;
+	struct dev_pm_domain_list *pd_list;
 
 	DECLARE_BITMAP(addr_regs, GR3D_NUM_REGS);
 };
@@ -47,12 +59,12 @@ static inline struct gr3d *to_gr3d(struct tegra_drm_client *client)
 static int gr3d_init(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct drm_device *dev = dev_get_drvdata(client->host);
 	unsigned long flags = HOST1X_SYNCPT_HAS_BASE;
 	struct gr3d *gr3d = to_gr3d(drm);
 	int err;
 
-	gr3d->channel = host1x_channel_request(client->dev);
+	gr3d->channel = host1x_channel_request(client);
 	if (!gr3d->channel)
 		return -ENOMEM;
 
@@ -63,9 +75,8 @@ static int gr3d_init(struct host1x_client *client)
 		goto put;
 	}
 
-	gr3d->group = host1x_client_iommu_attach(client, false);
-	if (IS_ERR(gr3d->group)) {
-		err = PTR_ERR(gr3d->group);
+	err = host1x_client_iommu_attach(client);
+	if (err < 0) {
 		dev_err(client->dev, "failed to attach to domain: %d\n", err);
 		goto free;
 	}
@@ -73,15 +84,15 @@ static int gr3d_init(struct host1x_client *client)
 	err = tegra_drm_register_client(dev->dev_private, drm);
 	if (err < 0) {
 		dev_err(client->dev, "failed to register client: %d\n", err);
-		goto detach;
+		goto detach_iommu;
 	}
 
 	return 0;
 
-detach:
-	host1x_client_iommu_detach(client, gr3d->group);
+detach_iommu:
+	host1x_client_iommu_detach(client);
 free:
-	host1x_syncpt_free(client->syncpts[0]);
+	host1x_syncpt_put(client->syncpts[0]);
 put:
 	host1x_channel_put(gr3d->channel);
 	return err;
@@ -90,7 +101,7 @@ put:
 static int gr3d_exit(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct drm_device *dev = dev_get_drvdata(client->host);
 	struct gr3d *gr3d = to_gr3d(drm);
 	int err;
 
@@ -98,10 +109,15 @@ static int gr3d_exit(struct host1x_client *client)
 	if (err < 0)
 		return err;
 
-	host1x_client_iommu_detach(client, gr3d->group);
-	host1x_syncpt_free(client->syncpts[0]);
+	pm_runtime_dont_use_autosuspend(client->dev);
+	pm_runtime_force_suspend(client->dev);
+
+	host1x_client_iommu_detach(client);
+	host1x_syncpt_put(client->syncpts[0]);
 	host1x_channel_put(gr3d->channel);
 
+	gr3d->channel = NULL;
+
 	return 0;
 }
 
@@ -160,14 +176,20 @@ static const struct tegra_drm_client_ops gr3d_ops = {
 
 static const struct gr3d_soc tegra20_gr3d_soc = {
 	.version = 0x20,
+	.num_clocks = 1,
+	.num_resets = 2,
 };
 
 static const struct gr3d_soc tegra30_gr3d_soc = {
 	.version = 0x30,
+	.num_clocks = 2,
+	.num_resets = 4,
 };
 
 static const struct gr3d_soc tegra114_gr3d_soc = {
 	.version = 0x35,
+	.num_clocks = 1,
+	.num_resets = 2,
 };
 
 static const struct of_device_id tegra_gr3d_match[] = {
@@ -283,9 +305,163 @@ static const u32 gr3d_addr_regs[] = {
 	GR3D_GLOBAL_SAMP23SURFADDR(15),
 };
 
+/* Power up powergate @id by hand; @name picks its reset and (named) clock. */
+static int gr3d_power_up_legacy_domain(struct device *dev, const char *name,
+				       unsigned int id)
+{
+	struct gr3d *gr3d = dev_get_drvdata(dev);
+	struct reset_control *reset;
+	struct clk *clk;
+	unsigned int i;
+	int err;
+
+	/*
+	 * The Tegra20 device-tree has a single, unnamed 3d clock, whereas
+	 * Tegra30+ device-trees have always specified names for the clocks.
+	 */
+	if (gr3d->nclocks == 1) {
+		if (id == TEGRA_POWERGATE_3D1)
+			return 0;
+
+		clk = gr3d->clocks[0].clk;
+	} else {
+		for (i = 0; i < gr3d->nclocks; i++) {
+			if (WARN_ON(!gr3d->clocks[i].id))
+				continue;
+
+			if (!strcmp(gr3d->clocks[i].id, name)) {
+				clk = gr3d->clocks[i].clk;
+				break;
+			}
+		}
+
+		if (WARN_ON(i == gr3d->nclocks))
+			return -EINVAL;
+	}
+
+	/*
+	 * We use an array of resets, which includes the MC resets, and an
+	 * MC reset shouldn't be asserted while the hardware is gated because
+	 * MC flushing will fail for gated hardware. Hence for the legacy
+	 * PD we request the individual reset separately.
+	 */
+	reset = reset_control_get_exclusive_released(dev, name);
+	if (IS_ERR(reset))
+		return PTR_ERR(reset);
+
+	err = reset_control_acquire(reset);
+	if (err) {
+		dev_err(dev, "failed to acquire %s reset: %d\n", name, err);
+	} else {
+		err = tegra_powergate_sequence_power_up(id, clk, reset);
+		reset_control_release(reset);
+	}
+
+	reset_control_put(reset);
+	if (err)
+		return err;
+
+	/*
+	 * tegra_powergate_sequence_power_up() leaves the clock enabled,
+	 * whereas GENPD does not. Disable it to keep clock-enable balanced.
+	 */
+	clk_disable_unprepare(clk);
+
+	return 0;
+}
+
+/*
+ * Set up power for the 3D engine: power up the legacy powergates by hand
+ * if the device-tree lacks "power-domains", otherwise attach the "3d0" and
+ * "3d1" PM domains unless the core already attached a domain.
+ */
+static int gr3d_init_power(struct device *dev, struct gr3d *gr3d)
+{
+	const char *domains[] = { "3d0", "3d1" };
+	struct dev_pm_domain_attach_data attach = {
+		.pd_names = domains,
+		.num_pd_names = ARRAY_SIZE(domains),
+		.pd_flags = PD_FLAG_REQUIRED_OPP,
+	};
+	int ret;
+
+	ret = of_count_phandle_with_args(dev->of_node, "power-domains",
+					 "#power-domain-cells");
+	if (ret == -ENOENT) {
+		/*
+		 * Older device-trees don't use GENPD, so the power domains
+		 * have to be toggled manually.
+		 */
+		ret = gr3d_power_up_legacy_domain(dev, "3d",
+						  TEGRA_POWERGATE_3D);
+		if (ret)
+			return ret;
+
+		return gr3d_power_up_legacy_domain(dev, "3d2",
+						   TEGRA_POWERGATE_3D1);
+	}
+
+	if (ret < 0)
+		return ret;
+
+	/*
+	 * A single power domain (Tegra20) is attached automatically by the
+	 * PM domain core; multiple domains (two on Tegra30+) are skipped by
+	 * the core and must be attached here.
+	 */
+	if (dev->pm_domain)
+		return 0;
+
+	ret = devm_pm_domain_attach_list(dev, &attach, &gr3d->pd_list);
+
+	return ret < 0 ? ret : 0;
+}
+
+/* Obtain all clocks of the device and check the count against the SoC data. */
+static int gr3d_get_clocks(struct device *dev, struct gr3d *gr3d)
+{
+	int count = devm_clk_bulk_get_all(dev, &gr3d->clocks);
+
+	if (count < 0) {
+		dev_err(dev, "failed to get clock: %d\n", count);
+		return count;
+	}
+
+	/* a non-negative return value is stored as the number of clocks */
+	gr3d->nclocks = count;
+	if (gr3d->nclocks == gr3d->soc->num_clocks)
+		return 0;
+
+	dev_err(dev, "invalid number of clocks: %u\n", gr3d->nclocks);
+	return -ENOENT;
+}
+
+/* Look up the named resets; "3d" (and "3d2" with four resets) must exist. */
+static int gr3d_get_resets(struct device *dev, struct gr3d *gr3d)
+{
+	int ret;
+
+	gr3d->nresets = gr3d->soc->num_resets;
+	gr3d->resets[RST_GR3D].id = "3d";
+	gr3d->resets[RST_GR3D2].id = "3d2";
+	gr3d->resets[RST_MC].id = "mc";
+	gr3d->resets[RST_MC2].id = "mc2";
+
+	ret = devm_reset_control_bulk_get_optional_exclusive_released(
+				dev, gr3d->nresets, gr3d->resets);
+	if (ret) {
+		dev_err(dev, "failed to get reset: %d\n", ret);
+		return ret;
+	}
+
+	if (WARN_ON(!gr3d->resets[RST_GR3D].rstc))
+		return -ENOENT;
+	return WARN_ON(gr3d->nresets == 4 && !gr3d->resets[RST_GR3D2].rstc) ?
+	       -ENOENT : 0;
+}
+
 static int gr3d_probe(struct platform_device *pdev)
 {
-	struct device_node *np = pdev->dev.of_node;
 	struct host1x_syncpt **syncpts;
 	struct gr3d *gr3d;
 	unsigned int i;
@@ -295,56 +471,25 @@ static int gr3d_probe(struct platform_device *pdev)
 	if (!gr3d)
 		return -ENOMEM;
 
+	platform_set_drvdata(pdev, gr3d);
+
 	gr3d->soc = of_device_get_match_data(&pdev->dev);
 
 	syncpts = devm_kzalloc(&pdev->dev, sizeof(*syncpts), GFP_KERNEL);
 	if (!syncpts)
 		return -ENOMEM;
 
-	gr3d->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(gr3d->clk)) {
-		dev_err(&pdev->dev, "cannot get clock\n");
-		return PTR_ERR(gr3d->clk);
-	}
-
-	gr3d->rst = devm_reset_control_get(&pdev->dev, "3d");
-	if (IS_ERR(gr3d->rst)) {
-		dev_err(&pdev->dev, "cannot get reset\n");
-		return PTR_ERR(gr3d->rst);
-	}
-
-	if (of_device_is_compatible(np, "nvidia,tegra30-gr3d")) {
-		gr3d->clk_secondary = devm_clk_get(&pdev->dev, "3d2");
-		if (IS_ERR(gr3d->clk_secondary)) {
-			dev_err(&pdev->dev, "cannot get secondary clock\n");
-			return PTR_ERR(gr3d->clk_secondary);
-		}
-
-		gr3d->rst_secondary = devm_reset_control_get(&pdev->dev,
-								"3d2");
-		if (IS_ERR(gr3d->rst_secondary)) {
-			dev_err(&pdev->dev, "cannot get secondary reset\n");
-			return PTR_ERR(gr3d->rst_secondary);
-		}
-	}
+	err = gr3d_get_clocks(&pdev->dev, gr3d);
+	if (err)
+		return err;
 
-	err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_3D, gr3d->clk,
-						gr3d->rst);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to power up 3D unit\n");
+	err = gr3d_get_resets(&pdev->dev, gr3d);
+	if (err)
 		return err;
-	}
 
-	if (gr3d->clk_secondary) {
-		err = tegra_powergate_sequence_power_up(TEGRA_POWERGATE_3D1,
-							gr3d->clk_secondary,
-							gr3d->rst_secondary);
-		if (err < 0) {
-			dev_err(&pdev->dev,
-				"failed to power up secondary 3D unit\n");
-			return err;
-		}
-	}
+	err = gr3d_init_power(&pdev->dev, gr3d);
+	if (err)
+		return err;
 
 	INIT_LIST_HEAD(&gr3d->client.base.list);
 	gr3d->client.base.ops = &gr3d_client_ops;
@@ -357,6 +502,10 @@ static int gr3d_probe(struct platform_device *pdev)
 	gr3d->client.version = gr3d->soc->version;
 	gr3d->client.ops = &gr3d_ops;
 
+	err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev);
+	if (err)
+		return err;
+
 	err = host1x_client_register(&gr3d->client.base);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
@@ -368,38 +517,92 @@ static int gr3d_probe(struct platform_device *pdev)
 	for (i = 0; i < ARRAY_SIZE(gr3d_addr_regs); i++)
 		set_bit(gr3d_addr_regs[i], gr3d->addr_regs);
 
-	platform_set_drvdata(pdev, gr3d);
-
 	return 0;
 }
 
-static int gr3d_remove(struct platform_device *pdev)
+static void gr3d_remove(struct platform_device *pdev)
 {
 	struct gr3d *gr3d = platform_get_drvdata(pdev);
+
+	pm_runtime_disable(&pdev->dev);
+	host1x_client_unregister(&gr3d->client.base);
+}
+
+static int __maybe_unused gr3d_runtime_suspend(struct device *dev)
+{
+	struct gr3d *gr3d = dev_get_drvdata(dev);
 	int err;
 
-	err = host1x_client_unregister(&gr3d->client.base);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
-			err);
+	host1x_channel_stop(gr3d->channel);
+
+	err = reset_control_bulk_assert(gr3d->nresets, gr3d->resets);
+	if (err) {
+		dev_err(dev, "failed to assert reset: %d\n", err);
+		return err;
+	}
+
+	usleep_range(10, 20);
+
+	/*
+	 * Older device-trees don't specify MC resets and power-gating can't
+	 * be done safely in that case. Hence we will keep the power ungated
+	 * for older DTBs. For newer DTBs, GENPD will perform the power-gating.
+	 */
+
+	clk_bulk_disable_unprepare(gr3d->nclocks, gr3d->clocks);
+	reset_control_bulk_release(gr3d->nresets, gr3d->resets);
+
+	return 0;
+}
+
+static int __maybe_unused gr3d_runtime_resume(struct device *dev)
+{
+	struct gr3d *gr3d = dev_get_drvdata(dev);
+	int err;
+
+	err = reset_control_bulk_acquire(gr3d->nresets, gr3d->resets);
+	if (err) {
+		dev_err(dev, "failed to acquire reset: %d\n", err);
 		return err;
 	}
 
-	if (gr3d->clk_secondary) {
-		tegra_powergate_power_off(TEGRA_POWERGATE_3D1);
-		clk_disable_unprepare(gr3d->clk_secondary);
+	err = clk_bulk_prepare_enable(gr3d->nclocks, gr3d->clocks);
+	if (err) {
+		dev_err(dev, "failed to enable clock: %d\n", err);
+		goto release_reset;
 	}
 
-	tegra_powergate_power_off(TEGRA_POWERGATE_3D);
-	clk_disable_unprepare(gr3d->clk);
+	err = reset_control_bulk_deassert(gr3d->nresets, gr3d->resets);
+	if (err) {
+		dev_err(dev, "failed to deassert reset: %d\n", err);
+		goto disable_clk;
+	}
+
+	pm_runtime_enable(dev);
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, 500);
 
 	return 0;
+
+disable_clk:
+	clk_bulk_disable_unprepare(gr3d->nclocks, gr3d->clocks);
+release_reset:
+	reset_control_bulk_release(gr3d->nresets, gr3d->resets);
+
+	return err;
 }
 
+static const struct dev_pm_ops tegra_gr3d_pm = {
+	SET_RUNTIME_PM_OPS(gr3d_runtime_suspend, gr3d_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
+};
+
 struct platform_driver tegra_gr3d_driver = {
 	.driver = {
 		.name = "tegra-gr3d",
 		.of_match_table = tegra_gr3d_match,
+		.pm = &tegra_gr3d_pm,
 	},
 	.probe = gr3d_probe,
 	.remove = gr3d_remove,
diff --git a/drivers/gpu/drm/tegra/gr3d.h b/drivers/gpu/drm/tegra/gr3d.h
index 0c30a1351c83..ca2921b68c2a 100644
--- a/drivers/gpu/drm/tegra/gr3d.h
+++ b/drivers/gpu/drm/tegra/gr3d.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2013 NVIDIA Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef TEGRA_GR3D_H
diff --git a/drivers/gpu/drm/tegra/hda.c b/drivers/gpu/drm/tegra/hda.c
new file mode 100644
index 000000000000..94245a18a043
--- /dev/null
+++ b/drivers/gpu/drm/tegra/hda.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright (C) 2019 NVIDIA Corporation
+ */
+
+#include <linux/bug.h>
+
+#include <sound/hda_verbs.h>
+
+#include "hda.h"
+
+/**
+ * tegra_hda_parse_format() - decode an HDA stream format value
+ * @format: stream format value, decoded using the AC_FMT_* masks
+ * @fmt: receives the PCM flag, sample rate, number of bits and channel count
+ *
+ * The rate is base * (mul + 1) / (div + 1); multiply first so that the
+ * integer division does not truncate the ratio (to zero when div > mul).
+ */
+void tegra_hda_parse_format(unsigned int format, struct tegra_hda_format *fmt)
+{
+	unsigned int mul, div, bits, channels;
+
+	fmt->pcm = !(format & AC_FMT_TYPE_NON_PCM);
+	fmt->sample_rate = (format & AC_FMT_BASE_44K) ? 44100 : 48000;
+
+	mul = (format & AC_FMT_MULT_MASK) >> AC_FMT_MULT_SHIFT;
+	div = (format & AC_FMT_DIV_MASK) >> AC_FMT_DIV_SHIFT;
+	fmt->sample_rate = fmt->sample_rate * (mul + 1) / (div + 1);
+
+	switch (format & AC_FMT_BITS_MASK) {
+	case AC_FMT_BITS_8:
+		fmt->bits = 8;
+		break;
+
+	case AC_FMT_BITS_16:
+		fmt->bits = 16;
+		break;
+
+	case AC_FMT_BITS_20:
+		fmt->bits = 20;
+		break;
+
+	case AC_FMT_BITS_24:
+		fmt->bits = 24;
+		break;
+
+	case AC_FMT_BITS_32:
+		fmt->bits = 32;
+		break;
+
+	default:
+		bits = (format & AC_FMT_BITS_MASK) >> AC_FMT_BITS_SHIFT;
+		WARN(1, "invalid number of bits: %#x\n", bits);
+		fmt->bits = 8;
+		break;
+	}
+
+	channels = (format & AC_FMT_CHAN_MASK) >> AC_FMT_CHAN_SHIFT;
+
+	/* channels are encoded as n - 1 */
+	fmt->channels = channels + 1;
+}
diff --git a/drivers/gpu/drm/tegra/hda.h b/drivers/gpu/drm/tegra/hda.h
new file mode 100644
index 000000000000..77269955a4f2
--- /dev/null
+++ b/drivers/gpu/drm/tegra/hda.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright (C) 2019 NVIDIA Corporation
+ */
+
+#ifndef DRM_TEGRA_HDA_H
+#define DRM_TEGRA_HDA_H 1
+
+#include <linux/types.h>
+
+struct tegra_hda_format {
+	unsigned int sample_rate;
+	unsigned int channels;
+	unsigned int bits;
+	bool pcm;
+};
+
+void tegra_hda_parse_format(unsigned int format, struct tegra_hda_format *fmt);
+
+#endif
diff --git a/drivers/gpu/drm/tegra/hdmi.c b/drivers/gpu/drm/tegra/hdmi.c
index 0082468f703c..0adcd4244a42 100644
--- a/drivers/gpu/drm/tegra/hdmi.c
+++ b/drivers/gpu/drm/tegra/hdmi.c
@@ -1,29 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2012 Avionic Design GmbH
  * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
 #include <linux/debugfs.h>
-#include <linux/gpio.h>
+#include <linux/delay.h>
 #include <linux/hdmi.h>
-#include <linux/of_device.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_opp.h>
 #include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/reset.h>
 
+#include <soc/tegra/common.h>
+#include <sound/hdmi-codec.h>
+
+#include <drm/drm_bridge_connector.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
-#include <drm/drm_crtc_helper.h>
-
-#include <sound/hda_verbs.h>
-
-#include <media/cec-notifier.h>
-
+#include <drm/drm_debugfs.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_eld.h>
+#include <drm/drm_file.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_print.h>
+#include <drm/drm_probe_helper.h>
+#include <drm/drm_simple_kms_helper.h>
+
+#include "hda.h"
 #include "hdmi.h"
 #include "drm.h"
 #include "dc.h"
@@ -71,14 +80,16 @@ struct tegra_hdmi {
 	const struct tegra_hdmi_config *config;
 
 	unsigned int audio_source;
-	unsigned int audio_sample_rate;
-	unsigned int audio_channels;
+	struct tegra_hda_format format;
 
 	unsigned int pixel_clock;
 	bool stereo;
 	bool dvi;
 
 	struct drm_info_list *debugfs_files;
+
+	struct platform_device *audio_pdev;
+	struct mutex audio_lock;
 };
 
 static inline struct tegra_hdmi *
@@ -119,68 +130,11 @@ static inline void tegra_hdmi_writel(struct tegra_hdmi *hdmi, u32 value,
 }
 
 struct tegra_hdmi_audio_config {
-	unsigned int pclk;
 	unsigned int n;
 	unsigned int cts;
 	unsigned int aval;
 };
 
-static const struct tegra_hdmi_audio_config tegra_hdmi_audio_32k[] = {
-	{  25200000, 4096,  25200, 24000 },
-	{  27000000, 4096,  27000, 24000 },
-	{  74250000, 4096,  74250, 24000 },
-	{ 148500000, 4096, 148500, 24000 },
-	{         0,    0,      0,     0 },
-};
-
-static const struct tegra_hdmi_audio_config tegra_hdmi_audio_44_1k[] = {
-	{  25200000, 5880,  26250, 25000 },
-	{  27000000, 5880,  28125, 25000 },
-	{  74250000, 4704,  61875, 20000 },
-	{ 148500000, 4704, 123750, 20000 },
-	{         0,    0,      0,     0 },
-};
-
-static const struct tegra_hdmi_audio_config tegra_hdmi_audio_48k[] = {
-	{  25200000, 6144,  25200, 24000 },
-	{  27000000, 6144,  27000, 24000 },
-	{  74250000, 6144,  74250, 24000 },
-	{ 148500000, 6144, 148500, 24000 },
-	{         0,    0,      0,     0 },
-};
-
-static const struct tegra_hdmi_audio_config tegra_hdmi_audio_88_2k[] = {
-	{  25200000, 11760,  26250, 25000 },
-	{  27000000, 11760,  28125, 25000 },
-	{  74250000,  9408,  61875, 20000 },
-	{ 148500000,  9408, 123750, 20000 },
-	{         0,     0,      0,     0 },
-};
-
-static const struct tegra_hdmi_audio_config tegra_hdmi_audio_96k[] = {
-	{  25200000, 12288,  25200, 24000 },
-	{  27000000, 12288,  27000, 24000 },
-	{  74250000, 12288,  74250, 24000 },
-	{ 148500000, 12288, 148500, 24000 },
-	{         0,     0,      0,     0 },
-};
-
-static const struct tegra_hdmi_audio_config tegra_hdmi_audio_176_4k[] = {
-	{  25200000, 23520,  26250, 25000 },
-	{  27000000, 23520,  28125, 25000 },
-	{  74250000, 18816,  61875, 20000 },
-	{ 148500000, 18816, 123750, 20000 },
-	{         0,     0,      0,     0 },
-};
-
-static const struct tegra_hdmi_audio_config tegra_hdmi_audio_192k[] = {
-	{  25200000, 24576,  25200, 24000 },
-	{  27000000, 24576,  27000, 24000 },
-	{  74250000, 24576,  74250, 24000 },
-	{ 148500000, 24576, 148500, 24000 },
-	{         0,     0,      0,     0 },
-};
-
 static const struct tmds_config tegra20_tmds_config[] = {
 	{ /* slow pixel clock modes */
 		.pclk = 27000000,
@@ -418,57 +372,70 @@ static const struct tmds_config tegra124_tmds_config[] = {
 	},
 };
 
-static const struct tegra_hdmi_audio_config *
-tegra_hdmi_get_audio_config(unsigned int sample_rate, unsigned int pclk)
+static void tegra_hdmi_audio_lock(struct tegra_hdmi *hdmi)
 {
-	const struct tegra_hdmi_audio_config *table;
-
-	switch (sample_rate) {
-	case 32000:
-		table = tegra_hdmi_audio_32k;
-		break;
-
-	case 44100:
-		table = tegra_hdmi_audio_44_1k;
-		break;
-
-	case 48000:
-		table = tegra_hdmi_audio_48k;
-		break;
-
-	case 88200:
-		table = tegra_hdmi_audio_88_2k;
-		break;
-
-	case 96000:
-		table = tegra_hdmi_audio_96k;
-		break;
-
-	case 176400:
-		table = tegra_hdmi_audio_176_4k;
-		break;
-
-	case 192000:
-		table = tegra_hdmi_audio_192k;
-		break;
-
-	default:
-		return NULL;
-	}
+	mutex_lock(&hdmi->audio_lock);
+	disable_irq(hdmi->irq);
+}
 
-	while (table->pclk) {
-		if (table->pclk == pclk)
-			return table;
+static void tegra_hdmi_audio_unlock(struct tegra_hdmi *hdmi)
+{
+	enable_irq(hdmi->irq);
+	mutex_unlock(&hdmi->audio_lock);
+}
 
-		table++;
+static int
+tegra_hdmi_get_audio_config(unsigned int audio_freq, unsigned int pix_clock,
+			    struct tegra_hdmi_audio_config *config)
+{
+	const unsigned int afreq = 128 * audio_freq;
+	const unsigned int min_n = afreq / 1500;
+	const unsigned int max_n = afreq / 300;
+	const unsigned int ideal_n = afreq / 1000;
+	int64_t min_err = (uint64_t)-1 >> 1;
+	unsigned int min_delta = -1;
+	int n;
+
+	memset(config, 0, sizeof(*config));
+	config->n = -1;
+
+	for (n = min_n; n <= max_n; n++) {
+		uint64_t cts_f, aval_f;
+		unsigned int delta;
+		int64_t cts, err;
+
+		/* compute aval in 48.16 fixed point */
+		aval_f = ((int64_t)24000000 << 16) * n;
+		do_div(aval_f, afreq);
+		/* It should round without any rest */
+		if (aval_f & 0xFFFF)
+			continue;
+
+		/* Compute cts in 48.16 fixed point */
+		cts_f = ((int64_t)pix_clock << 16) * n;
+		do_div(cts_f, afreq);
+		/* Round it to the nearest integer */
+		cts = (cts_f & ~0xFFFF) + ((cts_f & BIT(15)) << 1);
+
+		delta = abs(n - ideal_n);
+
+		/* Compute the absolute error */
+		err = abs((int64_t)cts_f - cts);
+		if (err < min_err || (err == min_err && delta < min_delta)) {
+			config->n = n;
+			config->cts = cts >> 16;
+			config->aval = aval_f >> 16;
+			min_delta = delta;
+			min_err = err;
+		}
 	}
 
-	return NULL;
+	return config->n != -1 ? 0 : -EINVAL;
 }
 
 static void tegra_hdmi_setup_audio_fs_tables(struct tegra_hdmi *hdmi)
 {
-	const unsigned int freqs[] = {
+	static const unsigned int freqs[] = {
 		32000, 44100, 48000, 88200, 96000, 176400, 192000
 	};
 	unsigned int i;
@@ -510,7 +477,7 @@ static void tegra_hdmi_write_aval(struct tegra_hdmi *hdmi, u32 value)
 	unsigned int i;
 
 	for (i = 0; i < ARRAY_SIZE(regs); i++) {
-		if (regs[i].sample_rate == hdmi->audio_sample_rate) {
+		if (regs[i].sample_rate == hdmi->format.sample_rate) {
 			tegra_hdmi_writel(hdmi, value, regs[i].offset);
 			break;
 		}
@@ -519,8 +486,9 @@ static void tegra_hdmi_write_aval(struct tegra_hdmi *hdmi, u32 value)
 
 static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi)
 {
-	const struct tegra_hdmi_audio_config *config;
+	struct tegra_hdmi_audio_config config;
 	u32 source, value;
+	int err;
 
 	switch (hdmi->audio_source) {
 	case HDA:
@@ -564,7 +532,7 @@ static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi)
 		 * play back system startup sounds early. It is possibly not
 		 * needed on Linux at all.
 		 */
-		if (hdmi->audio_channels == 2)
+		if (hdmi->format.channels == 2)
 			value = SOR_AUDIO_CNTRL0_INJECT_NULLSMPL;
 		else
 			value = 0;
@@ -595,25 +563,28 @@ static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi)
 		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_AUDIO_SPARE0);
 	}
 
-	config = tegra_hdmi_get_audio_config(hdmi->audio_sample_rate,
-					     hdmi->pixel_clock);
-	if (!config) {
+	err = tegra_hdmi_get_audio_config(hdmi->format.sample_rate,
+					  hdmi->pixel_clock, &config);
+	if (err < 0) {
 		dev_err(hdmi->dev,
 			"cannot set audio to %u Hz at %u Hz pixel clock\n",
-			hdmi->audio_sample_rate, hdmi->pixel_clock);
-		return -EINVAL;
+			hdmi->format.sample_rate, hdmi->pixel_clock);
+		return err;
 	}
 
+	dev_dbg(hdmi->dev, "audio: pixclk=%u, n=%u, cts=%u, aval=%u\n",
+		hdmi->pixel_clock, config.n, config.cts, config.aval);
+
 	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_HDMI_ACR_CTRL);
 
 	value = AUDIO_N_RESETF | AUDIO_N_GENERATE_ALTERNATE |
-		AUDIO_N_VALUE(config->n - 1);
+		AUDIO_N_VALUE(config.n - 1);
 	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_N);
 
-	tegra_hdmi_writel(hdmi, ACR_SUBPACK_N(config->n) | ACR_ENABLE,
+	tegra_hdmi_writel(hdmi, ACR_SUBPACK_N(config.n) | ACR_ENABLE,
 			  HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_HIGH);
 
-	tegra_hdmi_writel(hdmi, ACR_SUBPACK_CTS(config->cts),
+	tegra_hdmi_writel(hdmi, ACR_SUBPACK_CTS(config.cts),
 			  HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_LOW);
 
 	value = SPARE_HW_CTS | SPARE_FORCE_SW_CTS | SPARE_CTS_RESET_VAL(1);
@@ -624,7 +595,7 @@ static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi)
 	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_N);
 
 	if (hdmi->config->has_hda)
-		tegra_hdmi_write_aval(hdmi, config->aval);
+		tegra_hdmi_write_aval(hdmi, config.aval);
 
 	tegra_hdmi_setup_audio_fs_tables(hdmi);
 
@@ -688,7 +659,7 @@ static void tegra_hdmi_write_infopack(struct tegra_hdmi *hdmi, const void *data,
 {
 	const u8 *ptr = data;
 	unsigned long offset;
-	size_t i, j;
+	size_t i;
 	u32 value;
 
 	switch (ptr[0]) {
@@ -721,7 +692,7 @@ static void tegra_hdmi_write_infopack(struct tegra_hdmi *hdmi, const void *data,
 	 * - subpack_low: bytes 0 - 3
 	 * - subpack_high: bytes 4 - 6 (with byte 7 padded to 0x00)
 	 */
-	for (i = 3, j = 0; i < size; i += 7, j += 8) {
+	for (i = 3; i < size; i += 7) {
 		size_t rem = size - i, num = min_t(size_t, rem, 4);
 
 		value = tegra_hdmi_subpack(&ptr[i], num);
@@ -741,7 +712,8 @@ static void tegra_hdmi_setup_avi_infoframe(struct tegra_hdmi *hdmi,
 	u8 buffer[17];
 	ssize_t err;
 
-	err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false);
+	err = drm_hdmi_avi_infoframe_from_display_mode(&frame,
+						       &hdmi->output.connector, mode);
 	if (err < 0) {
 		dev_err(hdmi->dev, "failed to setup AVI infoframe: %zd\n", err);
 		return;
@@ -787,7 +759,7 @@ static void tegra_hdmi_setup_audio_infoframe(struct tegra_hdmi *hdmi)
 		return;
 	}
 
-	frame.channels = hdmi->audio_channels;
+	frame.channels = hdmi->format.channels;
 
 	err = hdmi_audio_infoframe_pack(&frame, buffer, sizeof(buffer));
 	if (err < 0) {
@@ -881,16 +853,26 @@ static void tegra_hdmi_setup_tmds(struct tegra_hdmi *hdmi,
 				  HDMI_NV_PDISP_SOR_IO_PEAK_CURRENT);
 }
 
-static bool tegra_output_is_hdmi(struct tegra_output *output)
+static int tegra_hdmi_reconfigure_audio(struct tegra_hdmi *hdmi)
 {
-	struct edid *edid;
+	int err;
 
-	if (!output->connector.edid_blob_ptr)
-		return false;
+	err = tegra_hdmi_setup_audio(hdmi);
+	if (err < 0) {
+		tegra_hdmi_disable_audio_infoframe(hdmi);
+		tegra_hdmi_disable_audio(hdmi);
+	} else {
+		tegra_hdmi_setup_audio_infoframe(hdmi);
+		tegra_hdmi_enable_audio_infoframe(hdmi);
+		tegra_hdmi_enable_audio(hdmi);
+	}
 
-	edid = (struct edid *)output->connector.edid_blob_ptr->data;
+	return err;
+}
 
-	return drm_detect_hdmi_monitor(edid);
+static bool tegra_output_is_hdmi(struct tegra_output *output)
+{
+	return output->connector.display_info.is_hdmi;
 }
 
 static enum drm_connector_status
@@ -1116,7 +1098,6 @@ static int tegra_hdmi_late_register(struct drm_connector *connector)
 	struct drm_minor *minor = connector->dev->primary;
 	struct dentry *root = connector->debugfs_entry;
 	struct tegra_hdmi *hdmi = to_hdmi(output);
-	int err;
 
 	hdmi->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
 				      GFP_KERNEL);
@@ -1126,17 +1107,9 @@ static int tegra_hdmi_late_register(struct drm_connector *connector)
 	for (i = 0; i < count; i++)
 		hdmi->debugfs_files[i].data = hdmi;
 
-	err = drm_debugfs_create_files(hdmi->debugfs_files, count, root, minor);
-	if (err < 0)
-		goto free;
+	drm_debugfs_create_files(hdmi->debugfs_files, count, root, minor);
 
 	return 0;
-
-free:
-	kfree(hdmi->debugfs_files);
-	hdmi->debugfs_files = NULL;
-
-	return err;
 }
 
 static void tegra_hdmi_early_unregister(struct drm_connector *connector)
@@ -1146,7 +1119,8 @@ static void tegra_hdmi_early_unregister(struct drm_connector *connector)
 	unsigned int count = ARRAY_SIZE(debugfs_files);
 	struct tegra_hdmi *hdmi = to_hdmi(output);
 
-	drm_debugfs_remove_files(hdmi->debugfs_files, count, minor);
+	drm_debugfs_remove_files(hdmi->debugfs_files, count,
+				 connector->debugfs_entry, minor);
 	kfree(hdmi->debugfs_files);
 	hdmi->debugfs_files = NULL;
 }
@@ -1164,7 +1138,7 @@ static const struct drm_connector_funcs tegra_hdmi_connector_funcs = {
 
 static enum drm_mode_status
 tegra_hdmi_connector_mode_valid(struct drm_connector *connector,
-				struct drm_display_mode *mode)
+				const struct drm_display_mode *mode)
 {
 	struct tegra_output *output = connector_to_output(connector);
 	struct tegra_hdmi *hdmi = to_hdmi(output);
@@ -1188,16 +1162,15 @@ tegra_hdmi_connector_helper_funcs = {
 	.mode_valid = tegra_hdmi_connector_mode_valid,
 };
 
-static const struct drm_encoder_funcs tegra_hdmi_encoder_funcs = {
-	.destroy = tegra_output_encoder_destroy,
-};
-
 static void tegra_hdmi_encoder_disable(struct drm_encoder *encoder)
 {
 	struct tegra_output *output = encoder_to_output(encoder);
 	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
 	struct tegra_hdmi *hdmi = to_hdmi(output);
 	u32 value;
+	int err;
+
+	tegra_hdmi_audio_lock(hdmi);
 
 	/*
 	 * The following accesses registers of the display controller, so make
@@ -1223,7 +1196,13 @@ static void tegra_hdmi_encoder_disable(struct drm_encoder *encoder)
 	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_INT_ENABLE);
 	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_INT_MASK);
 
-	pm_runtime_put(hdmi->dev);
+	hdmi->pixel_clock = 0;
+
+	tegra_hdmi_audio_unlock(hdmi);
+
+	err = host1x_client_suspend(&hdmi->client);
+	if (err < 0)
+		dev_err(hdmi->dev, "failed to suspend: %d\n", err);
 }
 
 static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
@@ -1238,7 +1217,13 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
 	u32 value;
 	int err;
 
-	pm_runtime_get_sync(hdmi->dev);
+	err = host1x_client_resume(&hdmi->client);
+	if (err < 0) {
+		dev_err(hdmi->dev, "failed to resume: %d\n", err);
+		return;
+	}
+
+	tegra_hdmi_audio_lock(hdmi);
 
 	/*
 	 * Enable and unmask the HDA codec SCRATCH0 register interrupt. This
@@ -1253,7 +1238,7 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
 	h_back_porch = mode->htotal - mode->hsync_end;
 	h_front_porch = mode->hsync_start - mode->hdisplay;
 
-	err = clk_set_rate(hdmi->clk, hdmi->pixel_clock);
+	err = dev_pm_opp_set_rate(hdmi->dev, hdmi->pixel_clock);
 	if (err < 0) {
 		dev_err(hdmi->dev, "failed to set HDMI clock frequency: %d\n",
 			err);
@@ -1314,9 +1299,15 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
 
 	hdmi->dvi = !tegra_output_is_hdmi(output);
 	if (!hdmi->dvi) {
-		err = tegra_hdmi_setup_audio(hdmi);
-		if (err < 0)
-			hdmi->dvi = true;
+		/*
+		 * Make sure that the audio format has been configured before
+		 * enabling audio, otherwise we may try to divide by zero.
+		 */
+		if (hdmi->format.sample_rate > 0) {
+			err = tegra_hdmi_setup_audio(hdmi);
+			if (err < 0)
+				hdmi->dvi = true;
+		}
 	}
 
 	if (hdmi->config->has_hda)
@@ -1439,6 +1430,8 @@ static void tegra_hdmi_encoder_enable(struct drm_encoder *encoder)
 	}
 
 	/* TODO: add HDCP support */
+
+	tegra_hdmi_audio_unlock(hdmi);
 }
 
 static int
@@ -1468,29 +1461,136 @@ static const struct drm_encoder_helper_funcs tegra_hdmi_encoder_helper_funcs = {
 	.atomic_check = tegra_hdmi_encoder_atomic_check,
 };
 
+/* hdmi-codec .hw_params: record the stream format and apply it if live */
+static int tegra_hdmi_hw_params(struct device *dev, void *data,
+				struct hdmi_codec_daifmt *fmt,
+				struct hdmi_codec_params *hparms)
+{
+	struct tegra_hdmi *hdmi = data;
+	int err = 0;
+
+	tegra_hdmi_audio_lock(hdmi);
+	hdmi->format.channels = hparms->channels;
+	hdmi->format.sample_rate = hparms->sample_rate;
+
+	/* pixel_clock is zeroed on disable: skip the hardware if off or DVI */
+	if (!hdmi->dvi && hdmi->pixel_clock)
+		err = tegra_hdmi_reconfigure_audio(hdmi);
+
+	tegra_hdmi_audio_unlock(hdmi);
+	return err;
+}
+
+/* hdmi-codec .audio_startup: resume the HDMI host1x client */
+static int tegra_hdmi_audio_startup(struct device *dev, void *data)
+{
+	struct tegra_hdmi *hdmi = data;
+	int err = host1x_client_resume(&hdmi->client);
+
+	if (err < 0)
+		dev_err(hdmi->dev, "failed to resume: %d\n", err);
+
+	return err;
+}
+
+/* hdmi-codec .audio_shutdown: clear the format, then suspend the client */
+static void tegra_hdmi_audio_shutdown(struct device *dev, void *data)
+{
+	struct tegra_hdmi *hdmi = data;
+	int err;
+
+	/* a zero sample rate makes encoder enable skip the audio setup */
+	tegra_hdmi_audio_lock(hdmi);
+	hdmi->format.channels = 0;
+	hdmi->format.sample_rate = 0;
+	tegra_hdmi_audio_unlock(hdmi);
+
+	err = host1x_client_suspend(&hdmi->client);
+	if (err < 0)
+		dev_err(hdmi->dev, "failed to suspend: %d\n", err);
+}
+
+static const struct hdmi_codec_ops tegra_hdmi_codec_ops = {
+	.audio_startup = tegra_hdmi_audio_startup,
+	.hw_params = tegra_hdmi_hw_params,
+	.audio_shutdown = tegra_hdmi_audio_shutdown,
+};
+
+/* Register the HDMI audio codec device, unless the SoC config has HDA. */
+static int tegra_hdmi_codec_register(struct tegra_hdmi *hdmi)
+{
+	struct hdmi_codec_pdata pdata = {
+		.ops = &tegra_hdmi_codec_ops,
+		.data = hdmi,
+		.spdif = 1,
+	};
+	struct platform_device *pdev;
+
+	if (hdmi->config->has_hda)
+		return 0;
+
+	pdev = platform_device_register_data(hdmi->dev, HDMI_CODEC_DRV_NAME,
+					     PLATFORM_DEVID_AUTO, &pdata,
+					     sizeof(pdata));
+	hdmi->audio_pdev = pdev;
+	if (IS_ERR(pdev))
+		return PTR_ERR(pdev);
+
+	hdmi->format.channels = 2;
+	return 0;
+}
+
+static void tegra_hdmi_codec_unregister(struct tegra_hdmi *hdmi)
+{
+	if (hdmi->audio_pdev) /* never set when registration was skipped */
+		platform_device_unregister(hdmi->audio_pdev);
+}
+
 static int tegra_hdmi_init(struct host1x_client *client)
 {
-	struct drm_device *drm = dev_get_drvdata(client->parent);
 	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
+	struct drm_device *drm = dev_get_drvdata(client->host);
+	struct drm_connector *connector;
 	int err;
 
 	hdmi->output.dev = client->dev;
 
-	drm_connector_init(drm, &hdmi->output.connector,
-			   &tegra_hdmi_connector_funcs,
-			   DRM_MODE_CONNECTOR_HDMIA);
-	drm_connector_helper_add(&hdmi->output.connector,
-				 &tegra_hdmi_connector_helper_funcs);
-	hdmi->output.connector.dpms = DRM_MODE_DPMS_OFF;
-
-	drm_encoder_init(drm, &hdmi->output.encoder, &tegra_hdmi_encoder_funcs,
-			 DRM_MODE_ENCODER_TMDS, NULL);
+	drm_simple_encoder_init(drm, &hdmi->output.encoder,
+				DRM_MODE_ENCODER_TMDS);
 	drm_encoder_helper_add(&hdmi->output.encoder,
 			       &tegra_hdmi_encoder_helper_funcs);
 
-	drm_connector_attach_encoder(&hdmi->output.connector,
-					  &hdmi->output.encoder);
-	drm_connector_register(&hdmi->output.connector);
+	if (hdmi->output.bridge) {
+		err = drm_bridge_attach(&hdmi->output.encoder, hdmi->output.bridge,
+					NULL, DRM_BRIDGE_ATTACH_NO_CONNECTOR);
+		if (err) {
+			dev_err(client->dev, "failed to attach bridge: %d\n",
+				err);
+			return err;
+		}
+
+		connector = drm_bridge_connector_init(drm, &hdmi->output.encoder);
+		if (IS_ERR(connector)) {
+			dev_err(client->dev,
+				"failed to initialize bridge connector: %pe\n",
+				connector);
+			return PTR_ERR(connector);
+		}
+
+		drm_connector_attach_encoder(connector, &hdmi->output.encoder);
+	} else {
+		drm_connector_init_with_ddc(drm, &hdmi->output.connector,
+					    &tegra_hdmi_connector_funcs,
+					    DRM_MODE_CONNECTOR_HDMIA,
+					    hdmi->output.ddc);
+		drm_connector_helper_add(&hdmi->output.connector,
+					 &tegra_hdmi_connector_helper_funcs);
+		hdmi->output.connector.dpms = DRM_MODE_DPMS_OFF;
+
+		drm_connector_attach_encoder(&hdmi->output.connector,
+					     &hdmi->output.encoder);
+		drm_connector_register(&hdmi->output.connector);
+	}
 
 	err = tegra_output_init(drm, &hdmi->output);
 	if (err < 0) {
@@ -1504,28 +1604,47 @@ static int tegra_hdmi_init(struct host1x_client *client)
 	if (err < 0) {
 		dev_err(client->dev, "failed to enable HDMI regulator: %d\n",
 			err);
-		return err;
+		goto output_exit;
 	}
 
 	err = regulator_enable(hdmi->pll);
 	if (err < 0) {
 		dev_err(hdmi->dev, "failed to enable PLL regulator: %d\n", err);
-		return err;
+		goto disable_hdmi;
 	}
 
 	err = regulator_enable(hdmi->vdd);
 	if (err < 0) {
 		dev_err(hdmi->dev, "failed to enable VDD regulator: %d\n", err);
-		return err;
+		goto disable_pll;
+	}
+
+	err = tegra_hdmi_codec_register(hdmi);
+	if (err < 0) {
+		dev_err(hdmi->dev, "failed to register audio codec: %d\n", err);
+		goto disable_vdd;
 	}
 
 	return 0;
+
+disable_vdd:
+	regulator_disable(hdmi->vdd);
+disable_pll:
+	regulator_disable(hdmi->pll);
+disable_hdmi:
+	regulator_disable(hdmi->hdmi);
+output_exit:
+	tegra_output_exit(&hdmi->output);
+
+	return err;
 }
 
 static int tegra_hdmi_exit(struct host1x_client *client)
 {
 	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
 
+	tegra_hdmi_codec_unregister(hdmi);
+
 	tegra_output_exit(&hdmi->output);
 
 	regulator_disable(hdmi->vdd);
@@ -1535,9 +1654,66 @@ static int tegra_hdmi_exit(struct host1x_client *client)
 	return 0;
 }
 
+/* host1x ->suspend: assert reset, stop the clock, put the runtime PM ref */
+static int tegra_hdmi_runtime_suspend(struct host1x_client *client)
+{
+	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
+	int ret = reset_control_assert(hdmi->rst);
+
+	if (ret < 0) {
+		dev_err(client->dev, "failed to assert reset: %d\n", ret);
+		return ret;
+	}
+
+	/* wait 1-2 ms between asserting reset and stopping the clock */
+	usleep_range(1000, 2000);
+
+	clk_disable_unprepare(hdmi->clk);
+	pm_runtime_put_sync(client->dev);
+
+	return 0;
+}
+
+/*
+ * host1x ->resume: take a runtime PM reference, enable the clock and release
+ * the reset, unwinding in reverse order if a step fails.
+ */
+static int tegra_hdmi_runtime_resume(struct host1x_client *client)
+{
+	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
+	int ret;
+
+	ret = pm_runtime_resume_and_get(client->dev);
+	if (ret < 0) {
+		dev_err(client->dev, "failed to get runtime PM: %d\n", ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(hdmi->clk);
+	if (ret < 0) {
+		dev_err(client->dev, "failed to enable clock: %d\n", ret);
+		goto err_rpm;
+	}
+
+	/* let the clock run for 1-2 ms before releasing the reset */
+	usleep_range(1000, 2000);
+
+	ret = reset_control_deassert(hdmi->rst);
+	if (ret >= 0)
+		return 0;
+
+	dev_err(client->dev, "failed to deassert reset: %d\n", ret);
+	clk_disable_unprepare(hdmi->clk);
+err_rpm:
+	pm_runtime_put_sync(client->dev);
+	return ret;
+}
+
 static const struct host1x_client_ops hdmi_client_ops = {
 	.init = tegra_hdmi_init,
 	.exit = tegra_hdmi_exit,
+	.suspend = tegra_hdmi_runtime_suspend,
+	.resume = tegra_hdmi_runtime_resume,
 };
 
 static const struct tegra_hdmi_config tegra20_hdmi_config = {
@@ -1589,29 +1765,10 @@ static const struct of_device_id tegra_hdmi_of_match[] = {
 };
 MODULE_DEVICE_TABLE(of, tegra_hdmi_of_match);
 
-static void hda_format_parse(unsigned int format, unsigned int *rate,
-			     unsigned int *channels)
-{
-	unsigned int mul, div;
-
-	if (format & AC_FMT_BASE_44K)
-		*rate = 44100;
-	else
-		*rate = 48000;
-
-	mul = (format & AC_FMT_MULT_MASK) >> AC_FMT_MULT_SHIFT;
-	div = (format & AC_FMT_DIV_MASK) >> AC_FMT_DIV_SHIFT;
-
-	*rate = *rate * (mul + 1) / (div + 1);
-
-	*channels = (format & AC_FMT_CHAN_MASK) >> AC_FMT_CHAN_SHIFT;
-}
-
 static irqreturn_t tegra_hdmi_irq(int irq, void *data)
 {
 	struct tegra_hdmi *hdmi = data;
 	u32 value;
-	int err;
 
 	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_INT_STATUS);
 	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_INT_STATUS);
@@ -1623,24 +1780,10 @@ static irqreturn_t tegra_hdmi_irq(int irq, void *data)
 		value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_SOR_AUDIO_HDA_CODEC_SCRATCH0);
 
 		if (value & SOR_AUDIO_HDA_CODEC_SCRATCH0_VALID) {
-			unsigned int sample_rate, channels;
-
 			format = value & SOR_AUDIO_HDA_CODEC_SCRATCH0_FMT_MASK;
 
-			hda_format_parse(format, &sample_rate, &channels);
-
-			hdmi->audio_sample_rate = sample_rate;
-			hdmi->audio_channels = channels;
-
-			err = tegra_hdmi_setup_audio(hdmi);
-			if (err < 0) {
-				tegra_hdmi_disable_audio_infoframe(hdmi);
-				tegra_hdmi_disable_audio(hdmi);
-			} else {
-				tegra_hdmi_setup_audio_infoframe(hdmi);
-				tegra_hdmi_enable_audio_infoframe(hdmi);
-				tegra_hdmi_enable_audio(hdmi);
-			}
+			tegra_hda_parse_format(format, &hdmi->format);
+			tegra_hdmi_reconfigure_audio(hdmi);
 		} else {
 			tegra_hdmi_disable_audio_infoframe(hdmi);
 			tegra_hdmi_disable_audio(hdmi);
@@ -1653,7 +1796,6 @@ static irqreturn_t tegra_hdmi_irq(int irq, void *data)
 static int tegra_hdmi_probe(struct platform_device *pdev)
 {
 	struct tegra_hdmi *hdmi;
-	struct resource *regs;
 	int err;
 
 	hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL);
@@ -1664,11 +1806,11 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
 	hdmi->dev = &pdev->dev;
 
 	hdmi->audio_source = AUTO;
-	hdmi->audio_sample_rate = 48000;
-	hdmi->audio_channels = 2;
 	hdmi->stereo = false;
 	hdmi->dvi = false;
 
+	mutex_init(&hdmi->audio_lock);
+
 	hdmi->clk = devm_clk_get(&pdev->dev, NULL);
 	if (IS_ERR(hdmi->clk)) {
 		dev_err(&pdev->dev, "failed to get clock\n");
@@ -1692,26 +1834,22 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
 	}
 
 	hdmi->hdmi = devm_regulator_get(&pdev->dev, "hdmi");
-	if (IS_ERR(hdmi->hdmi)) {
-		dev_err(&pdev->dev, "failed to get HDMI regulator\n");
-		return PTR_ERR(hdmi->hdmi);
-	}
+	err = PTR_ERR_OR_ZERO(hdmi->hdmi);
+	if (err)
+		return dev_err_probe(&pdev->dev, err,
+				     "failed to get HDMI regulator\n");
 
 	hdmi->pll = devm_regulator_get(&pdev->dev, "pll");
-	if (IS_ERR(hdmi->pll)) {
-		dev_err(&pdev->dev, "failed to get PLL regulator\n");
-		return PTR_ERR(hdmi->pll);
-	}
+	err = PTR_ERR_OR_ZERO(hdmi->pll);
+	if (err)
+		return dev_err_probe(&pdev->dev, err,
+				     "failed to get PLL regulator\n");
 
 	hdmi->vdd = devm_regulator_get(&pdev->dev, "vdd");
-	if (IS_ERR(hdmi->vdd)) {
-		dev_err(&pdev->dev, "failed to get VDD regulator\n");
-		return PTR_ERR(hdmi->vdd);
-	}
-
-	hdmi->output.notifier = cec_notifier_get(&pdev->dev);
-	if (hdmi->output.notifier == NULL)
-		return -ENOMEM;
+	err = PTR_ERR_OR_ZERO(hdmi->vdd);
+	if (err)
+		return dev_err_probe(&pdev->dev, err,
+				     "failed to get VDD regulator\n");
 
 	hdmi->output.dev = &pdev->dev;
 
@@ -1719,14 +1857,15 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
 	if (err < 0)
 		return err;
 
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	hdmi->regs = devm_ioremap_resource(&pdev->dev, regs);
-	if (IS_ERR(hdmi->regs))
-		return PTR_ERR(hdmi->regs);
+	hdmi->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(hdmi->regs)) {
+		err = PTR_ERR(hdmi->regs);
+		goto remove;
+	}
 
 	err = platform_get_irq(pdev, 0);
 	if (err < 0)
-		return err;
+		goto remove;
 
 	hdmi->irq = err;
 
@@ -1735,11 +1874,18 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to request IRQ#%u: %d\n",
 			hdmi->irq, err);
-		return err;
+		goto remove;
 	}
 
 	platform_set_drvdata(pdev, hdmi);
-	pm_runtime_enable(&pdev->dev);
+
+	err = devm_pm_runtime_enable(&pdev->dev);
+	if (err)
+		goto remove;
+
+	err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev);
+	if (err)
+		goto remove;
 
 	INIT_LIST_HEAD(&hdmi->client.list);
 	hdmi->client.ops = &hdmi_client_ops;
@@ -1749,86 +1895,29 @@ static int tegra_hdmi_probe(struct platform_device *pdev)
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
 			err);
-		return err;
+		goto remove;
 	}
 
 	return 0;
-}
-
-static int tegra_hdmi_remove(struct platform_device *pdev)
-{
-	struct tegra_hdmi *hdmi = platform_get_drvdata(pdev);
-	int err;
-
-	pm_runtime_disable(&pdev->dev);
-
-	err = host1x_client_unregister(&hdmi->client);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
-			err);
-		return err;
-	}
 
+remove:
 	tegra_output_remove(&hdmi->output);
-
-	if (hdmi->output.notifier)
-		cec_notifier_put(hdmi->output.notifier);
-
-	return 0;
-}
-
-#ifdef CONFIG_PM
-static int tegra_hdmi_suspend(struct device *dev)
-{
-	struct tegra_hdmi *hdmi = dev_get_drvdata(dev);
-	int err;
-
-	err = reset_control_assert(hdmi->rst);
-	if (err < 0) {
-		dev_err(dev, "failed to assert reset: %d\n", err);
-		return err;
-	}
-
-	usleep_range(1000, 2000);
-
-	clk_disable_unprepare(hdmi->clk);
-
-	return 0;
+	return err;
 }
 
-static int tegra_hdmi_resume(struct device *dev)
+static void tegra_hdmi_remove(struct platform_device *pdev)
 {
-	struct tegra_hdmi *hdmi = dev_get_drvdata(dev);
-	int err;
-
-	err = clk_prepare_enable(hdmi->clk);
-	if (err < 0) {
-		dev_err(dev, "failed to enable clock: %d\n", err);
-		return err;
-	}
-
-	usleep_range(1000, 2000);
+	struct tegra_hdmi *hdmi = platform_get_drvdata(pdev);
 
-	err = reset_control_deassert(hdmi->rst);
-	if (err < 0) {
-		dev_err(dev, "failed to deassert reset: %d\n", err);
-		clk_disable_unprepare(hdmi->clk);
-		return err;
-	}
+	host1x_client_unregister(&hdmi->client);
 
-	return 0;
+	tegra_output_remove(&hdmi->output);
 }
-#endif
-
-static const struct dev_pm_ops tegra_hdmi_pm_ops = {
-	SET_RUNTIME_PM_OPS(tegra_hdmi_suspend, tegra_hdmi_resume, NULL)
-};
 
 struct platform_driver tegra_hdmi_driver = {
 	.driver = {
 		.name = "tegra-hdmi",
 		.of_match_table = tegra_hdmi_of_match,
-		.pm = &tegra_hdmi_pm_ops,
 	},
 	.probe = tegra_hdmi_probe,
 	.remove = tegra_hdmi_remove,
diff --git a/drivers/gpu/drm/tegra/hdmi.h b/drivers/gpu/drm/tegra/hdmi.h
index 2339f134a09a..8deb04223c18 100644
--- a/drivers/gpu/drm/tegra/hdmi.h
+++ b/drivers/gpu/drm/tegra/hdmi.h
@@ -1,10 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2012 Avionic Design GmbH
  * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef TEGRA_HDMI_H
diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c
index 922a48d5a483..c924ffba4094 100644
--- a/drivers/gpu/drm/tegra/hub.c
+++ b/drivers/gpu/drm/tegra/hub.c
@@ -1,30 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2017 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
 #include <linux/host1x.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/of_graph.h>
+#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
 
-#include <drm/drmP.h>
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
-#include <drm/drm_crtc_helper.h>
+#include <drm/drm_blend.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_framebuffer.h>
+#include <drm/drm_print.h>
+#include <drm/drm_probe_helper.h>
 
 #include "drm.h"
 #include "dc.h"
 #include "plane.h"
 
+#define NFB 24 /* fractional bits of the fixed-point scaler phase values */
+
 static const u32 tegra_shared_plane_formats[] = {
 	DRM_FORMAT_ARGB1555,
 	DRM_FORMAT_RGB565,
@@ -57,6 +61,18 @@ static const u64 tegra_shared_plane_modifiers[] = {
 	DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3),
 	DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4),
 	DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5),
+	/*
+	 * The GPU sector layout is only supported on Tegra194, but these will
+	 * be filtered out later on by ->format_mod_supported() on SoCs where
+	 * it isn't supported.
+	 */
+	DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(0) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT,
+	DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(1) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT,
+	DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(2) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT,
+	DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(3) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT,
+	DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(4) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT,
+	DRM_FORMAT_MOD_NVIDIA_16BX2_BLOCK(5) | DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT,
+	/* sentinel */
 	DRM_FORMAT_MOD_INVALID
 };
 
@@ -97,17 +113,25 @@ static inline void tegra_plane_writel(struct tegra_plane *plane, u32 value,
 
 static int tegra_windowgroup_enable(struct tegra_windowgroup *wgrp)
 {
+	int err = 0;
+
 	mutex_lock(&wgrp->lock);
 
 	if (wgrp->usecount == 0) {
-		pm_runtime_get_sync(wgrp->parent);
+		err = host1x_client_resume(wgrp->parent);
+		if (err < 0) {
+			dev_err(wgrp->parent->dev, "failed to resume: %d\n", err);
+			goto unlock;
+		}
+
 		reset_control_deassert(wgrp->rst);
 	}
 
 	wgrp->usecount++;
-	mutex_unlock(&wgrp->lock);
 
-	return 0;
+unlock:
+	mutex_unlock(&wgrp->lock);
+	return err;
 }
 
 static void tegra_windowgroup_disable(struct tegra_windowgroup *wgrp)
@@ -123,7 +147,7 @@ static void tegra_windowgroup_disable(struct tegra_windowgroup *wgrp)
 			       wgrp->index);
 		}
 
-		pm_runtime_put(wgrp->parent);
+		host1x_client_suspend(wgrp->parent);
 	}
 
 	wgrp->usecount--;
@@ -143,7 +167,9 @@ int tegra_display_hub_prepare(struct tegra_display_hub *hub)
 	for (i = 0; i < hub->soc->num_wgrps; i++) {
 		struct tegra_windowgroup *wgrp = &hub->wgrps[i];
 
-		tegra_windowgroup_enable(wgrp);
+		/* Skip orphaned window group whose parent DC is disabled */
+		if (wgrp->parent)
+			tegra_windowgroup_enable(wgrp);
 	}
 
 	return 0;
@@ -160,7 +186,9 @@ void tegra_display_hub_cleanup(struct tegra_display_hub *hub)
 	for (i = 0; i < hub->soc->num_wgrps; i++) {
 		struct tegra_windowgroup *wgrp = &hub->wgrps[i];
 
-		tegra_windowgroup_disable(wgrp);
+		/* Skip orphaned window group whose parent DC is disabled */
+		if (wgrp->parent)
+			tegra_windowgroup_disable(wgrp);
 	}
 }
 
@@ -270,6 +298,74 @@ static int tegra_shared_plane_set_owner(struct tegra_plane *plane,
 	return 0;
 }
 
+static void tegra_shared_plane_setup_scaler(struct tegra_plane *plane)
+{
+	/*
+	 * Scaler filter coefficients, stored as 3 ratio sets of 16 rows by 4
+	 * columns each. An entry's position in the table is also the index
+	 * that is programmed alongside it.
+	 */
+	static const unsigned int coeffs[192] = {
+		0x00000000, 0x3c70e400, 0x3bb037e4, 0x0c51cc9c,
+		0x00100001, 0x3bf0dbfa, 0x3d00f406, 0x3fe003ff,
+		0x00300002, 0x3b80cbf5, 0x3da1040d, 0x3fb003fe,
+		0x00400002, 0x3b20bff1, 0x3e511015, 0x3f9003fc,
+		0x00500002, 0x3ad0b3ed, 0x3f21201d, 0x3f5003fb,
+		0x00500003, 0x3aa0a3e9, 0x3ff13026, 0x3f2007f9,
+		0x00500403, 0x3a7097e6, 0x00e1402f, 0x3ee007f7,
+		0x00500403, 0x3a608be4, 0x01d14c38, 0x3ea00bf6,
+		0x00500403, 0x3a507fe2, 0x02e15c42, 0x3e500ff4,
+		0x00500402, 0x3a6073e1, 0x03f16c4d, 0x3e000ff2,
+		0x00400402, 0x3a706be0, 0x05117858, 0x3db013f0,
+		0x00300402, 0x3a905fe0, 0x06318863, 0x3d6017ee,
+		0x00300402, 0x3ab057e0, 0x0771986e, 0x3d001beb,
+		0x00200001, 0x3af04fe1, 0x08a1a47a, 0x3cb023e9,
+		0x00100001, 0x3b2047e2, 0x09e1b485, 0x3c6027e7,
+		0x00100000, 0x3b703fe2, 0x0b11c091, 0x3c002fe6,
+		0x3f203800, 0x0391103f, 0x3ff0a014, 0x0811606c,
+		0x3f2037ff, 0x0351083c, 0x03e11842, 0x3f203c00,
+		0x3f302fff, 0x03010439, 0x04311c45, 0x3f104401,
+		0x3f302fff, 0x02c0fc35, 0x04812448, 0x3f104802,
+		0x3f4027ff, 0x0270f832, 0x04c1284b, 0x3f205003,
+		0x3f4023ff, 0x0230f030, 0x0511304e, 0x3f205403,
+		0x3f601fff, 0x01f0e82d, 0x05613451, 0x3f205c04,
+		0x3f701bfe, 0x01b0e02a, 0x05a13c54, 0x3f306006,
+		0x3f7017fe, 0x0170d827, 0x05f14057, 0x3f406807,
+		0x3f8017ff, 0x0140d424, 0x0641445a, 0x3f406c08,
+		0x3fa013ff, 0x0100cc22, 0x0681485d, 0x3f507409,
+		0x3fa00fff, 0x00d0c41f, 0x06d14c60, 0x3f607c0b,
+		0x3fc00fff, 0x0090bc1c, 0x07115063, 0x3f80840c,
+		0x3fd00bff, 0x0070b41a, 0x07515465, 0x3f908c0e,
+		0x3fe007ff, 0x0040b018, 0x07915868, 0x3fb0900f,
+		0x3ff00400, 0x0010a816, 0x07d15c6a, 0x3fd09811,
+		0x00a04c0e, 0x0460f442, 0x0240a827, 0x05c15859,
+		0x0090440d, 0x0440f040, 0x0480fc43, 0x00b05010,
+		0x0080400c, 0x0410ec3e, 0x04910044, 0x00d05411,
+		0x0070380b, 0x03f0e83d, 0x04b10846, 0x00e05812,
+		0x0060340a, 0x03d0e43b, 0x04d10c48, 0x00f06013,
+		0x00503009, 0x03b0e039, 0x04e11449, 0x01106415,
+		0x00402c08, 0x0390d838, 0x05011c4b, 0x01206c16,
+		0x00302807, 0x0370d436, 0x0511204c, 0x01407018,
+		0x00302406, 0x0340d034, 0x0531244e, 0x01507419,
+		0x00202005, 0x0320cc32, 0x05412c50, 0x01707c1b,
+		0x00101c04, 0x0300c431, 0x05613451, 0x0180801d,
+		0x00101803, 0x02e0c02f, 0x05713853, 0x01a0881e,
+		0x00101002, 0x02b0bc2d, 0x05814054, 0x01c08c20,
+		0x00000c02, 0x02a0b82c, 0x05914455, 0x01e09421,
+		0x00000801, 0x0280b02a, 0x05a14c57, 0x02009c23,
+		0x00000400, 0x0260ac28, 0x05b15458, 0x0220a025,
+	};
+	unsigned int index;
+
+	/* ratio-major, then row, then column: this is plain table order */
+	for (index = 0; index < ARRAY_SIZE(coeffs); index++) {
+		u32 value = COEFF_INDEX(index) | COEFF_DATA(coeffs[index]);
+
+		tegra_plane_writel(plane, value,
+				   DC_WIN_WINDOWGROUP_SET_INPUT_SCALER_COEFF);
+	}
+}
+
 static void tegra_dc_assign_shared_plane(struct tegra_dc *dc,
 					 struct tegra_plane *plane)
 {
@@ -315,6 +411,8 @@ static void tegra_dc_assign_shared_plane(struct tegra_dc *dc,
 	value |= THREAD_GROUP_ENABLE;
 	tegra_plane_writel(plane, value, DC_WIN_CORE_IHUB_THREAD_GROUP);
 
+	tegra_shared_plane_setup_scaler(plane);
+
 	tegra_shared_plane_update(plane);
 	tegra_shared_plane_activate(plane);
 }
@@ -326,25 +424,27 @@ static void tegra_dc_remove_shared_plane(struct tegra_dc *dc,
 }
 
 static int tegra_shared_plane_atomic_check(struct drm_plane *plane,
-					   struct drm_plane_state *state)
+					   struct drm_atomic_state *state)
 {
-	struct tegra_plane_state *plane_state = to_tegra_plane_state(state);
+	struct drm_plane_state *new_plane_state = drm_atomic_get_new_plane_state(state,
+										 plane);
+	struct tegra_plane_state *plane_state = to_tegra_plane_state(new_plane_state);
 	struct tegra_shared_plane *tegra = to_tegra_shared_plane(plane);
 	struct tegra_bo_tiling *tiling = &plane_state->tiling;
-	struct tegra_dc *dc = to_tegra_dc(state->crtc);
+	struct tegra_dc *dc = to_tegra_dc(new_plane_state->crtc);
 	int err;
 
 	/* no need for further checks if the plane is being disabled */
-	if (!state->crtc || !state->fb)
+	if (!new_plane_state->crtc || !new_plane_state->fb)
 		return 0;
 
-	err = tegra_plane_format(state->fb->format->format,
+	err = tegra_plane_format(new_plane_state->fb->format->format,
 				 &plane_state->format,
 				 &plane_state->swap);
 	if (err < 0)
 		return err;
 
-	err = tegra_fb_get_tiling(state->fb, tiling);
+	err = tegra_fb_get_tiling(new_plane_state->fb, tiling);
 	if (err < 0)
 		return err;
 
@@ -354,13 +454,19 @@ static int tegra_shared_plane_atomic_check(struct drm_plane *plane,
 		return -EINVAL;
 	}
 
+	if (tiling->sector_layout == TEGRA_BO_SECTOR_LAYOUT_GPU &&
+	    !dc->soc->supports_sector_layout) {
+		DRM_ERROR("hardware doesn't support GPU sector layout\n");
+		return -EINVAL;
+	}
+
 	/*
 	 * Tegra doesn't support different strides for U and V planes so we
 	 * error out if the user tries to display a framebuffer with such a
 	 * configuration.
 	 */
-	if (state->fb->format->num_planes > 2) {
-		if (state->fb->pitches[2] != state->fb->pitches[1]) {
+	if (new_plane_state->fb->format->num_planes > 2) {
+		if (new_plane_state->fb->pitches[2] != new_plane_state->fb->pitches[1]) {
 			DRM_ERROR("unsupported UV-plane configuration\n");
 			return -EINVAL;
 		}
@@ -368,7 +474,7 @@ static int tegra_shared_plane_atomic_check(struct drm_plane *plane,
 
 	/* XXX scaling is not yet supported, add a check here */
 
-	err = tegra_plane_state_add(&tegra->base, state);
+	err = tegra_plane_state_add(&tegra->base, new_plane_state);
 	if (err < 0)
 		return err;
 
@@ -376,16 +482,27 @@ static int tegra_shared_plane_atomic_check(struct drm_plane *plane,
 }
 
 static void tegra_shared_plane_atomic_disable(struct drm_plane *plane,
-					      struct drm_plane_state *old_state)
+					      struct drm_atomic_state *state)
 {
-	struct tegra_dc *dc = to_tegra_dc(old_state->crtc);
+	struct drm_plane_state *old_state = drm_atomic_get_old_plane_state(state,
+									   plane);
 	struct tegra_plane *p = to_tegra_plane(plane);
+	struct tegra_dc *dc;
 	u32 value;
+	int err;
 
 	/* rien ne va plus */
 	if (!old_state || !old_state->crtc)
 		return;
 
+	dc = to_tegra_dc(old_state->crtc);
+
+	err = host1x_client_resume(&dc->client);
+	if (err < 0) {
+		dev_err(dc->dev, "failed to resume: %d\n", err);
+		return;
+	}
+
 	/*
 	 * XXX Legacy helpers seem to sometimes call ->atomic_disable() even
 	 * on planes that are already disabled. Make sure we fallback to the
@@ -394,39 +511,58 @@ static void tegra_shared_plane_atomic_disable(struct drm_plane *plane,
 	if (WARN_ON(p->dc == NULL))
 		p->dc = dc;
 
-	pm_runtime_get_sync(dc->dev);
-
 	value = tegra_plane_readl(p, DC_WIN_WIN_OPTIONS);
 	value &= ~WIN_ENABLE;
 	tegra_plane_writel(p, value, DC_WIN_WIN_OPTIONS);
 
 	tegra_dc_remove_shared_plane(dc, p);
 
-	pm_runtime_put(dc->dev);
+	host1x_client_suspend(&dc->client);
+}
+
+/* Scale trunc(in) / out to NFB fractional bits, rounding to nearest. */
+static inline u32 compute_phase_incr(fixed20_12 in, unsigned int out)
+{
+	u64 ratio = (u64)dfixed_trunc(in) << NFB;
+
+	/* bias by half the divisor so that do_div() rounds to nearest */
+	ratio += out >> 1;
+	do_div(ratio, out);
+	return lower_32_bits(ratio);
+}
 
 static void tegra_shared_plane_atomic_update(struct drm_plane *plane,
-					     struct drm_plane_state *old_state)
+					     struct drm_atomic_state *state)
 {
-	struct tegra_plane_state *state = to_tegra_plane_state(plane->state);
-	struct tegra_dc *dc = to_tegra_dc(plane->state->crtc);
-	unsigned int zpos = plane->state->normalized_zpos;
-	struct drm_framebuffer *fb = plane->state->fb;
+	struct drm_plane_state *new_state = drm_atomic_get_new_plane_state(state,
+									   plane);
+	struct tegra_plane_state *tegra_plane_state = to_tegra_plane_state(new_state);
+	struct tegra_dc *dc = to_tegra_dc(new_state->crtc);
+	unsigned int zpos = new_state->normalized_zpos;
+	struct drm_framebuffer *fb = new_state->fb;
 	struct tegra_plane *p = to_tegra_plane(plane);
-	struct tegra_bo *bo;
-	dma_addr_t base;
-	u32 value;
+	u32 value, min_width, bypass = 0;
+	dma_addr_t base, addr_flag = 0;
+	unsigned int bpc, planes;
+	bool yuv;
+	int err;
 
 	/* rien ne va plus */
-	if (!plane->state->crtc || !plane->state->fb)
+	if (!new_state->crtc || !new_state->fb)
+		return;
+
+	if (!new_state->visible) {
+		tegra_shared_plane_atomic_disable(plane, state);
 		return;
+	}
 
-	if (!plane->state->visible) {
-		tegra_shared_plane_atomic_disable(plane, old_state);
+	err = host1x_client_resume(&dc->client);
+	if (err < 0) {
+		dev_err(dc->dev, "failed to resume: %d\n", err);
 		return;
 	}
 
-	pm_runtime_get_sync(dc->dev);
+	yuv = tegra_plane_format_is_yuv(tegra_plane_state->format, &planes, &bpc);
 
 	tegra_dc_assign_shared_plane(dc, p);
 
@@ -446,33 +582,79 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane,
 	value = K2(255) | K1(255) | WINDOW_LAYER_DEPTH(255 - zpos);
 	tegra_plane_writel(p, value, DC_WIN_BLEND_LAYER_CONTROL);
 
-	/* bypass scaling */
+	/* scaling */
+	min_width = min(new_state->src_w >> 16, new_state->crtc_w);
+
+	value = tegra_plane_readl(p, DC_WINC_PRECOMP_WGRP_PIPE_CAPC);
+
+	if (min_width < MAX_PIXELS_5TAP444(value)) {
+		value = HORIZONTAL_TAPS_5 | VERTICAL_TAPS_5;
+	} else {
+		value = tegra_plane_readl(p, DC_WINC_PRECOMP_WGRP_PIPE_CAPE);
+
+		if (min_width < MAX_PIXELS_2TAP444(value))
+			value = HORIZONTAL_TAPS_2 | VERTICAL_TAPS_2;
+		else
+			dev_err(dc->dev, "invalid minimum width: %u\n", min_width);
+	}
+
 	value = HORIZONTAL_TAPS_5 | VERTICAL_TAPS_5;
 	tegra_plane_writel(p, value, DC_WIN_WINDOWGROUP_SET_CONTROL_INPUT_SCALER);
 
-	value = INPUT_SCALER_VBYPASS | INPUT_SCALER_HBYPASS;
-	tegra_plane_writel(p, value, DC_WIN_WINDOWGROUP_SET_INPUT_SCALER_USAGE);
+	if (new_state->src_w != new_state->crtc_w << 16) {
+		fixed20_12 width = dfixed_init(new_state->src_w >> 16);
+		u32 incr = compute_phase_incr(width, new_state->crtc_w) & ~0x1;
+		u32 init = (1 << (NFB - 1)) + (incr >> 1);
+
+		tegra_plane_writel(p, incr, DC_WIN_SET_INPUT_SCALER_HPHASE_INCR);
+		tegra_plane_writel(p, init, DC_WIN_SET_INPUT_SCALER_H_START_PHASE);
+	} else {
+		bypass |= INPUT_SCALER_HBYPASS;
+	}
+
+	if (new_state->src_h != new_state->crtc_h << 16) {
+		fixed20_12 height = dfixed_init(new_state->src_h >> 16);
+		u32 incr = compute_phase_incr(height, new_state->crtc_h) & ~0x1;
+		u32 init = (1 << (NFB - 1)) + (incr >> 1);
+
+		tegra_plane_writel(p, incr, DC_WIN_SET_INPUT_SCALER_VPHASE_INCR);
+		tegra_plane_writel(p, init, DC_WIN_SET_INPUT_SCALER_V_START_PHASE);
+	} else {
+		bypass |= INPUT_SCALER_VBYPASS;
+	}
+
+	tegra_plane_writel(p, bypass, DC_WIN_WINDOWGROUP_SET_INPUT_SCALER_USAGE);
 
 	/* disable compression */
 	tegra_plane_writel(p, 0, DC_WINBUF_CDE_CONTROL);
 
-	bo = tegra_fb_get_plane(fb, 0);
-	base = bo->paddr;
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	/*
+	 * Physical address bit 39 in Tegra194 is used as a switch for special
+	 * logic that swizzles the memory using either the legacy Tegra or the
+	 * dGPU sector layout.
+	 */
+	if (tegra_plane_state->tiling.sector_layout == TEGRA_BO_SECTOR_LAYOUT_GPU)
+		addr_flag = BIT_ULL(39);
+#endif
+
+	base = tegra_plane_state->iova[0] + fb->offsets[0];
+	base |= addr_flag;
 
-	tegra_plane_writel(p, state->format, DC_WIN_COLOR_DEPTH);
+	tegra_plane_writel(p, tegra_plane_state->format, DC_WIN_COLOR_DEPTH);
 	tegra_plane_writel(p, 0, DC_WIN_PRECOMP_WGRP_PARAMS);
 
-	value = V_POSITION(plane->state->crtc_y) |
-		H_POSITION(plane->state->crtc_x);
+	value = V_POSITION(new_state->crtc_y) |
+		H_POSITION(new_state->crtc_x);
 	tegra_plane_writel(p, value, DC_WIN_POSITION);
 
-	value = V_SIZE(plane->state->crtc_h) | H_SIZE(plane->state->crtc_w);
+	value = V_SIZE(new_state->crtc_h) | H_SIZE(new_state->crtc_w);
 	tegra_plane_writel(p, value, DC_WIN_SIZE);
 
 	value = WIN_ENABLE | COLOR_EXPAND;
 	tegra_plane_writel(p, value, DC_WIN_WIN_OPTIONS);
 
-	value = V_SIZE(plane->state->crtc_h) | H_SIZE(plane->state->crtc_w);
+	value = V_SIZE(new_state->src_h >> 16) | H_SIZE(new_state->src_w >> 16);
 	tegra_plane_writel(p, value, DC_WIN_CROPPED_SIZE);
 
 	tegra_plane_writel(p, upper_32_bits(base), DC_WINBUF_START_ADDR_HI);
@@ -481,18 +663,61 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane,
 	value = PITCH(fb->pitches[0]);
 	tegra_plane_writel(p, value, DC_WIN_PLANAR_STORAGE);
 
-	value = CLAMP_BEFORE_BLEND | DEGAMMA_SRGB | INPUT_RANGE_FULL;
+	if (yuv && planes > 1) {
+		base = tegra_plane_state->iova[1] + fb->offsets[1];
+		base |= addr_flag;
+
+		tegra_plane_writel(p, upper_32_bits(base), DC_WINBUF_START_ADDR_HI_U);
+		tegra_plane_writel(p, lower_32_bits(base), DC_WINBUF_START_ADDR_U);
+
+		if (planes > 2) {
+			base = tegra_plane_state->iova[2] + fb->offsets[2];
+			base |= addr_flag;
+
+			tegra_plane_writel(p, upper_32_bits(base), DC_WINBUF_START_ADDR_HI_V);
+			tegra_plane_writel(p, lower_32_bits(base), DC_WINBUF_START_ADDR_V);
+		}
+
+		value = PITCH_U(fb->pitches[1]);
+
+		if (planes > 2)
+			value |= PITCH_V(fb->pitches[2]);
+
+		tegra_plane_writel(p, value, DC_WIN_PLANAR_STORAGE_UV);
+	} else {
+		tegra_plane_writel(p, 0, DC_WINBUF_START_ADDR_U);
+		tegra_plane_writel(p, 0, DC_WINBUF_START_ADDR_HI_U);
+		tegra_plane_writel(p, 0, DC_WINBUF_START_ADDR_V);
+		tegra_plane_writel(p, 0, DC_WINBUF_START_ADDR_HI_V);
+		tegra_plane_writel(p, 0, DC_WIN_PLANAR_STORAGE_UV);
+	}
+
+	value = CLAMP_BEFORE_BLEND | INPUT_RANGE_FULL;
+
+	if (yuv) {
+		if (bpc < 12)
+			value |= DEGAMMA_YUV8_10;
+		else
+			value |= DEGAMMA_YUV12;
+
+		/* XXX parameterize */
+		value |= COLOR_SPACE_YUV_2020;
+	} else {
+		if (!tegra_plane_format_is_indexed(tegra_plane_state->format))
+			value |= DEGAMMA_SRGB;
+	}
+
 	tegra_plane_writel(p, value, DC_WIN_SET_PARAMS);
 
-	value = OFFSET_X(plane->state->src_y >> 16) |
-		OFFSET_Y(plane->state->src_x >> 16);
+	value = OFFSET_X(new_state->src_y >> 16) |
+		OFFSET_Y(new_state->src_x >> 16);
 	tegra_plane_writel(p, value, DC_WINBUF_CROPPED_POINT);
 
 	if (dc->soc->supports_block_linear) {
-		unsigned long height = state->tiling.value;
+		unsigned long height = tegra_plane_state->tiling.value;
 
 		/* XXX */
-		switch (state->tiling.mode) {
+		switch (tegra_plane_state->tiling.mode) {
 		case TEGRA_BO_TILING_MODE_PITCH:
 			value = DC_WINBUF_SURFACE_KIND_BLOCK_HEIGHT(0) |
 				DC_WINBUF_SURFACE_KIND_PITCH;
@@ -517,10 +742,12 @@ static void tegra_shared_plane_atomic_update(struct drm_plane *plane,
 	value &= ~CONTROL_CSC_ENABLE;
 	tegra_plane_writel(p, value, DC_WIN_WINDOW_SET_CONTROL);
 
-	pm_runtime_put(dc->dev);
+	host1x_client_suspend(&dc->client);
 }
 
 static const struct drm_plane_helper_funcs tegra_shared_plane_helper_funcs = {
+	.prepare_fb = tegra_plane_prepare_fb,
+	.cleanup_fb = tegra_plane_cleanup_fb,
 	.atomic_check = tegra_shared_plane_atomic_check,
 	.atomic_update = tegra_shared_plane_atomic_update,
 	.atomic_disable = tegra_shared_plane_atomic_disable,
@@ -529,14 +756,13 @@ static const struct drm_plane_helper_funcs tegra_shared_plane_helper_funcs = {
 struct drm_plane *tegra_shared_plane_create(struct drm_device *drm,
 					    struct tegra_dc *dc,
 					    unsigned int wgrp,
-					    unsigned int index)
+					    unsigned int index,
+					    enum drm_plane_type type)
 {
-	enum drm_plane_type type = DRM_PLANE_TYPE_OVERLAY;
 	struct tegra_drm *tegra = drm->dev_private;
 	struct tegra_display_hub *hub = tegra->hub;
-	/* planes can be assigned to arbitrary CRTCs */
-	unsigned int possible_crtcs = 0x7;
 	struct tegra_shared_plane *plane;
+	unsigned int possible_crtcs;
 	unsigned int num_formats;
 	const u64 *modifiers;
 	struct drm_plane *p;
@@ -551,10 +777,13 @@ struct drm_plane *tegra_shared_plane_create(struct drm_device *drm,
 	plane->base.index = index;
 
 	plane->wgrp = &hub->wgrps[wgrp];
-	plane->wgrp->parent = dc->dev;
+	plane->wgrp->parent = &dc->client;
 
 	p = &plane->base.base;
 
+	/* planes can be assigned to arbitrary CRTCs */
+	possible_crtcs = BIT(tegra->num_crtcs) - 1;
+
 	num_formats = ARRAY_SIZE(tegra_shared_plane_formats);
 	formats = tegra_shared_plane_formats;
 	modifiers = tegra_shared_plane_modifiers;
@@ -605,11 +834,8 @@ static struct tegra_display_hub_state *
 tegra_display_hub_get_state(struct tegra_display_hub *hub,
 			    struct drm_atomic_state *state)
 {
-	struct drm_device *drm = dev_get_drvdata(hub->client.parent);
 	struct drm_private_state *priv;
 
-	WARN_ON(!drm_modeset_is_locked(&drm->mode_config.connection_mutex));
-
 	priv = drm_atomic_get_private_obj_state(state, &hub->base);
 	if (IS_ERR(priv))
 		return ERR_CAST(priv);
@@ -659,8 +885,13 @@ int tegra_display_hub_atomic_check(struct drm_device *drm,
 static void tegra_display_hub_update(struct tegra_dc *dc)
 {
 	u32 value;
+	int err;
 
-	pm_runtime_get_sync(dc->dev);
+	err = host1x_client_resume(&dc->client);
+	if (err < 0) {
+		dev_err(dc->dev, "failed to resume: %d\n", err);
+		return;
+	}
 
 	value = tegra_dc_readl(dc, DC_CMD_IHUB_COMMON_MISC_CTL);
 	value &= ~LATENCY_EVENT;
@@ -675,7 +906,7 @@ static void tegra_display_hub_update(struct tegra_dc *dc)
 	tegra_dc_writel(dc, COMMON_ACTREQ, DC_CMD_STATE_CONTROL);
 	tegra_dc_readl(dc, DC_CMD_STATE_CONTROL);
 
-	pm_runtime_put(dc->dev);
+	host1x_client_suspend(&dc->client);
 }
 
 void tegra_display_hub_atomic_commit(struct drm_device *drm,
@@ -708,7 +939,7 @@ void tegra_display_hub_atomic_commit(struct drm_device *drm,
 static int tegra_display_hub_init(struct host1x_client *client)
 {
 	struct tegra_display_hub *hub = to_tegra_display_hub(client);
-	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->host);
 	struct tegra_drm *tegra = drm->dev_private;
 	struct tegra_display_hub_state *state;
 
@@ -716,7 +947,7 @@ static int tegra_display_hub_init(struct host1x_client *client)
 	if (!state)
 		return -ENOMEM;
 
-	drm_atomic_private_obj_init(&hub->base, &state->base,
+	drm_atomic_private_obj_init(drm, &hub->base, &state->base,
 				    &tegra_display_hub_state_funcs);
 
 	tegra->hub = hub;
@@ -726,7 +957,7 @@ static int tegra_display_hub_init(struct host1x_client *client)
 
 static int tegra_display_hub_exit(struct host1x_client *client)
 {
-	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->host);
 	struct tegra_drm *tegra = drm->dev_private;
 
 	drm_atomic_private_obj_fini(&tegra->hub->base);
@@ -735,19 +966,102 @@ static int tegra_display_hub_exit(struct host1x_client *client)
 	return 0;
 }
 
+static int tegra_display_hub_runtime_suspend(struct host1x_client *client)
+{
+	struct tegra_display_hub *hub = to_tegra_display_hub(client);
+	struct device *dev = client->dev;
+	unsigned int i = hub->num_heads;
+	int err;
+
+	err = reset_control_assert(hub->rst);
+	if (err < 0)
+		return err;
+
+	while (i--)
+		clk_disable_unprepare(hub->clk_heads[i]);
+
+	clk_disable_unprepare(hub->clk_hub);
+	clk_disable_unprepare(hub->clk_dsc);
+	clk_disable_unprepare(hub->clk_disp);
+
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+
+static int tegra_display_hub_runtime_resume(struct host1x_client *client)
+{
+	struct tegra_display_hub *hub = to_tegra_display_hub(client);
+	struct device *dev = client->dev;
+	unsigned int i;
+	int err;
+
+	err = pm_runtime_resume_and_get(dev);
+	if (err < 0) {
+		dev_err(dev, "failed to get runtime PM: %d\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(hub->clk_disp);
+	if (err < 0)
+		goto put_rpm;
+
+	err = clk_prepare_enable(hub->clk_dsc);
+	if (err < 0)
+		goto disable_disp;
+
+	err = clk_prepare_enable(hub->clk_hub);
+	if (err < 0)
+		goto disable_dsc;
+
+	for (i = 0; i < hub->num_heads; i++) {
+		err = clk_prepare_enable(hub->clk_heads[i]);
+		if (err < 0)
+			goto disable_heads;
+	}
+
+	err = reset_control_deassert(hub->rst);
+	if (err < 0)
+		goto disable_heads;
+
+	return 0;
+
+disable_heads:
+	while (i--)
+		clk_disable_unprepare(hub->clk_heads[i]);
+
+	clk_disable_unprepare(hub->clk_hub);
+disable_dsc:
+	clk_disable_unprepare(hub->clk_dsc);
+disable_disp:
+	clk_disable_unprepare(hub->clk_disp);
+put_rpm:
+	pm_runtime_put_sync(dev);
+	return err;
+}
+
 static const struct host1x_client_ops tegra_display_hub_ops = {
 	.init = tegra_display_hub_init,
 	.exit = tegra_display_hub_exit,
+	.suspend = tegra_display_hub_runtime_suspend,
+	.resume = tegra_display_hub_runtime_resume,
 };
 
 static int tegra_display_hub_probe(struct platform_device *pdev)
 {
+	u64 dma_mask = dma_get_mask(pdev->dev.parent);
 	struct device_node *child = NULL;
 	struct tegra_display_hub *hub;
 	struct clk *clk;
 	unsigned int i;
 	int err;
 
+	err = dma_coerce_mask_and_coherent(&pdev->dev, dma_mask);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err);
+		return err;
+	}
+
 	hub = devm_kzalloc(&pdev->dev, sizeof(*hub), GFP_KERNEL);
 	if (!hub)
 		return -ENOMEM;
@@ -787,7 +1101,7 @@ static int tegra_display_hub_probe(struct platform_device *pdev)
 
 	for (i = 0; i < hub->soc->num_wgrps; i++) {
 		struct tegra_windowgroup *wgrp = &hub->wgrps[i];
-		char id[8];
+		char id[16];
 
 		snprintf(id, sizeof(id), "wgrp%u", i);
 		mutex_init(&wgrp->lock);
@@ -848,92 +1162,34 @@ static int tegra_display_hub_probe(struct platform_device *pdev)
 		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
 			err);
 
-	return err;
-}
-
-static int tegra_display_hub_remove(struct platform_device *pdev)
-{
-	struct tegra_display_hub *hub = platform_get_drvdata(pdev);
-	int err;
+	err = devm_of_platform_populate(&pdev->dev);
+	if (err < 0)
+		goto unregister;
 
-	err = host1x_client_unregister(&hub->client);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
-			err);
-	}
+	return err;
 
+unregister:
+	host1x_client_unregister(&hub->client);
 	pm_runtime_disable(&pdev->dev);
-
 	return err;
 }
 
-static int __maybe_unused tegra_display_hub_suspend(struct device *dev)
-{
-	struct tegra_display_hub *hub = dev_get_drvdata(dev);
-	unsigned int i = hub->num_heads;
-	int err;
-
-	err = reset_control_assert(hub->rst);
-	if (err < 0)
-		return err;
-
-	while (i--)
-		clk_disable_unprepare(hub->clk_heads[i]);
-
-	clk_disable_unprepare(hub->clk_hub);
-	clk_disable_unprepare(hub->clk_dsc);
-	clk_disable_unprepare(hub->clk_disp);
-
-	return 0;
-}
-
-static int __maybe_unused tegra_display_hub_resume(struct device *dev)
+static void tegra_display_hub_remove(struct platform_device *pdev)
 {
-	struct tegra_display_hub *hub = dev_get_drvdata(dev);
+	struct tegra_display_hub *hub = platform_get_drvdata(pdev);
 	unsigned int i;
-	int err;
 
-	err = clk_prepare_enable(hub->clk_disp);
-	if (err < 0)
-		return err;
+	host1x_client_unregister(&hub->client);
 
-	err = clk_prepare_enable(hub->clk_dsc);
-	if (err < 0)
-		goto disable_disp;
-
-	err = clk_prepare_enable(hub->clk_hub);
-	if (err < 0)
-		goto disable_dsc;
+	for (i = 0; i < hub->soc->num_wgrps; i++) {
+		struct tegra_windowgroup *wgrp = &hub->wgrps[i];
 
-	for (i = 0; i < hub->num_heads; i++) {
-		err = clk_prepare_enable(hub->clk_heads[i]);
-		if (err < 0)
-			goto disable_heads;
+		mutex_destroy(&wgrp->lock);
 	}
 
-	err = reset_control_deassert(hub->rst);
-	if (err < 0)
-		goto disable_heads;
-
-	return 0;
-
-disable_heads:
-	while (i--)
-		clk_disable_unprepare(hub->clk_heads[i]);
-
-	clk_disable_unprepare(hub->clk_hub);
-disable_dsc:
-	clk_disable_unprepare(hub->clk_dsc);
-disable_disp:
-	clk_disable_unprepare(hub->clk_disp);
-	return err;
+	pm_runtime_disable(&pdev->dev);
 }
 
-static const struct dev_pm_ops tegra_display_hub_pm_ops = {
-	SET_RUNTIME_PM_OPS(tegra_display_hub_suspend,
-			   tegra_display_hub_resume, NULL)
-};
-
 static const struct tegra_display_hub_soc tegra186_display_hub = {
 	.num_wgrps = 6,
 	.supports_dsc = true,
@@ -961,7 +1217,6 @@ struct platform_driver tegra_display_hub_driver = {
 	.driver = {
 		.name = "tegra-display-hub",
 		.of_match_table = tegra_display_hub_of_match,
-		.pm = &tegra_display_hub_pm_ops,
 	},
 	.probe = tegra_display_hub_probe,
 	.remove = tegra_display_hub_remove,
diff --git a/drivers/gpu/drm/tegra/hub.h b/drivers/gpu/drm/tegra/hub.h
index 479087c0705a..a66f18c4facc 100644
--- a/drivers/gpu/drm/tegra/hub.h
+++ b/drivers/gpu/drm/tegra/hub.h
@@ -1,15 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2017 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef TEGRA_HUB_H
 #define TEGRA_HUB_H 1
 
-#include <drm/drmP.h>
 #include <drm/drm_plane.h>
 
 #include "plane.h"
@@ -21,7 +17,7 @@ struct tegra_windowgroup {
 	struct mutex lock;
 
 	unsigned int index;
-	struct device *parent;
+	struct host1x_client *parent;
 	struct reset_control *rst;
 };
 
@@ -76,7 +72,6 @@ to_tegra_display_hub_state(struct drm_private_state *priv)
 	return container_of(priv, struct tegra_display_hub_state, base);
 }
 
-struct tegra_dc;
 struct tegra_plane;
 
 int tegra_display_hub_prepare(struct tegra_display_hub *hub);
@@ -85,7 +80,8 @@ void tegra_display_hub_cleanup(struct tegra_display_hub *hub);
 struct drm_plane *tegra_shared_plane_create(struct drm_device *drm,
 					    struct tegra_dc *dc,
 					    unsigned int wgrp,
-					    unsigned int index);
+					    unsigned int index,
+					    enum drm_plane_type type);
 
 int tegra_display_hub_atomic_check(struct drm_device *drm,
 				   struct drm_atomic_state *state);
diff --git a/drivers/gpu/drm/tegra/mipi-phy.c b/drivers/gpu/drm/tegra/mipi-phy.c
index ba2ae6511957..4fe05f2df637 100644
--- a/drivers/gpu/drm/tegra/mipi-phy.c
+++ b/drivers/gpu/drm/tegra/mipi-phy.c
@@ -1,9 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2013 NVIDIA Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/errno.h>
diff --git a/drivers/gpu/drm/tegra/mipi-phy.h b/drivers/gpu/drm/tegra/mipi-phy.h
index 012ea8ac36d7..41889a75035c 100644
--- a/drivers/gpu/drm/tegra/mipi-phy.h
+++ b/drivers/gpu/drm/tegra/mipi-phy.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2013 NVIDIA Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef DRM_TEGRA_MIPI_PHY_H
diff --git a/drivers/gpu/drm/tegra/nvdec.c b/drivers/gpu/drm/tegra/nvdec.c
new file mode 100644
index 000000000000..7a38664e890e
--- /dev/null
+++ b/drivers/gpu/drm/tegra/nvdec.c
@@ -0,0 +1,578 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2015-2022, NVIDIA Corporation.
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/host1x.h>
+#include <linux/iommu.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+
+#include <soc/tegra/mc.h>
+
+#include "drm.h"
+#include "falcon.h"
+#include "riscv.h"
+#include "vic.h"
+
+#define NVDEC_FALCON_DEBUGINFO			0x1094
+#define NVDEC_TFBIF_TRANSCFG			0x2c44
+
+struct nvdec_config {
+	const char *firmware;
+	unsigned int version;
+	bool supports_sid;
+	bool has_riscv;
+	bool has_extra_clocks;
+};
+
+struct nvdec {
+	struct falcon falcon;
+
+	void __iomem *regs;
+	struct tegra_drm_client client;
+	struct host1x_channel *channel;
+	struct device *dev;
+	struct clk_bulk_data clks[3];
+	unsigned int num_clks;
+	struct reset_control *reset;
+
+	/* Platform configuration */
+	const struct nvdec_config *config;
+
+	/* RISC-V specific data */
+	struct tegra_drm_riscv riscv;
+	phys_addr_t carveout_base;
+};
+
+static inline struct nvdec *to_nvdec(struct tegra_drm_client *client)
+{
+	return container_of(client, struct nvdec, client);
+}
+
+static inline void nvdec_writel(struct nvdec *nvdec, u32 value,
+				unsigned int offset)
+{
+	writel(value, nvdec->regs + offset);
+}
+
+static int nvdec_boot_falcon(struct nvdec *nvdec)
+{
+	u32 stream_id;
+	int err;
+
+	if (nvdec->config->supports_sid && tegra_dev_iommu_get_stream_id(nvdec->dev, &stream_id)) {
+		u32 value;
+
+		value = TRANSCFG_ATT(1, TRANSCFG_SID_FALCON) | TRANSCFG_ATT(0, TRANSCFG_SID_HW);
+		nvdec_writel(nvdec, value, NVDEC_TFBIF_TRANSCFG);
+
+		nvdec_writel(nvdec, stream_id, VIC_THI_STREAMID0);
+		nvdec_writel(nvdec, stream_id, VIC_THI_STREAMID1);
+	}
+
+	err = falcon_boot(&nvdec->falcon);
+	if (err < 0)
+		return err;
+
+	err = falcon_wait_idle(&nvdec->falcon);
+	if (err < 0) {
+		dev_err(nvdec->dev, "falcon boot timed out\n");
+		return err;
+	}
+
+	return 0;
+}
+
+static int nvdec_wait_debuginfo(struct nvdec *nvdec, const char *phase)
+{
+	int err;
+	u32 val;
+
+	err = readl_poll_timeout(nvdec->regs + NVDEC_FALCON_DEBUGINFO, val, val == 0x0, 10, 100000);
+	if (err) {
+		dev_err(nvdec->dev, "failed to boot %s, debuginfo=0x%x\n", phase, val);
+		return err;
+	}
+
+	return 0;
+}
+
+static int nvdec_boot_riscv(struct nvdec *nvdec)
+{
+	int err;
+
+	err = reset_control_acquire(nvdec->reset);
+	if (err)
+		return err;
+
+	nvdec_writel(nvdec, 0xabcd1234, NVDEC_FALCON_DEBUGINFO);
+
+	err = tegra_drm_riscv_boot_bootrom(&nvdec->riscv, nvdec->carveout_base, 1,
+					   &nvdec->riscv.bl_desc);
+	if (err) {
+		dev_err(nvdec->dev, "failed to execute bootloader\n");
+		goto release_reset;
+	}
+
+	err = nvdec_wait_debuginfo(nvdec, "bootloader");
+	if (err)
+		goto release_reset;
+
+	err = reset_control_reset(nvdec->reset);
+	if (err)
+		goto release_reset;
+
+	nvdec_writel(nvdec, 0xabcd1234, NVDEC_FALCON_DEBUGINFO);
+
+	err = tegra_drm_riscv_boot_bootrom(&nvdec->riscv, nvdec->carveout_base, 1,
+					   &nvdec->riscv.os_desc);
+	if (err) {
+		dev_err(nvdec->dev, "failed to execute firmware\n");
+		goto release_reset;
+	}
+
+	err = nvdec_wait_debuginfo(nvdec, "firmware");
+	if (err)
+		goto release_reset;
+
+release_reset:
+	reset_control_release(nvdec->reset);
+
+	return err;
+}
+
+static int nvdec_init(struct host1x_client *client)
+{
+	struct tegra_drm_client *drm = host1x_to_drm_client(client);
+	struct drm_device *dev = dev_get_drvdata(client->host);
+	struct tegra_drm *tegra = dev->dev_private;
+	struct nvdec *nvdec = to_nvdec(drm);
+	int err;
+
+	err = host1x_client_iommu_attach(client);
+	if (err < 0 && err != -ENODEV) {
+		dev_err(nvdec->dev, "failed to attach to domain: %d\n", err);
+		return err;
+	}
+
+	nvdec->channel = host1x_channel_request(client);
+	if (!nvdec->channel) {
+		err = -ENOMEM;
+		goto detach;
+	}
+
+	client->syncpts[0] = host1x_syncpt_request(client, 0);
+	if (!client->syncpts[0]) {
+		err = -ENOMEM;
+		goto free_channel;
+	}
+
+	err = tegra_drm_register_client(tegra, drm);
+	if (err < 0)
+		goto free_syncpt;
+
+	/*
+	 * Inherit the DMA parameters (such as maximum segment size) from the
+	 * parent host1x device.
+	 */
+	client->dev->dma_parms = client->host->dma_parms;
+
+	return 0;
+
+free_syncpt:
+	host1x_syncpt_put(client->syncpts[0]);
+free_channel:
+	host1x_channel_put(nvdec->channel);
+detach:
+	host1x_client_iommu_detach(client);
+
+	return err;
+}
+
+static int nvdec_exit(struct host1x_client *client)
+{
+	struct tegra_drm_client *drm = host1x_to_drm_client(client);
+	struct drm_device *dev = dev_get_drvdata(client->host);
+	struct tegra_drm *tegra = dev->dev_private;
+	struct nvdec *nvdec = to_nvdec(drm);
+	int err;
+
+	/* avoid a dangling pointer just in case this disappears */
+	client->dev->dma_parms = NULL;
+
+	err = tegra_drm_unregister_client(tegra, drm);
+	if (err < 0)
+		return err;
+
+	pm_runtime_dont_use_autosuspend(client->dev);
+	pm_runtime_force_suspend(client->dev);
+
+	/* free the image before detaching, which may clear client->group -- TODO confirm */
+	if (nvdec->falcon.firmware.virt && client->group) {
+		dma_unmap_single(nvdec->dev, nvdec->falcon.firmware.phys,
+				 nvdec->falcon.firmware.size, DMA_TO_DEVICE);
+		tegra_drm_free(tegra, nvdec->falcon.firmware.size,
+			       nvdec->falcon.firmware.virt, nvdec->falcon.firmware.iova);
+	} else if (nvdec->falcon.firmware.virt) {
+		dma_free_coherent(nvdec->dev, nvdec->falcon.firmware.size,
+				  nvdec->falcon.firmware.virt, nvdec->falcon.firmware.iova);
+	}
+	/* a later resume must reload the firmware rather than reuse this buffer */
+	nvdec->falcon.firmware.virt = NULL;
+
+	host1x_syncpt_put(client->syncpts[0]);
+	host1x_channel_put(nvdec->channel);
+	host1x_client_iommu_detach(client);
+	nvdec->channel = NULL;
+
+	return 0;
+}
+
+static const struct host1x_client_ops nvdec_client_ops = {
+	.init = nvdec_init,
+	.exit = nvdec_exit,
+};
+
+/* Load the Falcon firmware image into a DMA buffer; a no-op once an image is loaded. */
+static int nvdec_load_falcon_firmware(struct nvdec *nvdec)
+{
+	struct host1x_client *client = &nvdec->client.base;
+	struct tegra_drm *tegra = nvdec->client.drm;
+	dma_addr_t iova;
+	size_t size;
+	void *virt;
+	int err;
+
+	if (nvdec->falcon.firmware.virt)
+		return 0;
+
+	err = falcon_read_firmware(&nvdec->falcon, nvdec->config->firmware);
+	if (err < 0)
+		return err;
+
+	size = nvdec->falcon.firmware.size;
+
+	if (!client->group) {
+		virt = dma_alloc_coherent(nvdec->dev, size, &iova, GFP_KERNEL);
+		if (!virt)
+			return -ENOMEM;
+	} else {
+		virt = tegra_drm_alloc(tegra, size, &iova);
+		if (IS_ERR(virt))
+			return PTR_ERR(virt);
+	}
+
+	nvdec->falcon.firmware.virt = virt;
+	nvdec->falcon.firmware.iova = iova;
+
+	err = falcon_load_firmware(&nvdec->falcon);
+	if (err < 0)
+		goto cleanup;
+
+	/*
+	 * With an IOVA from the shared domain, also map the buffer through the
+	 * DMA API so that it knows which memory pages to flush the cache for.
+	 */
+	if (client->group) {
+		dma_addr_t phys = dma_map_single(nvdec->dev, virt, size, DMA_TO_DEVICE);
+
+		err = dma_mapping_error(nvdec->dev, phys);
+		if (err < 0)
+			goto cleanup;
+
+		nvdec->falcon.firmware.phys = phys;
+	}
+
+	return 0;
+
+cleanup:
+	/* forget the buffer so the early return above never reuses freed memory */
+	nvdec->falcon.firmware.virt = NULL;
+	if (!client->group)
+		dma_free_coherent(nvdec->dev, size, virt, iova);
+	else
+		tegra_drm_free(tegra, size, virt, iova);
+
+	return err;
+}
+
+static __maybe_unused int nvdec_runtime_resume(struct device *dev)
+{
+	struct nvdec *nvdec = dev_get_drvdata(dev);
+	int err;
+
+	err = clk_bulk_prepare_enable(nvdec->num_clks, nvdec->clks);
+	if (err < 0)
+		return err;
+
+	usleep_range(10, 20);
+
+	if (nvdec->config->has_riscv) {
+		err = nvdec_boot_riscv(nvdec);
+		if (err < 0)
+			goto disable;
+	} else {
+		err = nvdec_load_falcon_firmware(nvdec);
+		if (err < 0)
+			goto disable;
+
+		err = nvdec_boot_falcon(nvdec);
+		if (err < 0)
+			goto disable;
+	}
+
+	return 0;
+
+disable:
+	clk_bulk_disable_unprepare(nvdec->num_clks, nvdec->clks);
+	return err;
+}
+
+static __maybe_unused int nvdec_runtime_suspend(struct device *dev)
+{
+	struct nvdec *nvdec = dev_get_drvdata(dev);
+
+	host1x_channel_stop(nvdec->channel);
+
+	clk_bulk_disable_unprepare(nvdec->num_clks, nvdec->clks);
+
+	return 0;
+}
+
+static int nvdec_open_channel(struct tegra_drm_client *client,
+			    struct tegra_drm_context *context)
+{
+	struct nvdec *nvdec = to_nvdec(client);
+
+	context->channel = host1x_channel_get(nvdec->channel);
+	if (!context->channel)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void nvdec_close_channel(struct tegra_drm_context *context)
+{
+	host1x_channel_put(context->channel);
+}
+
+static int nvdec_can_use_memory_ctx(struct tegra_drm_client *client, bool *supported)
+{
+	*supported = true;
+
+	return 0;
+}
+
+static const struct tegra_drm_client_ops nvdec_ops = {
+	.open_channel = nvdec_open_channel,
+	.close_channel = nvdec_close_channel,
+	.submit = tegra_drm_submit,
+	.get_streamid_offset = tegra_drm_get_streamid_offset_thi,
+	.can_use_memory_ctx = nvdec_can_use_memory_ctx,
+};
+
+#define NVIDIA_TEGRA_210_NVDEC_FIRMWARE "nvidia/tegra210/nvdec.bin"
+
+static const struct nvdec_config nvdec_t210_config = {
+	.firmware = NVIDIA_TEGRA_210_NVDEC_FIRMWARE,
+	.version = 0x21,
+	.supports_sid = false,
+};
+
+#define NVIDIA_TEGRA_186_NVDEC_FIRMWARE "nvidia/tegra186/nvdec.bin"
+
+static const struct nvdec_config nvdec_t186_config = {
+	.firmware = NVIDIA_TEGRA_186_NVDEC_FIRMWARE,
+	.version = 0x18,
+	.supports_sid = true,
+};
+
+#define NVIDIA_TEGRA_194_NVDEC_FIRMWARE "nvidia/tegra194/nvdec.bin"
+
+static const struct nvdec_config nvdec_t194_config = {
+	.firmware = NVIDIA_TEGRA_194_NVDEC_FIRMWARE,
+	.version = 0x19,
+	.supports_sid = true,
+};
+
+static const struct nvdec_config nvdec_t234_config = {
+	.version = 0x23,
+	.supports_sid = true,
+	.has_riscv = true,
+	.has_extra_clocks = true,
+};
+
+static const struct of_device_id tegra_nvdec_of_match[] = {
+	{ .compatible = "nvidia,tegra210-nvdec", .data = &nvdec_t210_config },
+	{ .compatible = "nvidia,tegra186-nvdec", .data = &nvdec_t186_config },
+	{ .compatible = "nvidia,tegra194-nvdec", .data = &nvdec_t194_config },
+	{ .compatible = "nvidia,tegra234-nvdec", .data = &nvdec_t234_config },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, tegra_nvdec_of_match);
+
+static int nvdec_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct host1x_syncpt **syncpts;
+	struct nvdec *nvdec;
+	u32 host_class;
+	int err;
+
+	/* inherit DMA mask from host1x parent */
+	err = dma_coerce_mask_and_coherent(dev, *dev->parent->dma_mask);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err);
+		return err;
+	}
+
+	nvdec = devm_kzalloc(dev, sizeof(*nvdec), GFP_KERNEL);
+	if (!nvdec)
+		return -ENOMEM;
+
+	nvdec->config = of_device_get_match_data(dev);
+
+	syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL);
+	if (!syncpts)
+		return -ENOMEM;
+
+	nvdec->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
+	if (IS_ERR(nvdec->regs))
+		return PTR_ERR(nvdec->regs);
+
+	nvdec->clks[0].id = "nvdec";
+	nvdec->num_clks = 1;
+
+	if (nvdec->config->has_extra_clocks) {
+		nvdec->num_clks = 3;
+		nvdec->clks[1].id = "fuse";
+		nvdec->clks[2].id = "tsec_pka";
+	}
+
+	err = devm_clk_bulk_get(dev, nvdec->num_clks, nvdec->clks);
+	if (err) {
+		dev_err(&pdev->dev, "failed to get clock(s)\n");
+		return err;
+	}
+
+	err = clk_set_rate(nvdec->clks[0].clk, ULONG_MAX);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to set clock rate\n");
+		return err;
+	}
+
+	err = of_property_read_u32(dev->of_node, "nvidia,host1x-class", &host_class);
+	if (err < 0)
+		host_class = HOST1X_CLASS_NVDEC;
+
+	if (nvdec->config->has_riscv) {
+		struct tegra_mc *mc;
+
+		mc = devm_tegra_memory_controller_get(dev);
+		if (IS_ERR(mc)) {
+			dev_err_probe(dev, PTR_ERR(mc),
+				"failed to get memory controller handle\n");
+			return PTR_ERR(mc);
+		}
+
+		err = tegra_mc_get_carveout_info(mc, 1, &nvdec->carveout_base, NULL);
+		if (err) {
+			dev_err(dev, "failed to get carveout info: %d\n", err);
+			return err;
+		}
+
+		nvdec->reset = devm_reset_control_get_exclusive_released(dev, "nvdec");
+		if (IS_ERR(nvdec->reset)) {
+			dev_err_probe(dev, PTR_ERR(nvdec->reset), "failed to get reset\n");
+			return PTR_ERR(nvdec->reset);
+		}
+
+		nvdec->riscv.dev = dev;
+		nvdec->riscv.regs = nvdec->regs;
+
+		err = tegra_drm_riscv_read_descriptors(&nvdec->riscv);
+		if (err < 0)
+			return err;
+	} else {
+		nvdec->falcon.dev = dev;
+		nvdec->falcon.regs = nvdec->regs;
+
+		err = falcon_init(&nvdec->falcon);
+		if (err < 0)
+			return err;
+	}
+
+	platform_set_drvdata(pdev, nvdec);
+
+	INIT_LIST_HEAD(&nvdec->client.base.list);
+	nvdec->client.base.ops = &nvdec_client_ops;
+	nvdec->client.base.dev = dev;
+	nvdec->client.base.class = host_class;
+	nvdec->client.base.syncpts = syncpts;
+	nvdec->client.base.num_syncpts = 1;
+	nvdec->dev = dev;
+
+	INIT_LIST_HEAD(&nvdec->client.list);
+	nvdec->client.version = nvdec->config->version;
+	nvdec->client.ops = &nvdec_ops;
+
+	err = host1x_client_register(&nvdec->client.base);
+	if (err < 0) {
+		dev_err(dev, "failed to register host1x client: %d\n", err);
+		goto exit_falcon;
+	}
+
+	pm_runtime_enable(dev);
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, 500);
+
+	return 0;
+
+exit_falcon:
+	falcon_exit(&nvdec->falcon);
+
+	return err;
+}
+
+static void nvdec_remove(struct platform_device *pdev)
+{
+	struct nvdec *nvdec = platform_get_drvdata(pdev);
+
+	pm_runtime_disable(&pdev->dev);
+	host1x_client_unregister(&nvdec->client.base);
+	falcon_exit(&nvdec->falcon);
+}
+
+static const struct dev_pm_ops nvdec_pm_ops = {
+	SET_RUNTIME_PM_OPS(nvdec_runtime_suspend, nvdec_runtime_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend,
+				pm_runtime_force_resume)
+};
+
+struct platform_driver tegra_nvdec_driver = {
+	.driver = {
+		.name = "tegra-nvdec",
+		.of_match_table = tegra_nvdec_of_match,
+		.pm = &nvdec_pm_ops
+	},
+	.probe = nvdec_probe,
+	.remove = nvdec_remove,
+};
+
+#if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC)
+MODULE_FIRMWARE(NVIDIA_TEGRA_210_NVDEC_FIRMWARE);
+#endif
+#if IS_ENABLED(CONFIG_ARCH_TEGRA_186_SOC)
+MODULE_FIRMWARE(NVIDIA_TEGRA_186_NVDEC_FIRMWARE);
+#endif
+#if IS_ENABLED(CONFIG_ARCH_TEGRA_194_SOC)
+MODULE_FIRMWARE(NVIDIA_TEGRA_194_NVDEC_FIRMWARE);
+#endif
diff --git a/drivers/gpu/drm/tegra/nvjpg.c b/drivers/gpu/drm/tegra/nvjpg.c
new file mode 100644
index 000000000000..94503fd0d52d
--- /dev/null
+++ b/drivers/gpu/drm/tegra/nvjpg.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/host1x.h>
+#include <linux/iommu.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+
+#include "drm.h"
+#include "falcon.h"
+
+struct nvjpg_config {
+	const char *firmware;
+	unsigned int version;
+};
+
+struct nvjpg {
+	struct falcon falcon;
+
+	void __iomem *regs;
+	struct tegra_drm_client client;
+	struct device *dev;
+	struct clk *clk;
+
+	/* Platform configuration */
+	const struct nvjpg_config *config;
+};
+
+static inline struct nvjpg *to_nvjpg(struct tegra_drm_client *client)
+{
+	return container_of(client, struct nvjpg, client);
+}
+
+static int nvjpg_init(struct host1x_client *client)
+{
+	struct tegra_drm_client *drm = host1x_to_drm_client(client);
+	struct drm_device *dev = dev_get_drvdata(client->host);
+	struct tegra_drm *tegra = dev->dev_private;
+	struct nvjpg *nvjpg = to_nvjpg(drm);
+	int err;
+
+	err = host1x_client_iommu_attach(client);
+	if (err < 0 && err != -ENODEV) {
+		dev_err(nvjpg->dev, "failed to attach to domain: %d\n", err);
+		return err;
+	}
+
+	err = tegra_drm_register_client(tegra, drm);
+	if (err < 0)
+		goto detach;
+
+	/*
+	 * Inherit the DMA parameters (such as maximum segment size) from the
+	 * parent host1x device.
+	 */
+	client->dev->dma_parms = client->host->dma_parms;
+
+	return 0;
+
+detach:
+	host1x_client_iommu_detach(client);
+
+	return err;
+}
+
+static int nvjpg_exit(struct host1x_client *client)
+{
+	struct tegra_drm_client *drm = host1x_to_drm_client(client);
+	struct drm_device *dev = dev_get_drvdata(client->host);
+	struct tegra_drm *tegra = dev->dev_private;
+	struct nvjpg *nvjpg = to_nvjpg(drm);
+	int err;
+
+	/* avoid a dangling pointer just in case this disappears */
+	client->dev->dma_parms = NULL;
+
+	err = tegra_drm_unregister_client(tegra, drm);
+	if (err < 0)
+		return err;
+
+	pm_runtime_dont_use_autosuspend(client->dev);
+	pm_runtime_force_suspend(client->dev);
+
+	/* free the image before detaching, which may clear client->group -- TODO confirm */
+	if (nvjpg->falcon.firmware.virt && client->group) {
+		dma_unmap_single(nvjpg->dev, nvjpg->falcon.firmware.phys,
+				 nvjpg->falcon.firmware.size, DMA_TO_DEVICE);
+		tegra_drm_free(tegra, nvjpg->falcon.firmware.size,
+			       nvjpg->falcon.firmware.virt, nvjpg->falcon.firmware.iova);
+	} else if (nvjpg->falcon.firmware.virt) {
+		dma_free_coherent(nvjpg->dev, nvjpg->falcon.firmware.size,
+				  nvjpg->falcon.firmware.virt, nvjpg->falcon.firmware.iova);
+	}
+	nvjpg->falcon.firmware.virt = NULL;
+
+	host1x_client_iommu_detach(client);
+
+	return 0;
+}
+
+static const struct host1x_client_ops nvjpg_client_ops = {
+	.init = nvjpg_init,
+	.exit = nvjpg_exit,
+};
+
+/* Load the Falcon firmware image into a DMA buffer; a no-op once an image is loaded. */
+static int nvjpg_load_falcon_firmware(struct nvjpg *nvjpg)
+{
+	struct host1x_client *client = &nvjpg->client.base;
+	struct tegra_drm *tegra = nvjpg->client.drm;
+	dma_addr_t iova;
+	size_t size;
+	void *virt;
+	int err;
+
+	if (nvjpg->falcon.firmware.virt)
+		return 0;
+
+	err = falcon_read_firmware(&nvjpg->falcon, nvjpg->config->firmware);
+	if (err < 0)
+		return err;
+
+	size = nvjpg->falcon.firmware.size;
+
+	if (!client->group) {
+		virt = dma_alloc_coherent(nvjpg->dev, size, &iova, GFP_KERNEL);
+		if (!virt)
+			return -ENOMEM;
+	} else {
+		virt = tegra_drm_alloc(tegra, size, &iova);
+		if (IS_ERR(virt))
+			return PTR_ERR(virt);
+	}
+
+	nvjpg->falcon.firmware.virt = virt;
+	nvjpg->falcon.firmware.iova = iova;
+
+	err = falcon_load_firmware(&nvjpg->falcon);
+	if (err < 0)
+		goto cleanup;
+
+	/*
+	 * With an IOVA from the shared domain, also map the buffer through the
+	 * DMA API so that it knows which memory pages to flush the cache for.
+	 */
+	if (client->group) {
+		dma_addr_t phys = dma_map_single(nvjpg->dev, virt, size, DMA_TO_DEVICE);
+
+		err = dma_mapping_error(nvjpg->dev, phys);
+		if (err < 0)
+			goto cleanup;
+
+		nvjpg->falcon.firmware.phys = phys;
+	}
+
+	return 0;
+
+cleanup:
+	/* forget the buffer so the early return above never reuses freed memory */
+	nvjpg->falcon.firmware.virt = NULL;
+	if (!client->group)
+		dma_free_coherent(nvjpg->dev, size, virt, iova);
+	else
+		tegra_drm_free(tegra, size, virt, iova);
+
+	return err;
+}
+
+static __maybe_unused int nvjpg_runtime_resume(struct device *dev)
+{
+	struct nvjpg *nvjpg = dev_get_drvdata(dev);
+	int err;
+
+	err = clk_prepare_enable(nvjpg->clk);
+	if (err < 0)
+		return err;
+
+	usleep_range(20, 30);
+
+	err = nvjpg_load_falcon_firmware(nvjpg);
+	if (err < 0)
+		goto disable_clk;
+
+	err = falcon_boot(&nvjpg->falcon);
+	if (err < 0)
+		goto disable_clk;
+
+	return 0;
+
+disable_clk:
+	clk_disable_unprepare(nvjpg->clk);
+	return err;
+}
+
+static __maybe_unused int nvjpg_runtime_suspend(struct device *dev)
+{
+	struct nvjpg *nvjpg = dev_get_drvdata(dev);
+
+	clk_disable_unprepare(nvjpg->clk);
+
+	return 0;
+}
+
+static int nvjpg_can_use_memory_ctx(struct tegra_drm_client *client, bool *supported)
+{
+	*supported = false;
+
+	return 0;
+}
+
+static const struct tegra_drm_client_ops nvjpg_ops = {
+	.get_streamid_offset = NULL,
+	.can_use_memory_ctx = nvjpg_can_use_memory_ctx,
+};
+
+#define NVIDIA_TEGRA_210_NVJPG_FIRMWARE "nvidia/tegra210/nvjpg.bin"
+
+static const struct nvjpg_config tegra210_nvjpg_config = {
+	.firmware = NVIDIA_TEGRA_210_NVJPG_FIRMWARE,
+	.version = 0x21,
+};
+
+static const struct of_device_id tegra_nvjpg_of_match[] = {
+	{ .compatible = "nvidia,tegra210-nvjpg", .data = &tegra210_nvjpg_config },
+	{ },
+};
+MODULE_DEVICE_TABLE(of, tegra_nvjpg_of_match);
+
+static int nvjpg_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct nvjpg *nvjpg;
+	int err;
+
+	/* inherit DMA mask from host1x parent */
+	err = dma_coerce_mask_and_coherent(dev, *dev->parent->dma_mask);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err);
+		return err;
+	}
+
+	nvjpg = devm_kzalloc(dev, sizeof(*nvjpg), GFP_KERNEL);
+	if (!nvjpg)
+		return -ENOMEM;
+
+	nvjpg->config = of_device_get_match_data(dev);
+
+	nvjpg->regs = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(nvjpg->regs))
+		return PTR_ERR(nvjpg->regs);
+
+	nvjpg->clk = devm_clk_get(dev, "nvjpg");
+	if (IS_ERR(nvjpg->clk)) {
+		dev_err(&pdev->dev, "failed to get clock\n");
+		return PTR_ERR(nvjpg->clk);
+	}
+
+	err = clk_set_rate(nvjpg->clk, ULONG_MAX);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to set clock rate\n");
+		return err;
+	}
+
+	nvjpg->falcon.dev = dev;
+	nvjpg->falcon.regs = nvjpg->regs;
+
+	err = falcon_init(&nvjpg->falcon);
+	if (err < 0)
+		return err;
+
+	platform_set_drvdata(pdev, nvjpg);
+
+	INIT_LIST_HEAD(&nvjpg->client.base.list);
+	nvjpg->client.base.ops = &nvjpg_client_ops;
+	nvjpg->client.base.dev = dev;
+	nvjpg->client.base.class = HOST1X_CLASS_NVJPG;
+	nvjpg->dev = dev;
+
+	INIT_LIST_HEAD(&nvjpg->client.list);
+	nvjpg->client.version = nvjpg->config->version;
+	nvjpg->client.ops = &nvjpg_ops;
+
+	err = host1x_client_register(&nvjpg->client.base);
+	if (err < 0) {
+		dev_err(dev, "failed to register host1x client: %d\n", err);
+		goto exit_falcon;
+	}
+
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, 500);
+	devm_pm_runtime_enable(dev);
+
+	return 0;
+
+exit_falcon:
+	falcon_exit(&nvjpg->falcon);
+
+	return err;
+}
+
+static void nvjpg_remove(struct platform_device *pdev)
+{
+	struct nvjpg *nvjpg = platform_get_drvdata(pdev);
+
+	host1x_client_unregister(&nvjpg->client.base);
+	falcon_exit(&nvjpg->falcon);
+}
+
+static const struct dev_pm_ops nvjpg_pm_ops = {
+	RUNTIME_PM_OPS(nvjpg_runtime_suspend, nvjpg_runtime_resume, NULL)
+	SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
+};
+
+struct platform_driver tegra_nvjpg_driver = {
+	.driver = {
+		.name = "tegra-nvjpg",
+		.of_match_table = tegra_nvjpg_of_match,
+		.pm = &nvjpg_pm_ops
+	},
+	.probe = nvjpg_probe,
+	.remove = nvjpg_remove,
+};
+
+#if IS_ENABLED(CONFIG_ARCH_TEGRA_210_SOC)
+MODULE_FIRMWARE(NVIDIA_TEGRA_210_NVJPG_FIRMWARE);
+#endif
diff --git a/drivers/gpu/drm/tegra/output.c b/drivers/gpu/drm/tegra/output.c
index c662efc7e413..49e4f63a5550 100644
--- a/drivers/gpu/drm/tegra/output.c
+++ b/drivers/gpu/drm/tegra/output.c
@@ -1,14 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2012 Avionic Design GmbH
  * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
+#include <linux/i2c.h>
+#include <linux/of.h>
+
 #include <drm/drm_atomic_helper.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_of.h>
 #include <drm/drm_panel.h>
+#include <drm/drm_simple_kms_helper.h>
 
 #include "drm.h"
 #include "dc.h"
@@ -18,7 +21,7 @@
 int tegra_output_connector_get_modes(struct drm_connector *connector)
 {
 	struct tegra_output *output = connector_to_output(connector);
-	struct edid *edid = NULL;
+	const struct drm_edid *drm_edid = NULL;
 	int err = 0;
 
 	/*
@@ -26,23 +29,22 @@ int tegra_output_connector_get_modes(struct drm_connector *connector)
 	 * ignore any other means of obtaining a mode.
 	 */
 	if (output->panel) {
-		err = output->panel->funcs->get_modes(output->panel);
+		err = drm_panel_get_modes(output->panel, connector);
 		if (err > 0)
 			return err;
 	}
 
-	if (output->edid)
-		edid = kmemdup(output->edid, sizeof(*edid), GFP_KERNEL);
+	if (output->drm_edid)
+		drm_edid = drm_edid_dup(output->drm_edid);
 	else if (output->ddc)
-		edid = drm_get_edid(connector, output->ddc);
+		drm_edid = drm_edid_read_ddc(connector, output->ddc);
 
-	cec_notifier_set_phys_addr_from_edid(output->notifier, edid);
-	drm_connector_update_edid_property(connector, edid);
+	drm_edid_connector_update(connector, drm_edid);
+	cec_notifier_set_phys_addr(output->cec,
+				   connector->display_info.source_physical_address);
 
-	if (edid) {
-		err = drm_add_edid_modes(connector, edid);
-		kfree(edid);
-	}
+	err = drm_edid_connector_add_modes(connector);
+	drm_edid_free(drm_edid);
 
 	return err;
 }
@@ -53,18 +55,11 @@ tegra_output_connector_detect(struct drm_connector *connector, bool force)
 	struct tegra_output *output = connector_to_output(connector);
 	enum drm_connector_status status = connector_status_unknown;
 
-	if (gpio_is_valid(output->hpd_gpio)) {
-		if (output->hpd_gpio_flags & OF_GPIO_ACTIVE_LOW) {
-			if (gpio_get_value(output->hpd_gpio) != 0)
-				status = connector_status_disconnected;
-			else
-				status = connector_status_connected;
-		} else {
-			if (gpio_get_value(output->hpd_gpio) == 0)
-				status = connector_status_disconnected;
-			else
-				status = connector_status_connected;
-		}
+	if (output->hpd_gpio) {
+		if (gpiod_get_value(output->hpd_gpio) == 0)
+			status = connector_status_disconnected;
+		else
+			status = connector_status_connected;
 	} else {
 		if (!output->panel)
 			status = connector_status_disconnected;
@@ -73,22 +68,22 @@ tegra_output_connector_detect(struct drm_connector *connector, bool force)
 	}
 
 	if (status != connector_status_connected)
-		cec_notifier_phys_addr_invalidate(output->notifier);
+		cec_notifier_phys_addr_invalidate(output->cec);
 
 	return status;
 }
 
 void tegra_output_connector_destroy(struct drm_connector *connector)
 {
+	struct tegra_output *output = connector_to_output(connector);
+
+	if (output->cec)
+		cec_notifier_conn_unregister(output->cec);
+
 	drm_connector_unregister(connector);
 	drm_connector_cleanup(connector);
 }
 
-void tegra_output_encoder_destroy(struct drm_encoder *encoder)
-{
-	drm_encoder_cleanup(encoder);
-}
-
 static irqreturn_t hpd_irq(int irq, void *data)
 {
 	struct tegra_output *output = data;
@@ -102,52 +97,66 @@ static irqreturn_t hpd_irq(int irq, void *data)
 int tegra_output_probe(struct tegra_output *output)
 {
 	struct device_node *ddc, *panel;
+	const void *edid;
+	unsigned long flags;
 	int err, size;
 
 	if (!output->of_node)
 		output->of_node = output->dev->of_node;
 
+	err = drm_of_find_panel_or_bridge(output->of_node, -1, -1,
+					  &output->panel, &output->bridge);
+	if (err && err != -ENODEV)
+		return err;
+
 	panel = of_parse_phandle(output->of_node, "nvidia,panel", 0);
 	if (panel) {
+		/*
+		 * Don't mix nvidia,panel phandle with the graph in a
+		 * device-tree.
+		 */
+		WARN_ON(output->panel || output->bridge);
+
 		output->panel = of_drm_find_panel(panel);
+		of_node_put(panel);
+
 		if (IS_ERR(output->panel))
 			return PTR_ERR(output->panel);
-
-		of_node_put(panel);
 	}
 
-	output->edid = of_get_property(output->of_node, "nvidia,edid", &size);
-
 	ddc = of_parse_phandle(output->of_node, "nvidia,ddc-i2c-bus", 0);
 	if (ddc) {
-		output->ddc = of_find_i2c_adapter_by_node(ddc);
+		output->ddc = of_get_i2c_adapter_by_node(ddc);
+		of_node_put(ddc);
+
 		if (!output->ddc) {
 			err = -EPROBE_DEFER;
-			of_node_put(ddc);
 			return err;
 		}
-
-		of_node_put(ddc);
 	}
 
-	output->hpd_gpio = of_get_named_gpio_flags(output->of_node,
-						   "nvidia,hpd-gpio", 0,
-						   &output->hpd_gpio_flags);
-	if (gpio_is_valid(output->hpd_gpio)) {
-		unsigned long flags;
-
-		err = gpio_request_one(output->hpd_gpio, GPIOF_DIR_IN,
-				       "HDMI hotplug detect");
-		if (err < 0) {
-			dev_err(output->dev, "gpio_request_one(): %d\n", err);
-			return err;
+	edid = of_get_property(output->of_node, "nvidia,edid", &size);
+	output->drm_edid = drm_edid_alloc(edid, size);
+
+	output->hpd_gpio = devm_fwnode_gpiod_get(output->dev,
+					of_fwnode_handle(output->of_node),
+					"nvidia,hpd",
+					GPIOD_IN,
+					"HDMI hotplug detect");
+	if (IS_ERR(output->hpd_gpio)) {
+		if (PTR_ERR(output->hpd_gpio) != -ENOENT) {
+			err = PTR_ERR(output->hpd_gpio);
+			goto put_i2c;
 		}
 
-		err = gpio_to_irq(output->hpd_gpio);
+		output->hpd_gpio = NULL;
+	}
+
+	if (output->hpd_gpio) {
+		err = gpiod_to_irq(output->hpd_gpio);
 		if (err < 0) {
-			dev_err(output->dev, "gpio_to_irq(): %d\n", err);
-			gpio_free(output->hpd_gpio);
-			return err;
+			dev_err(output->dev, "gpiod_to_irq(): %d\n", err);
+			goto put_i2c;
 		}
 
 		output->hpd_irq = err;
@@ -160,8 +169,7 @@ int tegra_output_probe(struct tegra_output *output)
 		if (err < 0) {
 			dev_err(output->dev, "failed to request IRQ#%u: %d\n",
 				output->hpd_irq, err);
-			gpio_free(output->hpd_gpio);
-			return err;
+			goto put_i2c;
 		}
 
 		output->connector.polled = DRM_CONNECTOR_POLL_HPD;
@@ -175,36 +183,53 @@ int tegra_output_probe(struct tegra_output *output)
 	}
 
 	return 0;
+
+put_i2c:
+	if (output->ddc)
+		i2c_put_adapter(output->ddc);
+
+	drm_edid_free(output->drm_edid);
+
+	return err;
 }
 
 void tegra_output_remove(struct tegra_output *output)
 {
-	if (gpio_is_valid(output->hpd_gpio)) {
+	if (output->hpd_gpio)
 		free_irq(output->hpd_irq, output);
-		gpio_free(output->hpd_gpio);
-	}
 
 	if (output->ddc)
-		put_device(&output->ddc->dev);
+		i2c_put_adapter(output->ddc);
+
+	drm_edid_free(output->drm_edid);
 }
 
 int tegra_output_init(struct drm_device *drm, struct tegra_output *output)
 {
-	int err;
-
-	if (output->panel) {
-		err = drm_panel_attach(output->panel, &output->connector);
-		if (err < 0)
-			return err;
-	}
+	int connector_type;
 
 	/*
 	 * The connector is now registered and ready to receive hotplug events
 	 * so the hotplug interrupt can be enabled.
 	 */
-	if (gpio_is_valid(output->hpd_gpio))
+	if (output->hpd_gpio)
 		enable_irq(output->hpd_irq);
 
+	connector_type = output->connector.connector_type;
+	/*
+	 * Create a CEC notifier for HDMI connector.
+	 */
+	if (connector_type == DRM_MODE_CONNECTOR_HDMIA ||
+	    connector_type == DRM_MODE_CONNECTOR_HDMIB) {
+		struct cec_connector_info conn_info;
+
+		cec_fill_conn_info_from_drm(&conn_info, &output->connector);
+		output->cec = cec_notifier_conn_register(output->dev, NULL,
+							 &conn_info);
+		if (!output->cec)
+			return -ENOMEM;
+	}
+
 	return 0;
 }
 
@@ -214,11 +239,8 @@ void tegra_output_exit(struct tegra_output *output)
 	 * The connector is going away, so the interrupt must be disabled to
 	 * prevent the hotplug interrupt handler from potentially crashing.
 	 */
-	if (gpio_is_valid(output->hpd_gpio))
+	if (output->hpd_gpio)
 		disable_irq(output->hpd_irq);
-
-	if (output->panel)
-		drm_panel_detach(output->panel);
 }
 
 void tegra_output_find_possible_crtcs(struct tegra_output *output,
@@ -242,3 +264,19 @@ void tegra_output_find_possible_crtcs(struct tegra_output *output,
 
 	output->encoder.possible_crtcs = mask;
 }
+
+int tegra_output_suspend(struct tegra_output *output)
+{
+	if (output->hpd_irq)	/* assigned in probe when an HPD GPIO exists */
+		disable_irq(output->hpd_irq);	/* undone by tegra_output_resume() */
+
+	return 0;
+}
+
+int tegra_output_resume(struct tegra_output *output)
+{
+	if (output->hpd_irq)	/* assigned in probe when an HPD GPIO exists */
+		enable_irq(output->hpd_irq);	/* pairs with tegra_output_suspend() */
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/tegra/plane.c b/drivers/gpu/drm/tegra/plane.c
index d068e8aa3553..ffe5f06b770d 100644
--- a/drivers/gpu/drm/tegra/plane.c
+++ b/drivers/gpu/drm/tegra/plane.c
@@ -1,14 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2017 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
+#include <linux/dma-mapping.h>
+#include <linux/iommu.h>
+#include <linux/interconnect.h>
+
 #include <drm/drm_atomic.h>
 #include <drm/drm_atomic_helper.h>
-#include <drm/drm_plane_helper.h>
+#include <drm/drm_fourcc.h>
+#include <drm/drm_framebuffer.h>
+#include <drm/drm_gem_atomic_helper.h>
 
 #include "dc.h"
 #include "plane.h"
@@ -25,6 +28,7 @@ static void tegra_plane_reset(struct drm_plane *plane)
 {
 	struct tegra_plane *p = to_tegra_plane(plane);
 	struct tegra_plane_state *state;
+	unsigned int i;
 
 	if (plane->state)
 		__drm_atomic_helper_plane_destroy_state(plane->state);
@@ -38,6 +42,9 @@ static void tegra_plane_reset(struct drm_plane *plane)
 		plane->state->plane = plane;
 		plane->state->zpos = p->index;
 		plane->state->normalized_zpos = p->index;
+
+		for (i = 0; i < 3; i++)
+			state->iova[i] = DMA_MAPPING_ERROR;
 	}
 }
 
@@ -56,12 +63,21 @@ tegra_plane_atomic_duplicate_state(struct drm_plane *plane)
 	copy->tiling = state->tiling;
 	copy->format = state->format;
 	copy->swap = state->swap;
-	copy->bottom_up = state->bottom_up;
+	copy->reflect_x = state->reflect_x;
+	copy->reflect_y = state->reflect_y;
 	copy->opaque = state->opaque;
+	copy->total_peak_memory_bandwidth = state->total_peak_memory_bandwidth;
+	copy->peak_memory_bandwidth = state->peak_memory_bandwidth;
+	copy->avg_memory_bandwidth = state->avg_memory_bandwidth;
 
 	for (i = 0; i < 2; i++)
 		copy->blending[i] = state->blending[i];
 
+	for (i = 0; i < 3; i++) {
+		copy->iova[i] = DMA_MAPPING_ERROR;
+		copy->map[i] = NULL;
+	}
+
 	return &copy->base;
 }
 
@@ -72,6 +88,22 @@ static void tegra_plane_atomic_destroy_state(struct drm_plane *plane,
 	kfree(state);
 }
 
+/* Sector layout is usable only if every CRTC this plane can drive supports it */
+static bool tegra_plane_supports_sector_layout(struct drm_plane *plane)
+{
+	struct drm_crtc *crtc;
+
+	drm_for_each_crtc(crtc, plane->dev) {
+		if (!(plane->possible_crtcs & drm_crtc_mask(crtc)))
+			continue;
+
+		if (!to_tegra_dc(crtc)->soc->supports_sector_layout)
+			return false;
+	}
+
+	return true;
+}
+
 static bool tegra_plane_format_mod_supported(struct drm_plane *plane,
 					     uint32_t format,
 					     uint64_t modifier)
@@ -81,6 +113,14 @@ static bool tegra_plane_format_mod_supported(struct drm_plane *plane,
 	if (modifier == DRM_FORMAT_MOD_LINEAR)
 		return true;
 
+	/* check for the sector layout bit */
+	if (fourcc_mod_is_vendor(modifier, NVIDIA)) {
+		if (modifier & DRM_FORMAT_MOD_NVIDIA_SECTOR_LAYOUT) {
+			if (!tegra_plane_supports_sector_layout(plane))
+				return false;
+		}
+	}
+
 	if (info->num_planes == 1)
 		return true;
 
@@ -97,6 +137,163 @@ const struct drm_plane_funcs tegra_plane_funcs = {
 	.format_mod_supported = tegra_plane_format_mod_supported,
 };
 
+/* Pin every plane buffer of state->base.fb; on failure all pins are dropped */
+static int tegra_dc_pin(struct tegra_dc *dc, struct tegra_plane_state *state)
+{
+	unsigned int i;
+	int err;
+
+	for (i = 0; i < state->base.fb->format->num_planes; i++) {
+		struct tegra_bo *bo = tegra_fb_get_plane(state->base.fb, i);
+		struct host1x_bo_mapping *map;
+
+		map = host1x_bo_pin(dc->dev, &bo->base, DMA_TO_DEVICE, &dc->client.cache);
+		if (IS_ERR(map)) {
+			err = PTR_ERR(map);
+			goto unpin;
+		}
+
+		if (!dc->client.group) {
+			/*
+			 * The DC needs contiguous memory: reject multi-chunk
+			 * mappings. The new pin is not in state->map[] yet.
+			 */
+			if (map->chunks > 1) {
+				host1x_bo_unpin(map);
+				err = -EINVAL;
+				goto unpin;
+			}
+
+			state->iova[i] = map->phys;
+		} else {
+			state->iova[i] = bo->iova;
+		}
+
+		state->map[i] = map;
+	}
+
+	return 0;
+
+unpin:
+	dev_err(dc->dev, "failed to map plane %u: %d\n", i, err);
+
+	while (i--) {
+		host1x_bo_unpin(state->map[i]);
+		state->iova[i] = DMA_MAPPING_ERROR;
+		state->map[i] = NULL;
+	}
+
+	return err;
+}
+
+static void tegra_dc_unpin(struct tegra_dc *dc, struct tegra_plane_state *state)
+{
+	unsigned int idx;	/* walks state->map[] and state->iova[] together */
+
+	for (idx = 0; idx < state->base.fb->format->num_planes; idx++) {
+		host1x_bo_unpin(state->map[idx]);
+		state->map[idx] = NULL;
+		state->iova[idx] = DMA_MAPPING_ERROR;
+	}
+}
+
+/* Run the GEM prepare helper, then pin the framebuffer via tegra_dc_pin() */
+int tegra_plane_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
+{
+	struct tegra_dc *dc = to_tegra_dc(state->crtc);
+	int ret;
+
+	if (!state->fb)
+		return 0;
+
+	ret = drm_gem_plane_helper_prepare_fb(plane, state);
+	if (ret >= 0)
+		ret = tegra_dc_pin(dc, to_tegra_plane_state(state));
+
+	return ret;
+}
+
+/* Unpin the plane's buffers, but only when to_tegra_dc() yields a DC */
+void tegra_plane_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *state)
+{
+	struct tegra_dc *owner = to_tegra_dc(state->crtc);
+
+	if (owner)
+		tegra_dc_unpin(owner, to_tegra_plane_state(state));
+}
+
+static int tegra_plane_calculate_memory_bandwidth(struct drm_plane_state *state)
+{
+	struct tegra_plane_state *tegra_state = to_tegra_plane_state(state);
+	unsigned int i, bpp, dst_w, dst_h, src_w, src_h, mul;
+	const struct tegra_dc_soc_info *soc;
+	const struct drm_format_info *fmt;
+	struct drm_crtc_state *crtc_state;
+	u64 avg_bandwidth, peak_bandwidth;
+
+	if (!state->visible)	/* bandwidth fields are left untouched */
+		return 0;
+
+	crtc_state = drm_atomic_get_new_crtc_state(state->state, state->crtc);
+	if (!crtc_state)
+		return -EINVAL;
+
+	src_w = drm_rect_width(&state->src) >> 16;	/* presumably 16.16 fixed point */
+	src_h = drm_rect_height(&state->src) >> 16;
+	dst_w = drm_rect_width(&state->dst);
+	dst_h = drm_rect_height(&state->dst);
+
+	fmt = state->fb->format;
+	soc = to_tegra_dc(state->crtc)->soc;
+
+	/*
+	 * Note that the real memory bandwidth varies depending on format
+	 * and memory layout. This is not taken into account because a
+	 * small estimation error isn't important: the bandwidth is rounded
+	 * up anyway.
+	 */
+	for (i = 0, bpp = 0; i < fmt->num_planes; i++) {
+		unsigned int bpp_plane = fmt->cpp[i] * 8;
+
+		/*
+		 * Sub-sampling is relevant for chroma planes only and vertical
+		 * readouts are not cached, hence only horizontal sub-sampling
+		 * matters.
+		 */
+		if (i > 0)
+			bpp_plane /= fmt->hsub;
+
+		bpp += bpp_plane;
+	}
+
+	/* average bandwidth in kbytes/sec; +999 rounds the /1000 below upwards */
+	avg_bandwidth  = min(src_w, dst_w) * min(src_h, dst_h);
+	avg_bandwidth *= drm_mode_vrefresh(&crtc_state->adjusted_mode);
+	avg_bandwidth  = DIV_ROUND_UP(avg_bandwidth * bpp, 8) + 999;
+	do_div(avg_bandwidth, 1000);
+
+	/* mode.clock in kHz, peak bandwidth in kbytes/sec */
+	peak_bandwidth = DIV_ROUND_UP(crtc_state->adjusted_mode.clock * bpp, 8);
+
+	/*
+	 * Tegra30/114 Memory Controller can't interleave DC memory requests
+	 * for the tiled windows because DC uses 16-byte atoms, while DDR3
+	 * uses 32-byte atoms.  Hence there is x2 memory overfetch for tiled
+	 * framebuffer and DDR3 on these SoCs.
+	 */
+	if (soc->plane_tiled_memory_bandwidth_x2 &&
+	    tegra_state->tiling.mode == TEGRA_BO_TILING_MODE_TILED)
+		mul = 2;
+	else
+		mul = 1;
+
+	/* ICC bandwidth in kbytes/sec */
+	tegra_state->peak_memory_bandwidth = kBps_to_icc(peak_bandwidth) * mul;
+	tegra_state->avg_memory_bandwidth  = kBps_to_icc(avg_bandwidth)  * mul;
+
+	return 0;
+}
+
 int tegra_plane_state_add(struct tegra_plane *plane,
 			  struct drm_plane_state *state)
 {
@@ -115,6 +312,10 @@ int tegra_plane_state_add(struct tegra_plane *plane,
 	if (err < 0)
 		return err;
 
+	err = tegra_plane_calculate_memory_bandwidth(state);
+	if (err < 0)
+		return err;
+
 	tegra = to_dc_state(crtc_state);
 
 	tegra->planes |= WIN_A_ACT_REQ << plane->index;
@@ -213,6 +414,22 @@ int tegra_plane_format(u32 fourcc, u32 *format, u32 *swap)
 		*swap = BYTE_SWAP_SWAP2;
 		break;
 
+	case DRM_FORMAT_YVYU:
+		if (!swap)
+			return -EINVAL;
+
+		*format = WIN_COLOR_DEPTH_YCbCr422;
+		*swap = BYTE_SWAP_SWAP4;
+		break;
+
+	case DRM_FORMAT_VYUY:
+		if (!swap)
+			return -EINVAL;
+
+		*format = WIN_COLOR_DEPTH_YCbCr422;
+		*swap = BYTE_SWAP_SWAP4HW;
+		break;
+
 	case DRM_FORMAT_YUV420:
 		*format = WIN_COLOR_DEPTH_YCbCr420P;
 		break;
@@ -221,6 +438,34 @@ int tegra_plane_format(u32 fourcc, u32 *format, u32 *swap)
 		*format = WIN_COLOR_DEPTH_YCbCr422P;
 		break;
 
+	case DRM_FORMAT_YUV444:
+		*format = WIN_COLOR_DEPTH_YCbCr444P;
+		break;
+
+	case DRM_FORMAT_NV12:
+		*format = WIN_COLOR_DEPTH_YCbCr420SP;
+		break;
+
+	case DRM_FORMAT_NV21:
+		*format = WIN_COLOR_DEPTH_YCrCb420SP;
+		break;
+
+	case DRM_FORMAT_NV16:
+		*format = WIN_COLOR_DEPTH_YCbCr422SP;
+		break;
+
+	case DRM_FORMAT_NV61:
+		*format = WIN_COLOR_DEPTH_YCrCb422SP;
+		break;
+
+	case DRM_FORMAT_NV24:
+		*format = WIN_COLOR_DEPTH_YCbCr444SP;
+		break;
+
+	case DRM_FORMAT_NV42:
+		*format = WIN_COLOR_DEPTH_YCrCb444SP;
+		break;
+
 	default:
 		return -EINVAL;
 	}
@@ -228,13 +473,29 @@ int tegra_plane_format(u32 fourcc, u32 *format, u32 *swap)
 	return 0;
 }
 
-bool tegra_plane_format_is_yuv(unsigned int format, bool *planar)
+/*
+ * Report whether @format is one of the indexed colour depths.
+ *
+ * Only WIN_COLOR_DEPTH_P1, _P2, _P4 and _P8 qualify.
+ */
+bool tegra_plane_format_is_indexed(unsigned int format)
+{
+	return format == WIN_COLOR_DEPTH_P1 ||
+	       format == WIN_COLOR_DEPTH_P2 ||
+	       format == WIN_COLOR_DEPTH_P4 ||
+	       format == WIN_COLOR_DEPTH_P8;
+}
+
+bool tegra_plane_format_is_yuv(unsigned int format, unsigned int *planes, unsigned int *bpc)
 {
 	switch (format) {
 	case WIN_COLOR_DEPTH_YCbCr422:
 	case WIN_COLOR_DEPTH_YUV422:
-		if (planar)
-			*planar = false;
+		if (planes)
+			*planes = 1;
+
+		if (bpc)
+			*bpc = 8;
 
 		return true;
 
@@ -246,14 +507,32 @@ bool tegra_plane_format_is_yuv(unsigned int format, bool *planar)
 	case WIN_COLOR_DEPTH_YUV422R:
 	case WIN_COLOR_DEPTH_YCbCr422RA:
 	case WIN_COLOR_DEPTH_YUV422RA:
-		if (planar)
-			*planar = true;
+	case WIN_COLOR_DEPTH_YCbCr444P:
+		if (planes)
+			*planes = 3;
+
+		if (bpc)
+			*bpc = 8;
+
+		return true;
+
+	case WIN_COLOR_DEPTH_YCrCb420SP:
+	case WIN_COLOR_DEPTH_YCbCr420SP:
+	case WIN_COLOR_DEPTH_YCrCb422SP:
+	case WIN_COLOR_DEPTH_YCbCr422SP:
+	case WIN_COLOR_DEPTH_YCrCb444SP:
+	case WIN_COLOR_DEPTH_YCbCr444SP:
+		if (planes)
+			*planes = 2;
+
+		if (bpc)
+			*bpc = 8;
 
 		return true;
 	}
 
-	if (planar)
-		*planar = false;
+	if (planes)
+		*planes = 1;
 
 	return false;
 }
@@ -274,7 +553,7 @@ static bool __drm_format_has_alpha(u32 format)
 static int tegra_plane_format_get_alpha(unsigned int opaque,
 					unsigned int *alpha)
 {
-	if (tegra_plane_format_is_yuv(opaque, NULL)) {
+	if (tegra_plane_format_is_yuv(opaque, NULL, NULL)) {
 		*alpha = opaque;
 		return 0;
 	}
@@ -480,3 +759,36 @@ int tegra_plane_setup_legacy_state(struct tegra_plane *tegra,
 
 	return 0;
 }
+
+static const char * const tegra_plane_icc_names[TEGRA_DC_LEGACY_PLANES_NUM] = {
+	[0] = "wina", [1] = "winb", [2] = "winc", [6] = "cursor", /* 3..5: NULL */
+};
+
+int tegra_plane_interconnect_init(struct tegra_plane *plane)
+{
+	struct tegra_dc *dc = plane->dc;
+	const char *icc_name;
+
+	/* bounds-check the index before it is used to look up the path name */
+	if (WARN_ON(plane->index >= TEGRA_DC_LEGACY_PLANES_NUM))
+		return -EINVAL;
+
+	icc_name = tegra_plane_icc_names[plane->index];
+	if (WARN_ON(!icc_name))
+		return -EINVAL;
+
+	plane->icc_mem = devm_of_icc_get(dc->dev, icc_name);
+	if (IS_ERR(plane->icc_mem))
+		return dev_err_probe(dc->dev, PTR_ERR(plane->icc_mem),
+				     "failed to get %s interconnect\n", icc_name);
+
+	/* plane B on T20/30 has a dedicated memory client for a 6-tap vertical filter */
+	if (plane->index == 1 && dc->soc->has_win_b_vfilter_mem_client) {
+		plane->icc_mem_vfilter = devm_of_icc_get(dc->dev, "winb-vfilter");
+		if (IS_ERR(plane->icc_mem_vfilter))
+			return dev_err_probe(dc->dev, PTR_ERR(plane->icc_mem_vfilter),
+					     "failed to get %s interconnect\n", "winb-vfilter");
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/tegra/plane.h b/drivers/gpu/drm/tegra/plane.h
index e79e6b4a8e0a..e33a581e6490 100644
--- a/drivers/gpu/drm/tegra/plane.h
+++ b/drivers/gpu/drm/tegra/plane.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2017 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef TEGRA_PLANE_H
@@ -11,6 +8,7 @@
 
 #include <drm/drm_plane.h>
 
+struct icc_path;
 struct tegra_bo;
 struct tegra_dc;
 
@@ -19,6 +17,9 @@ struct tegra_plane {
 	struct tegra_dc *dc;
 	unsigned int offset;
 	unsigned int index;
+
+	struct icc_path *icc_mem;
+	struct icc_path *icc_mem_vfilter;
 };
 
 struct tegra_cursor {
@@ -42,15 +43,24 @@ struct tegra_plane_legacy_blending_state {
 struct tegra_plane_state {
 	struct drm_plane_state base;
 
+	struct host1x_bo_mapping *map[3];
+	dma_addr_t iova[3];
+
 	struct tegra_bo_tiling tiling;
 	u32 format;
 	u32 swap;
 
-	bool bottom_up;
+	bool reflect_x;
+	bool reflect_y;
 
 	/* used for legacy blending support only */
 	struct tegra_plane_legacy_blending_state blending[2];
 	bool opaque;
+
+	/* bandwidths are in ICC units, i.e. kbytes/sec */
+	u32 total_peak_memory_bandwidth;
+	u32 peak_memory_bandwidth;
+	u32 avg_memory_bandwidth;
 };
 
 static inline struct tegra_plane_state *
@@ -62,14 +72,27 @@ to_tegra_plane_state(struct drm_plane_state *state)
 	return NULL;
 }
 
+static inline const struct tegra_plane_state *
+to_const_tegra_plane_state(const struct drm_plane_state *state)
+{
+	return to_tegra_plane_state((struct drm_plane_state *)state); /* reuse non-const helper */
+}
+
 extern const struct drm_plane_funcs tegra_plane_funcs;
 
+int tegra_plane_prepare_fb(struct drm_plane *plane,
+			   struct drm_plane_state *state);
+void tegra_plane_cleanup_fb(struct drm_plane *plane,
+			    struct drm_plane_state *state);
+
 int tegra_plane_state_add(struct tegra_plane *plane,
 			  struct drm_plane_state *state);
 
 int tegra_plane_format(u32 fourcc, u32 *format, u32 *swap);
-bool tegra_plane_format_is_yuv(unsigned int format, bool *planar);
+bool tegra_plane_format_is_indexed(unsigned int format);
+bool tegra_plane_format_is_yuv(unsigned int format, unsigned int *planes, unsigned int *bpc);
 int tegra_plane_setup_legacy_state(struct tegra_plane *tegra,
 				   struct tegra_plane_state *state);
+int tegra_plane_interconnect_init(struct tegra_plane *plane);
 
 #endif /* TEGRA_PLANE_H */
diff --git a/drivers/gpu/drm/tegra/rgb.c b/drivers/gpu/drm/tegra/rgb.c
index 28a78d3120bc..ff5a749710db 100644
--- a/drivers/gpu/drm/tegra/rgb.c
+++ b/drivers/gpu/drm/tegra/rgb.c
@@ -1,16 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2012 Avionic Design GmbH
  * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
+#include <linux/of.h>
 
 #include <drm/drm_atomic_helper.h>
-#include <drm/drm_panel.h>
+#include <drm/drm_bridge_connector.h>
+#include <drm/drm_simple_kms_helper.h>
 
 #include "drm.h"
 #include "dc.h"
@@ -19,6 +18,8 @@ struct tegra_rgb {
 	struct tegra_output output;
 	struct tegra_dc *dc;
 
+	struct clk *pll_d_out0;
+	struct clk *pll_d2_out0;
 	struct clk *clk_parent;
 	struct clk *clk;
 };
@@ -87,69 +88,40 @@ static void tegra_dc_write_regs(struct tegra_dc *dc,
 		tegra_dc_writel(dc, table[i].value, table[i].offset);
 }
 
-static const struct drm_connector_funcs tegra_rgb_connector_funcs = {
-	.reset = drm_atomic_helper_connector_reset,
-	.detect = tegra_output_connector_detect,
-	.fill_modes = drm_helper_probe_single_connector_modes,
-	.destroy = tegra_output_connector_destroy,
-	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
-	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
-};
-
-static enum drm_mode_status
-tegra_rgb_connector_mode_valid(struct drm_connector *connector,
-			       struct drm_display_mode *mode)
-{
-	/*
-	 * FIXME: For now, always assume that the mode is okay. There are
-	 * unresolved issues with clk_round_rate(), which doesn't always
-	 * reliably report whether a frequency can be set or not.
-	 */
-	return MODE_OK;
-}
-
-static const struct drm_connector_helper_funcs tegra_rgb_connector_helper_funcs = {
-	.get_modes = tegra_output_connector_get_modes,
-	.mode_valid = tegra_rgb_connector_mode_valid,
-};
-
-static const struct drm_encoder_funcs tegra_rgb_encoder_funcs = {
-	.destroy = tegra_output_encoder_destroy,
-};
-
 static void tegra_rgb_encoder_disable(struct drm_encoder *encoder)
 {
 	struct tegra_output *output = encoder_to_output(encoder);
 	struct tegra_rgb *rgb = to_rgb(output);
 
-	if (output->panel)
-		drm_panel_disable(output->panel);
-
 	tegra_dc_write_regs(rgb->dc, rgb_disable, ARRAY_SIZE(rgb_disable));
 	tegra_dc_commit(rgb->dc);
-
-	if (output->panel)
-		drm_panel_unprepare(output->panel);
 }
 
 static void tegra_rgb_encoder_enable(struct drm_encoder *encoder)
 {
+	struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode;
 	struct tegra_output *output = encoder_to_output(encoder);
 	struct tegra_rgb *rgb = to_rgb(output);
 	u32 value;
 
-	if (output->panel)
-		drm_panel_prepare(output->panel);
-
 	tegra_dc_write_regs(rgb->dc, rgb_enable, ARRAY_SIZE(rgb_enable));
 
 	value = DE_SELECT_ACTIVE | DE_CONTROL_NORMAL;
 	tegra_dc_writel(rgb->dc, value, DC_DISP_DATA_ENABLE_OPTIONS);
 
-	/* XXX: parameterize? */
+	/* configure H- and V-sync signal polarities */
 	value = tegra_dc_readl(rgb->dc, DC_COM_PIN_OUTPUT_POLARITY(1));
-	value &= ~LVS_OUTPUT_POLARITY_LOW;
-	value &= ~LHS_OUTPUT_POLARITY_LOW;
+
+	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+		value |= LHS_OUTPUT_POLARITY_LOW;
+	else
+		value &= ~LHS_OUTPUT_POLARITY_LOW;
+
+	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+		value |= LVS_OUTPUT_POLARITY_LOW;
+	else
+		value &= ~LVS_OUTPUT_POLARITY_LOW;
+
 	tegra_dc_writel(rgb->dc, value, DC_COM_PIN_OUTPUT_POLARITY(1));
 
 	/* XXX: parameterize? */
@@ -157,14 +129,19 @@ static void tegra_rgb_encoder_enable(struct drm_encoder *encoder)
 		DISP_ORDER_RED_BLUE;
 	tegra_dc_writel(rgb->dc, value, DC_DISP_DISP_INTERFACE_CONTROL);
 
-	/* XXX: parameterize? */
-	value = SC0_H_QUALIFIER_NONE | SC1_H_QUALIFIER_NONE;
-	tegra_dc_writel(rgb->dc, value, DC_DISP_SHIFT_CLOCK_OPTIONS);
-
 	tegra_dc_commit(rgb->dc);
+}
 
-	if (output->panel)
-		drm_panel_enable(output->panel);
+/* Allowed only if pll_d2_out0 exists and the parent clock is a PLL D output */
+static bool tegra_rgb_pll_rate_change_allowed(struct tegra_rgb *rgb)
+{
+	struct clk *parent = rgb->clk_parent;
+
+	if (!rgb->pll_d2_out0)
+		return false;
+
+	return clk_is_match(parent, rgb->pll_d_out0) ||
+	       clk_is_match(parent, rgb->pll_d2_out0);
+}
 
 static int
@@ -195,8 +172,17 @@ tegra_rgb_encoder_atomic_check(struct drm_encoder *encoder,
 	 * and hope that the desired frequency can be matched (or at least
 	 * matched sufficiently close that the panel will still work).
 	 */
-	div = ((clk_get_rate(rgb->clk) * 2) / pclk) - 2;
-	pclk = 0;
+	if (tegra_rgb_pll_rate_change_allowed(rgb)) {
+		/*
+		 * Set display controller clock to x2 of PCLK in order to
+		 * produce higher resolution pulse positions.
+		 */
+		div = 2;
+		pclk *= 2;
+	} else {
+		div = ((clk_get_rate(rgb->clk) * 2) / pclk) - 2;
+		pclk = 0;
+	}
 
 	err = tegra_dc_state_setup_clock(dc, crtc_state, rgb->clk_parent,
 					 pclk, div);
@@ -214,6 +200,11 @@ static const struct drm_encoder_helper_funcs tegra_rgb_encoder_helper_funcs = {
 	.atomic_check = tegra_rgb_encoder_atomic_check,
 };
 
+static void tegra_dc_of_node_put(void *data)
+{
+	of_node_put(data);	/* @data: the "rgb" child node registered in probe */
+}
+
 int tegra_dc_rgb_probe(struct tegra_dc *dc)
 {
 	struct device_node *np;
@@ -221,7 +212,14 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc)
 	int err;
 
 	np = of_get_child_by_name(dc->dev->of_node, "rgb");
-	if (!np || !of_device_is_available(np))
+	if (!np)
+		return -ENODEV;
+
+	err = devm_add_action_or_reset(dc->dev, tegra_dc_of_node_put, np);
+	if (err < 0)
+		return err;
+
+	if (!of_device_is_available(np))
 		return -ENODEV;
 
 	rgb = devm_kzalloc(dc->dev, sizeof(*rgb), GFP_KERNEL);
@@ -239,59 +237,127 @@ int tegra_dc_rgb_probe(struct tegra_dc *dc)
 	rgb->clk = devm_clk_get(dc->dev, NULL);
 	if (IS_ERR(rgb->clk)) {
 		dev_err(dc->dev, "failed to get clock\n");
-		return PTR_ERR(rgb->clk);
+		err = PTR_ERR(rgb->clk);
+		goto remove;
 	}
 
 	rgb->clk_parent = devm_clk_get(dc->dev, "parent");
 	if (IS_ERR(rgb->clk_parent)) {
 		dev_err(dc->dev, "failed to get parent clock\n");
-		return PTR_ERR(rgb->clk_parent);
+		err = PTR_ERR(rgb->clk_parent);
+		goto remove;
 	}
 
 	err = clk_set_parent(rgb->clk, rgb->clk_parent);
 	if (err < 0) {
 		dev_err(dc->dev, "failed to set parent clock: %d\n", err);
-		return err;
+		goto remove;
+	}
+
+	rgb->pll_d_out0 = clk_get_sys(NULL, "pll_d_out0");
+	if (IS_ERR(rgb->pll_d_out0)) {
+		err = PTR_ERR(rgb->pll_d_out0);
+		dev_err(dc->dev, "failed to get pll_d_out0: %d\n", err);
+		goto remove;
+	}
+
+	if (dc->soc->has_pll_d2_out0) {
+		rgb->pll_d2_out0 = clk_get_sys(NULL, "pll_d2_out0");
+		if (IS_ERR(rgb->pll_d2_out0)) {
+			err = PTR_ERR(rgb->pll_d2_out0);
+			dev_err(dc->dev, "failed to get pll_d2_out0: %d\n", err);
+			goto put_pll;
+		}
 	}
 
 	dc->rgb = &rgb->output;
 
 	return 0;
+
+put_pll:
+	clk_put(rgb->pll_d_out0);
+remove:
+	tegra_output_remove(&rgb->output);
+	return err;
 }
 
-int tegra_dc_rgb_remove(struct tegra_dc *dc)
+void tegra_dc_rgb_remove(struct tegra_dc *dc)
 {
+	struct tegra_rgb *rgb;
+
 	if (!dc->rgb)
-		return 0;
+		return;
+
+	rgb = to_rgb(dc->rgb);
+	clk_put(rgb->pll_d2_out0);
+	clk_put(rgb->pll_d_out0);
 
 	tegra_output_remove(dc->rgb);
 	dc->rgb = NULL;
-
-	return 0;
 }
 
 int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc)
 {
 	struct tegra_output *output = dc->rgb;
+	struct drm_connector *connector;
 	int err;
 
 	if (!dc->rgb)
 		return -ENODEV;
 
-	drm_connector_init(drm, &output->connector, &tegra_rgb_connector_funcs,
-			   DRM_MODE_CONNECTOR_LVDS);
-	drm_connector_helper_add(&output->connector,
-				 &tegra_rgb_connector_helper_funcs);
-	output->connector.dpms = DRM_MODE_DPMS_OFF;
-
-	drm_encoder_init(drm, &output->encoder, &tegra_rgb_encoder_funcs,
-			 DRM_MODE_ENCODER_LVDS, NULL);
+	drm_simple_encoder_init(drm, &output->encoder, DRM_MODE_ENCODER_LVDS);
 	drm_encoder_helper_add(&output->encoder,
 			       &tegra_rgb_encoder_helper_funcs);
 
-	drm_connector_attach_encoder(&output->connector,
-					  &output->encoder);
-	drm_connector_register(&output->connector);
+	/*
+	 * Wrap directly-connected panel into DRM bridge in order to let
+	 * DRM core to handle panel for us.
+	 */
+	if (output->panel) {
+		output->bridge = devm_drm_panel_bridge_add(output->dev,
+							   output->panel);
+		if (IS_ERR(output->bridge)) {
+			dev_err(output->dev,
+				"failed to wrap panel into bridge: %pe\n",
+				output->bridge);
+			return PTR_ERR(output->bridge);
+		}
+
+		output->panel = NULL;
+	}
+
+	/*
+	 * Tegra devices that have LVDS panel utilize LVDS encoder bridge
+	 * for converting up to 28 LCD LVTTL lanes into 5/4 LVDS lanes that
+	 * go to display panel's receiver.
+	 *
+	 * Encoder usually have a power-down control which needs to be enabled
+	 * in order to transmit data to the panel.  Historically devices that
+	 * use an older device-tree version didn't model the bridge, assuming
+	 * that encoder is turned ON by default, while today's DRM allows us
+	 * to model LVDS encoder properly.
+	 *
+	 * Newer device-trees utilize LVDS encoder bridge, which provides
+	 * us with a connector and handles the display panel.
+	 *
+	 * For older device-trees we wrapped panel into the panel-bridge.
+	 */
+	if (output->bridge) {
+		err = drm_bridge_attach(&output->encoder, output->bridge,
+					NULL, DRM_BRIDGE_ATTACH_NO_CONNECTOR);
+		if (err)
+			return err;
+
+		connector = drm_bridge_connector_init(drm, &output->encoder);
+		if (IS_ERR(connector)) {
+			dev_err(output->dev,
+				"failed to initialize bridge connector: %pe\n",
+				connector);
+			return PTR_ERR(connector);
+		}
+
+		drm_connector_attach_encoder(connector, &output->encoder);
+	}
 
 	err = tegra_output_init(drm, output);
 	if (err < 0) {
diff --git a/drivers/gpu/drm/tegra/riscv.c b/drivers/gpu/drm/tegra/riscv.c
new file mode 100644
index 000000000000..6580416408f8
--- /dev/null
+++ b/drivers/gpu/drm/tegra/riscv.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2022, NVIDIA Corporation.
+ */
+
+#include <linux/dev_printk.h>
+#include <linux/device.h>
+#include <linux/iopoll.h>
+#include <linux/of.h>
+
+#include "riscv.h"
+
+#define RISCV_CPUCTL					0x4388
+#define RISCV_CPUCTL_STARTCPU_TRUE			(1 << 0)
+#define RISCV_BR_RETCODE				0x465c
+#define RISCV_BR_RETCODE_RESULT_V(x)			((x) & 0x3)
+#define RISCV_BR_RETCODE_RESULT_PASS_V			3
+#define RISCV_BCR_CTRL					0x4668
+#define RISCV_BCR_CTRL_CORE_SELECT_RISCV		(1 << 4)
+#define RISCV_BCR_DMACFG				0x466c
+#define RISCV_BCR_DMACFG_TARGET_LOCAL_FB		(0 << 0)
+#define RISCV_BCR_DMACFG_LOCK_LOCKED			(1U << 31)
+#define RISCV_BCR_DMAADDR_PKCPARAM_LO			0x4670
+#define RISCV_BCR_DMAADDR_PKCPARAM_HI			0x4674
+#define RISCV_BCR_DMAADDR_FMCCODE_LO			0x4678
+#define RISCV_BCR_DMAADDR_FMCCODE_HI			0x467c
+#define RISCV_BCR_DMAADDR_FMCDATA_LO			0x4680
+#define RISCV_BCR_DMAADDR_FMCDATA_HI			0x4684
+#define RISCV_BCR_DMACFG_SEC				0x4694
+#define RISCV_BCR_DMACFG_SEC_GSCID(v)			((v) << 16)
+
+/* Write @value to the register at byte offset @offset from riscv->regs. */
+static void riscv_writel(struct tegra_drm_riscv *riscv, u32 value, u32 offset)
+{
+	writel(value, riscv->regs + offset);
+}
+
+/**
+ * tegra_drm_riscv_read_descriptors() - read firmware offsets from device tree
+ * @riscv: RISC-V instance; riscv->dev must be set and provides the OF node
+ *
+ * Reads the manifest, code and data offsets for the "bl" and "os" images from
+ * the "nvidia,{bl,os}-{manifest,code,data}-offset" properties and stores them
+ * in riscv->bl_desc and riscv->os_desc. The code_size and data_size fields of
+ * the descriptors are not written by this function.
+ *
+ * Return: 0 on success, the error returned by of_property_read_u32() if a
+ * property cannot be read, or -EINVAL if all six offsets are zero.
+ */
+int tegra_drm_riscv_read_descriptors(struct tegra_drm_riscv *riscv)
+{
+	struct tegra_drm_riscv_descriptor *bl = &riscv->bl_desc;
+	struct tegra_drm_riscv_descriptor *os = &riscv->os_desc;
+	const struct device_node *np = riscv->dev->of_node;
+	int err;
+
+	/* Read one u32 property; on failure log it and return from the caller. */
+#define READ_PROP(name, location) \
+	do { \
+		err = of_property_read_u32(np, name, location); \
+		if (err) { \
+			dev_err(riscv->dev, "failed to read " name ": %d\n", err); \
+			return err; \
+		} \
+	} while (0)
+
+	READ_PROP("nvidia,bl-manifest-offset", &bl->manifest_offset);
+	READ_PROP("nvidia,bl-code-offset", &bl->code_offset);
+	READ_PROP("nvidia,bl-data-offset", &bl->data_offset);
+	READ_PROP("nvidia,os-manifest-offset", &os->manifest_offset);
+	READ_PROP("nvidia,os-code-offset", &os->code_offset);
+	READ_PROP("nvidia,os-data-offset", &os->data_offset);
+#undef READ_PROP
+
+	/* all offsets being zero is treated as "no descriptors provided" */
+	if (bl->manifest_offset == 0 && bl->code_offset == 0 &&
+	    bl->data_offset == 0 && os->manifest_offset == 0 &&
+	    os->code_offset == 0 && os->data_offset == 0) {
+		dev_err(riscv->dev, "descriptors not available\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * tegra_drm_riscv_boot_bootrom() - program boot registers and start the core
+ * @riscv: RISC-V instance; riscv->regs and riscv->dev must be set
+ * @image_address: base address of the firmware image
+ * @gscid: value placed in the GSCID field of RISCV_BCR_DMACFG_SEC
+ * @desc: manifest, code and data offsets relative to @image_address
+ *
+ * Selects the RISC-V core, writes the manifest, code and data addresses to the
+ * BCR DMA address registers, locks the DMA configuration, starts the CPU and
+ * then polls RISCV_BR_RETCODE until its result field reports a pass.
+ *
+ * Return: 0 on success or the error returned by readl_poll_timeout().
+ */
+int tegra_drm_riscv_boot_bootrom(struct tegra_drm_riscv *riscv, phys_addr_t image_address,
+				 u32 gscid, const struct tegra_drm_riscv_descriptor *desc)
+{
+	phys_addr_t addr;
+	int err;
+	u32 val;
+
+	riscv_writel(riscv, RISCV_BCR_CTRL_CORE_SELECT_RISCV, RISCV_BCR_CTRL);
+
+	/* addresses are programmed right-shifted by 8 bits, split into LO/HI */
+	addr = image_address + desc->manifest_offset;
+	riscv_writel(riscv, lower_32_bits(addr >> 8), RISCV_BCR_DMAADDR_PKCPARAM_LO);
+	riscv_writel(riscv, upper_32_bits(addr >> 8), RISCV_BCR_DMAADDR_PKCPARAM_HI);
+
+	addr = image_address + desc->code_offset;
+	riscv_writel(riscv, lower_32_bits(addr >> 8), RISCV_BCR_DMAADDR_FMCCODE_LO);
+	riscv_writel(riscv, upper_32_bits(addr >> 8), RISCV_BCR_DMAADDR_FMCCODE_HI);
+
+	addr = image_address + desc->data_offset;
+	riscv_writel(riscv, lower_32_bits(addr >> 8), RISCV_BCR_DMAADDR_FMCDATA_LO);
+	riscv_writel(riscv, upper_32_bits(addr >> 8), RISCV_BCR_DMAADDR_FMCDATA_HI);
+
+	riscv_writel(riscv, RISCV_BCR_DMACFG_SEC_GSCID(gscid), RISCV_BCR_DMACFG_SEC);
+	riscv_writel(riscv,
+		RISCV_BCR_DMACFG_TARGET_LOCAL_FB | RISCV_BCR_DMACFG_LOCK_LOCKED, RISCV_BCR_DMACFG);
+
+	riscv_writel(riscv, RISCV_CPUCTL_STARTCPU_TRUE, RISCV_CPUCTL);
+
+	/* poll with a 10 us sleep between reads, give up after 100 ms */
+	err = readl_poll_timeout(
+		riscv->regs + RISCV_BR_RETCODE, val,
+		RISCV_BR_RETCODE_RESULT_V(val) == RISCV_BR_RETCODE_RESULT_PASS_V,
+		10, 100000);
+	if (err) {
+		dev_err(riscv->dev, "error during bootrom execution. BR_RETCODE=%u\n", val);
+		return err;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/tegra/riscv.h b/drivers/gpu/drm/tegra/riscv.h
new file mode 100644
index 000000000000..bbeb2db078b6
--- /dev/null
+++ b/drivers/gpu/drm/tegra/riscv.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022, NVIDIA Corporation.
+ */
+
+#ifndef DRM_TEGRA_RISCV_H
+#define DRM_TEGRA_RISCV_H
+
+#include <linux/types.h>
+
+struct device;
+
+/* Offsets are added to the image base by tegra_drm_riscv_boot_bootrom(). */
+struct tegra_drm_riscv_descriptor {
+	u32 manifest_offset;
+	u32 code_offset;
+	u32 code_size;
+	u32 data_offset;
+	u32 data_size;
+};
+
+struct tegra_drm_riscv {
+	/* User initializes */
+	struct device *dev;
+	void __iomem *regs;
+
+	/* Offsets are filled in by tegra_drm_riscv_read_descriptors() */
+	struct tegra_drm_riscv_descriptor bl_desc;
+	struct tegra_drm_riscv_descriptor os_desc;
+};
+
+int tegra_drm_riscv_read_descriptors(struct tegra_drm_riscv *riscv);
+int tegra_drm_riscv_boot_bootrom(struct tegra_drm_riscv *riscv, phys_addr_t image_address,
+				 u32 gscid, const struct tegra_drm_riscv_descriptor *desc);
+
+#endif
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index ef8692b7075a..4023cb5998f1 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -1,17 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2013 NVIDIA Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/debugfs.h>
-#include <linux/gpio.h>
 #include <linux/io.h>
-#include <linux/of_device.h>
+#include <linux/module.h>
+#include <linux/of.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
@@ -19,15 +16,21 @@
 
 #include <soc/tegra/pmc.h>
 
-#include <sound/hda_verbs.h>
-
+#include <drm/display/drm_dp_helper.h>
+#include <drm/display/drm_scdc_helper.h>
 #include <drm/drm_atomic_helper.h>
-#include <drm/drm_dp_helper.h>
+#include <drm/drm_debugfs.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_eld.h>
+#include <drm/drm_file.h>
 #include <drm/drm_panel.h>
-#include <drm/drm_scdc_helper.h>
+#include <drm/drm_print.h>
+#include <drm/drm_simple_kms_helper.h>
 
 #include "dc.h"
+#include "dp.h"
 #include "drm.h"
+#include "hda.h"
 #include "sor.h"
 #include "trace.h"
 
@@ -371,10 +374,11 @@ struct tegra_sor_regs {
 };
 
 struct tegra_sor_soc {
-	bool supports_edp;
 	bool supports_lvds;
 	bool supports_hdmi;
 	bool supports_dp;
+	bool supports_audio;
+	bool supports_hdcp;
 
 	const struct tegra_sor_regs *regs;
 	bool has_nvdisplay;
@@ -383,6 +387,12 @@ struct tegra_sor_soc {
 	unsigned int num_settings;
 
 	const u8 *xbar_cfg;
+	const u8 *lane_map;
+
+	const u8 (*voltage_swing)[4][4];
+	const u8 (*pre_emphasis)[4][4];
+	const u8 (*post_cursor)[4][4];
+	const u8 (*tx_pu)[4][4];
 };
 
 struct tegra_sor;
@@ -390,7 +400,8 @@ struct tegra_sor;
 struct tegra_sor_ops {
 	const char *name;
 	int (*probe)(struct tegra_sor *sor);
-	int (*remove)(struct tegra_sor *sor);
+	void (*audio_enable)(struct tegra_sor *sor);
+	void (*audio_disable)(struct tegra_sor *sor);
 };
 
 struct tegra_sor {
@@ -411,6 +422,9 @@ struct tegra_sor {
 	struct clk *clk_dp;
 	struct clk *clk;
 
+	u8 xbar_cfg[5];
+
+	struct drm_dp_link link;
 	struct drm_dp_aux *aux;
 
 	struct drm_info_list *debugfs_files;
@@ -429,10 +443,7 @@ struct tegra_sor {
 	struct delayed_work scdc;
 	bool scdc_enabled;
 
-	struct {
-		unsigned int sample_rate;
-		unsigned int channels;
-	} audio;
+	struct tegra_hda_format format;
 };
 
 struct tegra_sor_state {
@@ -516,10 +527,19 @@ static inline struct tegra_clk_sor_pad *to_pad(struct clk_hw *hw)
 	return container_of(hw, struct tegra_clk_sor_pad, hw);
 }
 
-static const char * const tegra_clk_sor_pad_parents[] = {
-	"pll_d2_out0", "pll_dp"
+static const char * const tegra_clk_sor_pad_parents[2][2] = {
+	{ "pll_d_out0", "pll_dp" },
+	{ "pll_d2_out0", "pll_dp" },
 };
 
+/*
+ * Implementing ->set_parent() here isn't really required because the parent
+ * will be explicitly selected in the driver code via the DP_CLK_SEL mux in
+ * the SOR_CLK_CNTRL register. This is primarily for compatibility with the
+ * Tegra186 and later SoC generations where the BPMP implements this clock
+ * and doesn't expose the mux via the common clock framework.
+ */
+
 static int tegra_clk_sor_pad_set_parent(struct clk_hw *hw, u8 index)
 {
 	struct tegra_clk_sor_pad *pad = to_pad(hw);
@@ -569,6 +589,7 @@ static u8 tegra_clk_sor_pad_get_parent(struct clk_hw *hw)
 }
 
 static const struct clk_ops tegra_clk_sor_pad_ops = {
+	.determine_rate = clk_hw_determine_rate_no_reparent,
 	.set_parent = tegra_clk_sor_pad_set_parent,
 	.get_parent = tegra_clk_sor_pad_get_parent,
 };
@@ -588,8 +609,8 @@ static struct clk *tegra_clk_sor_pad_register(struct tegra_sor *sor,
 
 	init.name = name;
 	init.flags = 0;
-	init.parent_names = tegra_clk_sor_pad_parents;
-	init.num_parents = ARRAY_SIZE(tegra_clk_sor_pad_parents);
+	init.parent_names = tegra_clk_sor_pad_parents[sor->index];
+	init.num_parents = ARRAY_SIZE(tegra_clk_sor_pad_parents[sor->index]);
 	init.ops = &tegra_clk_sor_pad_ops;
 
 	pad->hw.init = &init;
@@ -599,112 +620,340 @@ static struct clk *tegra_clk_sor_pad_register(struct tegra_sor *sor,
 	return clk;
 }
 
-static int tegra_sor_dp_train_fast(struct tegra_sor *sor,
-				   struct drm_dp_link *link)
+static void tegra_sor_filter_rates(struct tegra_sor *sor)
 {
+	struct drm_dp_link *link = &sor->link;
 	unsigned int i;
-	u8 pattern;
+
+	/* Tegra only supports RBR, HBR and HBR2 */
+	for (i = 0; i < link->num_rates; i++) {
+		switch (link->rates[i]) {
+		case 1620000:
+		case 2700000:
+		case 5400000:
+			break;
+
+		default:
+			DRM_DEBUG_KMS("link rate %lu kHz not supported\n",
+				      link->rates[i]);
+			link->rates[i] = 0;
+			break;
+		}
+	}
+
+	drm_dp_link_update_rates(link);
+}
+
+static int tegra_sor_power_up_lanes(struct tegra_sor *sor, unsigned int lanes)
+{
+	unsigned long timeout;
 	u32 value;
-	int err;
 
-	/* setup lane parameters */
-	value = SOR_LANE_DRIVE_CURRENT_LANE3(0x40) |
-		SOR_LANE_DRIVE_CURRENT_LANE2(0x40) |
-		SOR_LANE_DRIVE_CURRENT_LANE1(0x40) |
-		SOR_LANE_DRIVE_CURRENT_LANE0(0x40);
-	tegra_sor_writel(sor, value, SOR_LANE_DRIVE_CURRENT0);
+	/*
+	 * Clear or set the PD_TXD bit corresponding to each lane, depending
+	 * on whether it is used or not.
+	 */
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
 
-	value = SOR_LANE_PREEMPHASIS_LANE3(0x0f) |
-		SOR_LANE_PREEMPHASIS_LANE2(0x0f) |
-		SOR_LANE_PREEMPHASIS_LANE1(0x0f) |
-		SOR_LANE_PREEMPHASIS_LANE0(0x0f);
-	tegra_sor_writel(sor, value, SOR_LANE_PREEMPHASIS0);
+	if (lanes <= 2)
+		value &= ~(SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[3]) |
+			   SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[2]));
+	else
+		value |= SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[3]) |
+			 SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[2]);
 
-	value = SOR_LANE_POSTCURSOR_LANE3(0x00) |
-		SOR_LANE_POSTCURSOR_LANE2(0x00) |
-		SOR_LANE_POSTCURSOR_LANE1(0x00) |
-		SOR_LANE_POSTCURSOR_LANE0(0x00);
-	tegra_sor_writel(sor, value, SOR_LANE_POSTCURSOR0);
+	if (lanes <= 1)
+		value &= ~SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[1]);
+	else
+		value |= SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[1]);
 
-	/* disable LVDS mode */
-	tegra_sor_writel(sor, 0, SOR_LVDS);
+	if (lanes == 0)
+		value &= ~SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[0]);
+	else
+		value |= SOR_DP_PADCTL_PD_TXD(sor->soc->lane_map[0]);
+
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
+
+	/* start lane sequencer */
+	value = SOR_LANE_SEQ_CTL_TRIGGER | SOR_LANE_SEQ_CTL_SEQUENCE_DOWN |
+		SOR_LANE_SEQ_CTL_POWER_STATE_UP;
+	tegra_sor_writel(sor, value, SOR_LANE_SEQ_CTL);
+
+	timeout = jiffies + msecs_to_jiffies(250);
 
+	while (time_before(jiffies, timeout)) {
+		value = tegra_sor_readl(sor, SOR_LANE_SEQ_CTL);
+		if ((value & SOR_LANE_SEQ_CTL_TRIGGER) == 0)
+			break;
+
+		usleep_range(250, 1000);
+	}
+
+	if ((value & SOR_LANE_SEQ_CTL_TRIGGER) != 0)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int tegra_sor_power_down_lanes(struct tegra_sor *sor)
+{
+	unsigned long timeout;
+	u32 value;
+
+	/* power down all lanes */
 	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
-	value |= SOR_DP_PADCTL_TX_PU_ENABLE;
-	value &= ~SOR_DP_PADCTL_TX_PU_MASK;
-	value |= SOR_DP_PADCTL_TX_PU(2); /* XXX: don't hardcode? */
+	value &= ~(SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_0 |
+		   SOR_DP_PADCTL_PD_TXD_1 | SOR_DP_PADCTL_PD_TXD_2);
 	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
 
+	/* start lane sequencer */
+	value = SOR_LANE_SEQ_CTL_TRIGGER | SOR_LANE_SEQ_CTL_SEQUENCE_UP |
+		SOR_LANE_SEQ_CTL_POWER_STATE_DOWN;
+	tegra_sor_writel(sor, value, SOR_LANE_SEQ_CTL);
+
+	timeout = jiffies + msecs_to_jiffies(250);
+
+	while (time_before(jiffies, timeout)) {
+		value = tegra_sor_readl(sor, SOR_LANE_SEQ_CTL);
+		if ((value & SOR_LANE_SEQ_CTL_TRIGGER) == 0)
+			break;
+
+		usleep_range(25, 100);
+	}
+
+	if ((value & SOR_LANE_SEQ_CTL_TRIGGER) != 0)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static void tegra_sor_dp_precharge(struct tegra_sor *sor, unsigned int lanes)
+{
+	u32 value;
+
+	/* pre-charge all used lanes */
 	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
-	value |= SOR_DP_PADCTL_CM_TXD_3 | SOR_DP_PADCTL_CM_TXD_2 |
-		 SOR_DP_PADCTL_CM_TXD_1 | SOR_DP_PADCTL_CM_TXD_0;
+
+	if (lanes <= 2)
+		value &= ~(SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[3]) |
+			   SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[2]));
+	else
+		value |= SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[3]) |
+			 SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[2]);
+
+	if (lanes <= 1)
+		value &= ~SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[1]);
+	else
+		value |= SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[1]);
+
+	if (lanes == 0)
+		value &= ~SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[0]);
+	else
+		value |= SOR_DP_PADCTL_CM_TXD(sor->soc->lane_map[0]);
+
 	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
 
-	usleep_range(10, 100);
+	usleep_range(15, 100);
 
 	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
 	value &= ~(SOR_DP_PADCTL_CM_TXD_3 | SOR_DP_PADCTL_CM_TXD_2 |
 		   SOR_DP_PADCTL_CM_TXD_1 | SOR_DP_PADCTL_CM_TXD_0);
 	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
+}
 
-	err = drm_dp_aux_prepare(sor->aux, DP_SET_ANSI_8B10B);
-	if (err < 0)
-		return err;
+static void tegra_sor_dp_term_calibrate(struct tegra_sor *sor)
+{
+	u32 mask = 0x08, adj = 0, value;
+
+	/* enable pad calibration logic */
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
+	value &= ~SOR_DP_PADCTL_PAD_CAL_PD;
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
+
+	value = tegra_sor_readl(sor, sor->soc->regs->pll1);
+	value |= SOR_PLL1_TMDS_TERM;
+	tegra_sor_writel(sor, value, sor->soc->regs->pll1);
+
+	while (mask) {
+		adj |= mask;
 
-	for (i = 0, value = 0; i < link->num_lanes; i++) {
-		unsigned long lane = SOR_DP_TPG_CHANNEL_CODING |
-				     SOR_DP_TPG_SCRAMBLER_NONE |
-				     SOR_DP_TPG_PATTERN_TRAIN1;
-		value = (value << 8) | lane;
+		value = tegra_sor_readl(sor, sor->soc->regs->pll1);
+		value &= ~SOR_PLL1_TMDS_TERMADJ_MASK;
+		value |= SOR_PLL1_TMDS_TERMADJ(adj);
+		tegra_sor_writel(sor, value, sor->soc->regs->pll1);
+
+		usleep_range(100, 200);
+
+		value = tegra_sor_readl(sor, sor->soc->regs->pll1);
+		if (value & SOR_PLL1_TERM_COMPOUT)
+			adj &= ~mask;
+
+		mask >>= 1;
 	}
 
-	tegra_sor_writel(sor, value, SOR_DP_TPG);
+	value = tegra_sor_readl(sor, sor->soc->regs->pll1);
+	value &= ~SOR_PLL1_TMDS_TERMADJ_MASK;
+	value |= SOR_PLL1_TMDS_TERMADJ(adj);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll1);
+
+	/* disable pad calibration logic */
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
+	value |= SOR_DP_PADCTL_PAD_CAL_PD;
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
+}
 
-	pattern = DP_TRAINING_PATTERN_1;
+static int tegra_sor_dp_link_apply_training(struct drm_dp_link *link)
+{
+	struct tegra_sor *sor = container_of(link, struct tegra_sor, link);
+	u32 voltage_swing = 0, pre_emphasis = 0, post_cursor = 0;
+	const struct tegra_sor_soc *soc = sor->soc;
+	u32 pattern = 0, tx_pu = 0, value;
+	unsigned int i;
 
-	err = drm_dp_aux_train(sor->aux, link, pattern);
-	if (err < 0)
-		return err;
+	for (value = 0, i = 0; i < link->lanes; i++) {
+		u8 vs = link->train.request.voltage_swing[i];
+		u8 pe = link->train.request.pre_emphasis[i];
+		u8 pc = link->train.request.post_cursor[i];
+		u8 shift = sor->soc->lane_map[i] << 3;
 
-	value = tegra_sor_readl(sor, SOR_DP_SPARE0);
-	value |= SOR_DP_SPARE_SEQ_ENABLE;
-	value &= ~SOR_DP_SPARE_PANEL_INTERNAL;
-	value |= SOR_DP_SPARE_MACRO_SOR_CLK;
-	tegra_sor_writel(sor, value, SOR_DP_SPARE0);
+		voltage_swing |= soc->voltage_swing[pc][vs][pe] << shift;
+		pre_emphasis |= soc->pre_emphasis[pc][vs][pe] << shift;
+		post_cursor |= soc->post_cursor[pc][vs][pe] << shift;
+
+		if (sor->soc->tx_pu[pc][vs][pe] > tx_pu)
+			tx_pu = sor->soc->tx_pu[pc][vs][pe];
+
+		switch (link->train.pattern) {
+		case DP_TRAINING_PATTERN_DISABLE:
+			value = SOR_DP_TPG_SCRAMBLER_GALIOS |
+				SOR_DP_TPG_PATTERN_NONE;
+			break;
+
+		case DP_TRAINING_PATTERN_1:
+			value = SOR_DP_TPG_SCRAMBLER_NONE |
+				SOR_DP_TPG_PATTERN_TRAIN1;
+			break;
+
+		case DP_TRAINING_PATTERN_2:
+			value = SOR_DP_TPG_SCRAMBLER_NONE |
+				SOR_DP_TPG_PATTERN_TRAIN2;
+			break;
+
+		case DP_TRAINING_PATTERN_3:
+			value = SOR_DP_TPG_SCRAMBLER_NONE |
+				SOR_DP_TPG_PATTERN_TRAIN3;
+			break;
 
-	for (i = 0, value = 0; i < link->num_lanes; i++) {
-		unsigned long lane = SOR_DP_TPG_CHANNEL_CODING |
-				     SOR_DP_TPG_SCRAMBLER_NONE |
-				     SOR_DP_TPG_PATTERN_TRAIN2;
-		value = (value << 8) | lane;
+		default:
+			return -EINVAL;
+		}
+
+		if (link->caps.channel_coding)
+			value |= SOR_DP_TPG_CHANNEL_CODING;
+
+		pattern = pattern << 8 | value;
 	}
 
-	tegra_sor_writel(sor, value, SOR_DP_TPG);
+	tegra_sor_writel(sor, voltage_swing, SOR_LANE_DRIVE_CURRENT0);
+	tegra_sor_writel(sor, pre_emphasis, SOR_LANE_PREEMPHASIS0);
 
-	pattern = DP_LINK_SCRAMBLING_DISABLE | DP_TRAINING_PATTERN_2;
+	if (link->caps.tps3_supported)
+		tegra_sor_writel(sor, post_cursor, SOR_LANE_POSTCURSOR0);
 
-	err = drm_dp_aux_train(sor->aux, link, pattern);
-	if (err < 0)
-		return err;
+	tegra_sor_writel(sor, pattern, SOR_DP_TPG);
 
-	for (i = 0, value = 0; i < link->num_lanes; i++) {
-		unsigned long lane = SOR_DP_TPG_CHANNEL_CODING |
-				     SOR_DP_TPG_SCRAMBLER_GALIOS |
-				     SOR_DP_TPG_PATTERN_NONE;
-		value = (value << 8) | lane;
+	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
+	value &= ~SOR_DP_PADCTL_TX_PU_MASK;
+	value |= SOR_DP_PADCTL_TX_PU_ENABLE;
+	value |= SOR_DP_PADCTL_TX_PU(tx_pu);
+	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
+
+	usleep_range(20, 100);
+
+	return 0;
+}
+
+static int tegra_sor_dp_link_configure(struct drm_dp_link *link)
+{
+	struct tegra_sor *sor = container_of(link, struct tegra_sor, link);
+	unsigned int rate, lanes;
+	u32 value;
+	int err;
+
+	rate = drm_dp_link_rate_to_bw_code(link->rate);
+	lanes = link->lanes;
+
+	/* configure link speed and lane count */
+	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
+	value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK;
+	value |= SOR_CLK_CNTRL_DP_LINK_SPEED(rate);
+	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
+
+	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
+	value &= ~SOR_DP_LINKCTL_LANE_COUNT_MASK;
+	value |= SOR_DP_LINKCTL_LANE_COUNT(lanes);
+
+	if (link->caps.enhanced_framing)
+		value |= SOR_DP_LINKCTL_ENHANCED_FRAME;
+
+	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
+
+	usleep_range(400, 1000);
+
+	/* configure load pulse position adjustment */
+	value = tegra_sor_readl(sor, sor->soc->regs->pll1);
+	value &= ~SOR_PLL1_LOADADJ_MASK;
+
+	switch (rate) {
+	case DP_LINK_BW_1_62:
+		value |= SOR_PLL1_LOADADJ(0x3);
+		break;
+
+	case DP_LINK_BW_2_7:
+		value |= SOR_PLL1_LOADADJ(0x4);
+		break;
+
+	case DP_LINK_BW_5_4:
+		value |= SOR_PLL1_LOADADJ(0x6);
+		break;
 	}
 
-	tegra_sor_writel(sor, value, SOR_DP_TPG);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll1);
 
-	pattern = DP_TRAINING_PATTERN_DISABLE;
+	/* use alternate scrambler reset for eDP */
+	value = tegra_sor_readl(sor, SOR_DP_SPARE0);
 
-	err = drm_dp_aux_train(sor->aux, link, pattern);
-	if (err < 0)
+	if (link->edp == 0)
+		value &= ~SOR_DP_SPARE_PANEL_INTERNAL;
+	else
+		value |= SOR_DP_SPARE_PANEL_INTERNAL;
+
+	tegra_sor_writel(sor, value, SOR_DP_SPARE0);
+
+	err = tegra_sor_power_down_lanes(sor);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to power down lanes: %d\n", err);
 		return err;
+	}
+
+	/* power up and pre-charge lanes */
+	err = tegra_sor_power_up_lanes(sor, lanes);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to power up %u lane%s: %d\n",
+			lanes, (lanes != 1) ? "s" : "", err);
+		return err;
+	}
+
+	tegra_sor_dp_precharge(sor, lanes);
 
 	return 0;
 }
 
+static const struct drm_dp_link_ops tegra_sor_dp_link_ops = {
+	.apply_training = tegra_sor_dp_link_apply_training,
+	.configure = tegra_sor_dp_link_configure,
+};
+
 static void tegra_sor_super_update(struct tegra_sor *sor)
 {
 	tegra_sor_writel(sor, 0, SOR_SUPER_STATE0);
@@ -908,17 +1157,17 @@ static int tegra_sor_compute_config(struct tegra_sor *sor,
 				    struct drm_dp_link *link)
 {
 	const u64 f = 100000, link_rate = link->rate * 1000;
-	const u64 pclk = mode->clock * 1000;
+	const u64 pclk = (u64)mode->clock * 1000;
 	u64 input, output, watermark, num;
 	struct tegra_sor_params params;
 	u32 num_syms_per_line;
 	unsigned int i;
 
-	if (!link_rate || !link->num_lanes || !pclk || !config->bits_per_pixel)
+	if (!link_rate || !link->lanes || !pclk || !config->bits_per_pixel)
 		return -EINVAL;
 
-	output = link_rate * 8 * link->num_lanes;
 	input = pclk * config->bits_per_pixel;
+	output = link_rate * 8 * link->lanes;
 
 	if (input >= output)
 		return -ERANGE;
@@ -961,7 +1210,7 @@ static int tegra_sor_compute_config(struct tegra_sor *sor,
 	watermark = div_u64(watermark + params.error, f);
 	config->watermark = watermark + (config->bits_per_pixel / 8) + 2;
 	num_syms_per_line = (mode->hdisplay * config->bits_per_pixel) *
-			    (link->num_lanes * 8);
+			    (link->lanes * 8);
 
 	if (config->watermark > 30) {
 		config->watermark = 30;
@@ -978,15 +1227,15 @@ static int tegra_sor_compute_config(struct tegra_sor *sor,
 	num = ((mode->htotal - mode->hdisplay) - 7) * link_rate;
 	config->hblank_symbols = div_u64(num, pclk);
 
-	if (link->capabilities & DP_LINK_CAP_ENHANCED_FRAMING)
+	if (link->caps.enhanced_framing)
 		config->hblank_symbols -= 3;
 
-	config->hblank_symbols -= 12 / link->num_lanes;
+	config->hblank_symbols -= 12 / link->lanes;
 
 	/* compute the number of symbols per vertical blanking interval */
 	num = (mode->hdisplay - 25) * link_rate;
 	config->vblank_symbols = div_u64(num, pclk);
-	config->vblank_symbols -= 36 / link->num_lanes + 4;
+	config->vblank_symbols -= 36 / link->lanes + 4;
 
 	dev_dbg(sor->dev, "blank symbols: H:%u V:%u\n", config->hblank_symbols,
 		config->vblank_symbols);
@@ -1202,29 +1451,6 @@ static int tegra_sor_power_down(struct tegra_sor *sor)
 		return err;
 	}
 
-	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
-	value &= ~(SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_0 |
-		   SOR_DP_PADCTL_PD_TXD_1 | SOR_DP_PADCTL_PD_TXD_2);
-	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
-
-	/* stop lane sequencer */
-	value = SOR_LANE_SEQ_CTL_TRIGGER | SOR_LANE_SEQ_CTL_SEQUENCE_UP |
-		SOR_LANE_SEQ_CTL_POWER_STATE_DOWN;
-	tegra_sor_writel(sor, value, SOR_LANE_SEQ_CTL);
-
-	timeout = jiffies + msecs_to_jiffies(250);
-
-	while (time_before(jiffies, timeout)) {
-		value = tegra_sor_readl(sor, SOR_LANE_SEQ_CTL);
-		if ((value & SOR_LANE_SEQ_CTL_TRIGGER) == 0)
-			break;
-
-		usleep_range(25, 100);
-	}
-
-	if ((value & SOR_LANE_SEQ_CTL_TRIGGER) != 0)
-		return -ETIMEDOUT;
-
 	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
 	value |= SOR_PLL2_PORT_POWERDOWN;
 	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
@@ -1464,7 +1690,6 @@ static int tegra_sor_late_register(struct drm_connector *connector)
 	struct drm_minor *minor = connector->dev->primary;
 	struct dentry *root = connector->debugfs_entry;
 	struct tegra_sor *sor = to_sor(output);
-	int err;
 
 	sor->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
 				     GFP_KERNEL);
@@ -1474,17 +1699,9 @@ static int tegra_sor_late_register(struct drm_connector *connector)
 	for (i = 0; i < count; i++)
 		sor->debugfs_files[i].data = sor;
 
-	err = drm_debugfs_create_files(sor->debugfs_files, count, root, minor);
-	if (err < 0)
-		goto free;
+	drm_debugfs_create_files(sor->debugfs_files, count, root, minor);
 
 	return 0;
-
-free:
-	kfree(sor->debugfs_files);
-	sor->debugfs_files = NULL;
-
-	return err;
 }
 
 static void tegra_sor_early_unregister(struct drm_connector *connector)
@@ -1494,6 +1711,7 @@ static void tegra_sor_early_unregister(struct drm_connector *connector)
 	struct tegra_sor *sor = to_sor(output);
 
 	drm_debugfs_remove_files(sor->debugfs_files, count,
+				 connector->debugfs_entry,
 				 connector->dev->primary);
 	kfree(sor->debugfs_files);
 	sor->debugfs_files = NULL;
@@ -1572,7 +1790,7 @@ static int tegra_sor_connector_get_modes(struct drm_connector *connector)
 
 static enum drm_mode_status
 tegra_sor_connector_mode_valid(struct drm_connector *connector,
-			       struct drm_display_mode *mode)
+			       const struct drm_display_mode *mode)
 {
 	return MODE_OK;
 }
@@ -1582,407 +1800,6 @@ static const struct drm_connector_helper_funcs tegra_sor_connector_helper_funcs
 	.mode_valid = tegra_sor_connector_mode_valid,
 };
 
-static const struct drm_encoder_funcs tegra_sor_encoder_funcs = {
-	.destroy = tegra_output_encoder_destroy,
-};
-
-static void tegra_sor_edp_disable(struct drm_encoder *encoder)
-{
-	struct tegra_output *output = encoder_to_output(encoder);
-	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
-	struct tegra_sor *sor = to_sor(output);
-	u32 value;
-	int err;
-
-	if (output->panel)
-		drm_panel_disable(output->panel);
-
-	err = tegra_sor_detach(sor);
-	if (err < 0)
-		dev_err(sor->dev, "failed to detach SOR: %d\n", err);
-
-	tegra_sor_writel(sor, 0, SOR_STATE1);
-	tegra_sor_update(sor);
-
-	/*
-	 * The following accesses registers of the display controller, so make
-	 * sure it's only executed when the output is attached to one.
-	 */
-	if (dc) {
-		value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
-		value &= ~SOR_ENABLE(0);
-		tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
-
-		tegra_dc_commit(dc);
-	}
-
-	err = tegra_sor_power_down(sor);
-	if (err < 0)
-		dev_err(sor->dev, "failed to power down SOR: %d\n", err);
-
-	if (sor->aux) {
-		err = drm_dp_aux_disable(sor->aux);
-		if (err < 0)
-			dev_err(sor->dev, "failed to disable DP: %d\n", err);
-	}
-
-	err = tegra_io_pad_power_disable(sor->pad);
-	if (err < 0)
-		dev_err(sor->dev, "failed to power off I/O pad: %d\n", err);
-
-	if (output->panel)
-		drm_panel_unprepare(output->panel);
-
-	pm_runtime_put(sor->dev);
-}
-
-#if 0
-static int calc_h_ref_to_sync(const struct drm_display_mode *mode,
-			      unsigned int *value)
-{
-	unsigned int hfp, hsw, hbp, a = 0, b;
-
-	hfp = mode->hsync_start - mode->hdisplay;
-	hsw = mode->hsync_end - mode->hsync_start;
-	hbp = mode->htotal - mode->hsync_end;
-
-	pr_info("hfp: %u, hsw: %u, hbp: %u\n", hfp, hsw, hbp);
-
-	b = hfp - 1;
-
-	pr_info("a: %u, b: %u\n", a, b);
-	pr_info("a + hsw + hbp = %u\n", a + hsw + hbp);
-
-	if (a + hsw + hbp <= 11) {
-		a = 1 + 11 - hsw - hbp;
-		pr_info("a: %u\n", a);
-	}
-
-	if (a > b)
-		return -EINVAL;
-
-	if (hsw < 1)
-		return -EINVAL;
-
-	if (mode->hdisplay < 16)
-		return -EINVAL;
-
-	if (value) {
-		if (b > a && a % 2)
-			*value = a + 1;
-		else
-			*value = a;
-	}
-
-	return 0;
-}
-#endif
-
-static void tegra_sor_edp_enable(struct drm_encoder *encoder)
-{
-	struct drm_display_mode *mode = &encoder->crtc->state->adjusted_mode;
-	struct tegra_output *output = encoder_to_output(encoder);
-	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
-	struct tegra_sor *sor = to_sor(output);
-	struct tegra_sor_config config;
-	struct tegra_sor_state *state;
-	struct drm_dp_link link;
-	u8 rate, lanes;
-	unsigned int i;
-	int err = 0;
-	u32 value;
-
-	state = to_sor_state(output->connector.state);
-
-	pm_runtime_get_sync(sor->dev);
-
-	if (output->panel)
-		drm_panel_prepare(output->panel);
-
-	err = drm_dp_aux_enable(sor->aux);
-	if (err < 0)
-		dev_err(sor->dev, "failed to enable DP: %d\n", err);
-
-	err = drm_dp_link_probe(sor->aux, &link);
-	if (err < 0) {
-		dev_err(sor->dev, "failed to probe eDP link: %d\n", err);
-		return;
-	}
-
-	/* switch to safe parent clock */
-	err = tegra_sor_set_parent_clock(sor, sor->clk_safe);
-	if (err < 0)
-		dev_err(sor->dev, "failed to set safe parent clock: %d\n", err);
-
-	memset(&config, 0, sizeof(config));
-	config.bits_per_pixel = state->bpc * 3;
-
-	err = tegra_sor_compute_config(sor, mode, &config, &link);
-	if (err < 0)
-		dev_err(sor->dev, "failed to compute configuration: %d\n", err);
-
-	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
-	value &= ~SOR_CLK_CNTRL_DP_CLK_SEL_MASK;
-	value |= SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_DPCLK;
-	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
-
-	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-	value &= ~SOR_PLL2_BANDGAP_POWERDOWN;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
-	usleep_range(20, 100);
-
-	value = tegra_sor_readl(sor, sor->soc->regs->pll3);
-	value |= SOR_PLL3_PLL_VDD_MODE_3V3;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll3);
-
-	value = SOR_PLL0_ICHPMP(0xf) | SOR_PLL0_VCOCAP_RST |
-		SOR_PLL0_PLLREG_LEVEL_V45 | SOR_PLL0_RESISTOR_EXT;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
-
-	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-	value |= SOR_PLL2_SEQ_PLLCAPPD;
-	value &= ~SOR_PLL2_SEQ_PLLCAPPD_ENFORCE;
-	value |= SOR_PLL2_LVDS_ENABLE;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
-
-	value = SOR_PLL1_TERM_COMPOUT | SOR_PLL1_TMDS_TERM;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll1);
-
-	while (true) {
-		value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-		if ((value & SOR_PLL2_SEQ_PLLCAPPD_ENFORCE) == 0)
-			break;
-
-		usleep_range(250, 1000);
-	}
-
-	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-	value &= ~SOR_PLL2_POWERDOWN_OVERRIDE;
-	value &= ~SOR_PLL2_PORT_POWERDOWN;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
-
-	/*
-	 * power up
-	 */
-
-	/* set safe link bandwidth (1.62 Gbps) */
-	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
-	value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK;
-	value |= SOR_CLK_CNTRL_DP_LINK_SPEED_G1_62;
-	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
-
-	/* step 1 */
-	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-	value |= SOR_PLL2_SEQ_PLLCAPPD_ENFORCE | SOR_PLL2_PORT_POWERDOWN |
-		 SOR_PLL2_BANDGAP_POWERDOWN;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
-
-	value = tegra_sor_readl(sor, sor->soc->regs->pll0);
-	value |= SOR_PLL0_VCOPD | SOR_PLL0_PWR;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
-
-	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
-	value &= ~SOR_DP_PADCTL_PAD_CAL_PD;
-	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
-
-	/* step 2 */
-	err = tegra_io_pad_power_enable(sor->pad);
-	if (err < 0)
-		dev_err(sor->dev, "failed to power on I/O pad: %d\n", err);
-
-	usleep_range(5, 100);
-
-	/* step 3 */
-	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-	value &= ~SOR_PLL2_BANDGAP_POWERDOWN;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
-
-	usleep_range(20, 100);
-
-	/* step 4 */
-	value = tegra_sor_readl(sor, sor->soc->regs->pll0);
-	value &= ~SOR_PLL0_VCOPD;
-	value &= ~SOR_PLL0_PWR;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
-
-	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-	value &= ~SOR_PLL2_SEQ_PLLCAPPD_ENFORCE;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
-
-	usleep_range(200, 1000);
-
-	/* step 5 */
-	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
-	value &= ~SOR_PLL2_PORT_POWERDOWN;
-	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
-
-	/* XXX not in TRM */
-	for (value = 0, i = 0; i < 5; i++)
-		value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->soc->xbar_cfg[i]) |
-			 SOR_XBAR_CTRL_LINK1_XSEL(i, i);
-
-	tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL);
-	tegra_sor_writel(sor, value, SOR_XBAR_CTRL);
-
-	/* switch to DP parent clock */
-	err = tegra_sor_set_parent_clock(sor, sor->clk_dp);
-	if (err < 0)
-		dev_err(sor->dev, "failed to set parent clock: %d\n", err);
-
-	/* power DP lanes */
-	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
-
-	if (link.num_lanes <= 2)
-		value &= ~(SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_2);
-	else
-		value |= SOR_DP_PADCTL_PD_TXD_3 | SOR_DP_PADCTL_PD_TXD_2;
-
-	if (link.num_lanes <= 1)
-		value &= ~SOR_DP_PADCTL_PD_TXD_1;
-	else
-		value |= SOR_DP_PADCTL_PD_TXD_1;
-
-	if (link.num_lanes == 0)
-		value &= ~SOR_DP_PADCTL_PD_TXD_0;
-	else
-		value |= SOR_DP_PADCTL_PD_TXD_0;
-
-	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
-
-	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
-	value &= ~SOR_DP_LINKCTL_LANE_COUNT_MASK;
-	value |= SOR_DP_LINKCTL_LANE_COUNT(link.num_lanes);
-	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
-
-	/* start lane sequencer */
-	value = SOR_LANE_SEQ_CTL_TRIGGER | SOR_LANE_SEQ_CTL_SEQUENCE_DOWN |
-		SOR_LANE_SEQ_CTL_POWER_STATE_UP;
-	tegra_sor_writel(sor, value, SOR_LANE_SEQ_CTL);
-
-	while (true) {
-		value = tegra_sor_readl(sor, SOR_LANE_SEQ_CTL);
-		if ((value & SOR_LANE_SEQ_CTL_TRIGGER) == 0)
-			break;
-
-		usleep_range(250, 1000);
-	}
-
-	/* set link bandwidth */
-	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
-	value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK;
-	value |= drm_dp_link_rate_to_bw_code(link.rate) << 2;
-	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
-
-	tegra_sor_apply_config(sor, &config);
-
-	/* enable link */
-	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
-	value |= SOR_DP_LINKCTL_ENABLE;
-	value |= SOR_DP_LINKCTL_ENHANCED_FRAME;
-	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
-
-	for (i = 0, value = 0; i < 4; i++) {
-		unsigned long lane = SOR_DP_TPG_CHANNEL_CODING |
-				     SOR_DP_TPG_SCRAMBLER_GALIOS |
-				     SOR_DP_TPG_PATTERN_NONE;
-		value = (value << 8) | lane;
-	}
-
-	tegra_sor_writel(sor, value, SOR_DP_TPG);
-
-	/* enable pad calibration logic */
-	value = tegra_sor_readl(sor, sor->soc->regs->dp_padctl0);
-	value |= SOR_DP_PADCTL_PAD_CAL_PD;
-	tegra_sor_writel(sor, value, sor->soc->regs->dp_padctl0);
-
-	err = drm_dp_link_probe(sor->aux, &link);
-	if (err < 0)
-		dev_err(sor->dev, "failed to probe eDP link: %d\n", err);
-
-	err = drm_dp_link_power_up(sor->aux, &link);
-	if (err < 0)
-		dev_err(sor->dev, "failed to power up eDP link: %d\n", err);
-
-	err = drm_dp_link_configure(sor->aux, &link);
-	if (err < 0)
-		dev_err(sor->dev, "failed to configure eDP link: %d\n", err);
-
-	rate = drm_dp_link_rate_to_bw_code(link.rate);
-	lanes = link.num_lanes;
-
-	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
-	value &= ~SOR_CLK_CNTRL_DP_LINK_SPEED_MASK;
-	value |= SOR_CLK_CNTRL_DP_LINK_SPEED(rate);
-	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
-
-	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
-	value &= ~SOR_DP_LINKCTL_LANE_COUNT_MASK;
-	value |= SOR_DP_LINKCTL_LANE_COUNT(lanes);
-
-	if (link.capabilities & DP_LINK_CAP_ENHANCED_FRAMING)
-		value |= SOR_DP_LINKCTL_ENHANCED_FRAME;
-
-	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
-
-	/* disable training pattern generator */
-
-	for (i = 0; i < link.num_lanes; i++) {
-		unsigned long lane = SOR_DP_TPG_CHANNEL_CODING |
-				     SOR_DP_TPG_SCRAMBLER_GALIOS |
-				     SOR_DP_TPG_PATTERN_NONE;
-		value = (value << 8) | lane;
-	}
-
-	tegra_sor_writel(sor, value, SOR_DP_TPG);
-
-	err = tegra_sor_dp_train_fast(sor, &link);
-	if (err < 0)
-		dev_err(sor->dev, "DP fast link training failed: %d\n", err);
-
-	dev_dbg(sor->dev, "fast link training succeeded\n");
-
-	err = tegra_sor_power_up(sor, 250);
-	if (err < 0)
-		dev_err(sor->dev, "failed to power up SOR: %d\n", err);
-
-	/* CSTM (LVDS, link A/B, upper) */
-	value = SOR_CSTM_LVDS | SOR_CSTM_LINK_ACT_A | SOR_CSTM_LINK_ACT_B |
-		SOR_CSTM_UPPER;
-	tegra_sor_writel(sor, value, SOR_CSTM);
-
-	/* use DP-A protocol */
-	value = tegra_sor_readl(sor, SOR_STATE1);
-	value &= ~SOR_STATE_ASY_PROTOCOL_MASK;
-	value |= SOR_STATE_ASY_PROTOCOL_DP_A;
-	tegra_sor_writel(sor, value, SOR_STATE1);
-
-	tegra_sor_mode_set(sor, mode, state);
-
-	/* PWM setup */
-	err = tegra_sor_setup_pwm(sor, 250);
-	if (err < 0)
-		dev_err(sor->dev, "failed to setup PWM: %d\n", err);
-
-	tegra_sor_update(sor);
-
-	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
-	value |= SOR_ENABLE(0);
-	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
-
-	tegra_dc_commit(dc);
-
-	err = tegra_sor_attach(sor);
-	if (err < 0)
-		dev_err(sor->dev, "failed to attach SOR: %d\n", err);
-
-	err = tegra_sor_wakeup(sor);
-	if (err < 0)
-		dev_err(sor->dev, "failed to enable DC: %d\n", err);
-
-	if (output->panel)
-		drm_panel_enable(output->panel);
-}
-
 static int
 tegra_sor_encoder_atomic_check(struct drm_encoder *encoder,
 			       struct drm_crtc_state *crtc_state,
@@ -2032,12 +1849,6 @@ tegra_sor_encoder_atomic_check(struct drm_encoder *encoder,
 	return 0;
 }
 
-static const struct drm_encoder_helper_funcs tegra_sor_edp_helpers = {
-	.disable = tegra_sor_edp_disable,
-	.enable = tegra_sor_edp_enable,
-	.atomic_check = tegra_sor_encoder_atomic_check,
-};
-
 static inline u32 tegra_sor_hdmi_subpack(const u8 *ptr, size_t size)
 {
 	u32 value = 0;
@@ -2054,7 +1865,7 @@ static void tegra_sor_hdmi_write_infopack(struct tegra_sor *sor,
 {
 	const u8 *ptr = data;
 	unsigned long offset;
-	size_t i, j;
+	size_t i;
 	u32 value;
 
 	switch (ptr[0]) {
@@ -2087,7 +1898,7 @@ static void tegra_sor_hdmi_write_infopack(struct tegra_sor *sor,
 	 * - subpack_low: bytes 0 - 3
 	 * - subpack_high: bytes 4 - 6 (with byte 7 padded to 0x00)
 	 */
-	for (i = 3, j = 0; i < size; i += 7, j += 8) {
+	for (i = 3; i < size; i += 7) {
 		size_t rem = size - i, num = min_t(size_t, rem, 4);
 
 		value = tegra_sor_hdmi_subpack(&ptr[i], num);
@@ -2116,7 +1927,8 @@ tegra_sor_hdmi_setup_avi_infoframe(struct tegra_sor *sor,
 	value &= ~INFOFRAME_CTRL_ENABLE;
 	tegra_sor_writel(sor, value, SOR_HDMI_AVI_INFOFRAME_CTRL);
 
-	err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false);
+	err = drm_hdmi_avi_infoframe_from_display_mode(&frame,
+						       &sor->output.connector, mode);
 	if (err < 0) {
 		dev_err(sor->dev, "failed to setup AVI infoframe: %d\n", err);
 		return err;
@@ -2161,6 +1973,15 @@ static void tegra_sor_audio_prepare(struct tegra_sor *sor)
 {
 	u32 value;
 
+	/*
+	 * Enable and unmask the HDA codec SCRATCH0 and SCRATCH1 register
+	 * interrupts. These are used for interoperability between the HDA
+	 * codec driver and the HDMI/DP driver.
+	 */
+	value = SOR_INT_CODEC_SCRATCH1 | SOR_INT_CODEC_SCRATCH0;
+	tegra_sor_writel(sor, value, SOR_INT_ENABLE);
+	tegra_sor_writel(sor, value, SOR_INT_MASK);
+
 	tegra_sor_write_eld(sor);
 
 	value = SOR_AUDIO_HDA_PRESENSE_ELDV | SOR_AUDIO_HDA_PRESENSE_PD;
@@ -2170,6 +1991,32 @@ static void tegra_sor_audio_prepare(struct tegra_sor *sor)
 static void tegra_sor_audio_unprepare(struct tegra_sor *sor)
 {
 	tegra_sor_writel(sor, 0, SOR_AUDIO_HDA_PRESENSE);
+	tegra_sor_writel(sor, 0, SOR_INT_MASK);
+	tegra_sor_writel(sor, 0, SOR_INT_ENABLE);
+}
+
+static void tegra_sor_audio_enable(struct tegra_sor *sor)
+{
+	u32 value;
+
+	value = tegra_sor_readl(sor, SOR_AUDIO_CNTRL);
+
+	/* select HDA audio input */
+	value &= ~SOR_AUDIO_CNTRL_SOURCE_SELECT(SOURCE_SELECT_MASK);
+	value |= SOR_AUDIO_CNTRL_SOURCE_SELECT(SOURCE_SELECT_HDA);
+
+	/* inject null samples */
+	if (sor->format.channels != 2)
+		value &= ~SOR_AUDIO_CNTRL_INJECT_NULLSMPL;
+	else
+		value |= SOR_AUDIO_CNTRL_INJECT_NULLSMPL;
+
+	value |= SOR_AUDIO_CNTRL_AFIFO_FLUSH;
+
+	tegra_sor_writel(sor, value, SOR_AUDIO_CNTRL);
+
+	/* enable advertising HBR capability */
+	tegra_sor_writel(sor, SOR_AUDIO_SPARE_HBR_ENABLE, SOR_AUDIO_SPARE);
 }
 
 static int tegra_sor_hdmi_enable_audio_infoframe(struct tegra_sor *sor)
@@ -2185,7 +2032,7 @@ static int tegra_sor_hdmi_enable_audio_infoframe(struct tegra_sor *sor)
 		return err;
 	}
 
-	frame.channels = sor->audio.channels;
+	frame.channels = sor->format.channels;
 
 	err = hdmi_audio_infoframe_pack(&frame, buffer, sizeof(buffer));
 	if (err < 0) {
@@ -2207,24 +2054,7 @@ static void tegra_sor_hdmi_audio_enable(struct tegra_sor *sor)
 {
 	u32 value;
 
-	value = tegra_sor_readl(sor, SOR_AUDIO_CNTRL);
-
-	/* select HDA audio input */
-	value &= ~SOR_AUDIO_CNTRL_SOURCE_SELECT(SOURCE_SELECT_MASK);
-	value |= SOR_AUDIO_CNTRL_SOURCE_SELECT(SOURCE_SELECT_HDA);
-
-	/* inject null samples */
-	if (sor->audio.channels != 2)
-		value &= ~SOR_AUDIO_CNTRL_INJECT_NULLSMPL;
-	else
-		value |= SOR_AUDIO_CNTRL_INJECT_NULLSMPL;
-
-	value |= SOR_AUDIO_CNTRL_AFIFO_FLUSH;
-
-	tegra_sor_writel(sor, value, SOR_AUDIO_CNTRL);
-
-	/* enable advertising HBR capability */
-	tegra_sor_writel(sor, SOR_AUDIO_SPARE_HBR_ENABLE, SOR_AUDIO_SPARE);
+	tegra_sor_audio_enable(sor);
 
 	tegra_sor_writel(sor, 0, SOR_HDMI_ACR_CTRL);
 
@@ -2245,7 +2075,7 @@ static void tegra_sor_hdmi_audio_enable(struct tegra_sor *sor)
 	value = SOR_HDMI_AUDIO_N_RESET | SOR_HDMI_AUDIO_N_LOOKUP;
 	tegra_sor_writel(sor, value, SOR_HDMI_AUDIO_N);
 
-	value = (24000 * 4096) / (128 * sor->audio.sample_rate / 1000);
+	value = (24000 * 4096) / (128 * sor->format.sample_rate / 1000);
 	tegra_sor_writel(sor, value, SOR_AUDIO_AVAL_0320);
 	tegra_sor_writel(sor, 4096, SOR_AUDIO_NVAL_0320);
 
@@ -2258,15 +2088,15 @@ static void tegra_sor_hdmi_audio_enable(struct tegra_sor *sor)
 	tegra_sor_writel(sor, 20000, SOR_AUDIO_AVAL_1764);
 	tegra_sor_writel(sor, 18816, SOR_AUDIO_NVAL_1764);
 
-	value = (24000 * 6144) / (128 * sor->audio.sample_rate / 1000);
+	value = (24000 * 6144) / (128 * sor->format.sample_rate / 1000);
 	tegra_sor_writel(sor, value, SOR_AUDIO_AVAL_0480);
 	tegra_sor_writel(sor, 6144, SOR_AUDIO_NVAL_0480);
 
-	value = (24000 * 12288) / (128 * sor->audio.sample_rate / 1000);
+	value = (24000 * 12288) / (128 * sor->format.sample_rate / 1000);
 	tegra_sor_writel(sor, value, SOR_AUDIO_AVAL_0960);
 	tegra_sor_writel(sor, 12288, SOR_AUDIO_NVAL_0960);
 
-	value = (24000 * 24576) / (128 * sor->audio.sample_rate / 1000);
+	value = (24000 * 24576) / (128 * sor->format.sample_rate / 1000);
 	tegra_sor_writel(sor, value, SOR_AUDIO_AVAL_1920);
 	tegra_sor_writel(sor, 24576, SOR_AUDIO_NVAL_1920);
 
@@ -2315,10 +2145,8 @@ static void tegra_sor_hdmi_disable_scrambling(struct tegra_sor *sor)
 
 static void tegra_sor_hdmi_scdc_disable(struct tegra_sor *sor)
 {
-	struct i2c_adapter *ddc = sor->output.ddc;
-
-	drm_scdc_set_high_tmds_clock_ratio(ddc, false);
-	drm_scdc_set_scrambling(ddc, false);
+	drm_scdc_set_high_tmds_clock_ratio(&sor->output.connector, false);
+	drm_scdc_set_scrambling(&sor->output.connector, false);
 
 	tegra_sor_hdmi_disable_scrambling(sor);
 }
@@ -2343,10 +2171,8 @@ static void tegra_sor_hdmi_enable_scrambling(struct tegra_sor *sor)
 
 static void tegra_sor_hdmi_scdc_enable(struct tegra_sor *sor)
 {
-	struct i2c_adapter *ddc = sor->output.ddc;
-
-	drm_scdc_set_high_tmds_clock_ratio(ddc, true);
-	drm_scdc_set_scrambling(ddc, true);
+	drm_scdc_set_high_tmds_clock_ratio(&sor->output.connector, true);
+	drm_scdc_set_scrambling(&sor->output.connector, true);
 
 	tegra_sor_hdmi_enable_scrambling(sor);
 }
@@ -2354,9 +2180,8 @@ static void tegra_sor_hdmi_scdc_enable(struct tegra_sor *sor)
 static void tegra_sor_hdmi_scdc_work(struct work_struct *work)
 {
 	struct tegra_sor *sor = container_of(work, struct tegra_sor, scdc.work);
-	struct i2c_adapter *ddc = sor->output.ddc;
 
-	if (!drm_scdc_get_scrambling_status(ddc)) {
+	if (!drm_scdc_get_scrambling_status(&sor->output.connector)) {
 		DRM_DEBUG_KMS("SCDC not scrambled\n");
 		tegra_sor_hdmi_scdc_enable(sor);
 	}
@@ -2400,9 +2225,9 @@ static void tegra_sor_hdmi_disable(struct drm_encoder *encoder)
 	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
 
 	if (!sor->soc->has_nvdisplay)
-		value &= ~(SOR1_TIMING_CYA | SOR_ENABLE(1));
-	else
-		value &= ~SOR_ENABLE(sor->index);
+		value &= ~SOR1_TIMING_CYA;
+
+	value &= ~SOR_ENABLE(sor->index);
 
 	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
 
@@ -2416,7 +2241,7 @@ static void tegra_sor_hdmi_disable(struct drm_encoder *encoder)
 	if (err < 0)
 		dev_err(sor->dev, "failed to power off I/O pad: %d\n", err);
 
-	pm_runtime_put(sor->dev);
+	host1x_client_suspend(&sor->client);
 }
 
 static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
@@ -2437,7 +2262,11 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	mode = &encoder->crtc->state->adjusted_mode;
 	pclk = mode->clock * 1000;
 
-	pm_runtime_get_sync(sor->dev);
+	err = host1x_client_resume(&sor->client);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to resume: %d\n", err);
+		return;
+	}
 
 	/* switch to safe parent clock */
 	err = tegra_sor_set_parent_clock(sor, sor->clk_safe);
@@ -2554,22 +2383,40 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 
 	/* XXX not in TRM */
 	for (value = 0, i = 0; i < 5; i++)
-		value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->soc->xbar_cfg[i]) |
+		value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->xbar_cfg[i]) |
 			 SOR_XBAR_CTRL_LINK1_XSEL(i, i);
 
 	tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL);
 	tegra_sor_writel(sor, value, SOR_XBAR_CTRL);
 
-	/* switch to parent clock */
-	err = clk_set_parent(sor->clk, sor->clk_parent);
+	/*
+	 * Switch the pad clock to the DP clock. Note that we cannot actually
+	 * do this because Tegra186 and later don't support clk_set_parent()
+	 * on the sorX_pad_clkout clocks. We already do the equivalent above
+	 * using the DP_CLK_SEL mux of the SOR_CLK_CNTRL register.
+	 */
+#if 0
+	err = clk_set_parent(sor->clk_pad, sor->clk_dp);
 	if (err < 0) {
-		dev_err(sor->dev, "failed to set parent clock: %d\n", err);
+		dev_err(sor->dev, "failed to select pad parent clock: %d\n",
+			err);
 		return;
 	}
+#endif
 
+	/* switch the SOR clock to the pad clock */
 	err = tegra_sor_set_parent_clock(sor, sor->clk_pad);
 	if (err < 0) {
-		dev_err(sor->dev, "failed to set pad clock: %d\n", err);
+		dev_err(sor->dev, "failed to select SOR parent clock: %d\n",
+			err);
+		return;
+	}
+
+	/* switch the output clock to the parent pixel clock */
+	err = clk_set_parent(sor->clk, sor->clk_parent);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to select output parent clock: %d\n",
+			err);
 		return;
 	}
 
@@ -2775,9 +2622,9 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
 
 	if (!sor->soc->has_nvdisplay)
-		value |= SOR_ENABLE(1) | SOR1_TIMING_CYA;
-	else
-		value |= SOR_ENABLE(sor->index);
+		value |= SOR1_TIMING_CYA;
+
+	value |= SOR_ENABLE(sor->index);
 
 	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
 
@@ -2804,18 +2651,404 @@ static const struct drm_encoder_helper_funcs tegra_sor_hdmi_helpers = {
 	.atomic_check = tegra_sor_encoder_atomic_check,
 };
 
+static void tegra_sor_dp_disable(struct drm_encoder *encoder)
+{
+	struct tegra_output *output = encoder_to_output(encoder);
+	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
+	struct tegra_sor *sor = to_sor(output);
+	u32 value;
+	int err;
+
+	if (output->panel)
+		drm_panel_disable(output->panel);
+
+	/*
+	 * Do not attempt to power down a DP link if we're not connected since
+	 * the AUX transactions would just be timing out.
+	 */
+	if (output->connector.status != connector_status_disconnected) {
+		err = drm_dp_link_power_down(sor->aux, sor->link.revision);
+		if (err < 0)
+			dev_err(sor->dev, "failed to power down link: %d\n",
+				err);
+	}
+
+	err = tegra_sor_detach(sor);
+	if (err < 0)
+		dev_err(sor->dev, "failed to detach SOR: %d\n", err);
+
+	tegra_sor_writel(sor, 0, SOR_STATE1);
+	tegra_sor_update(sor);
+
+	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
+	value &= ~SOR_ENABLE(sor->index);
+	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
+	tegra_dc_commit(dc);
+
+	value = tegra_sor_readl(sor, SOR_STATE1);
+	value &= ~SOR_STATE_ASY_PROTOCOL_MASK;
+	value &= ~SOR_STATE_ASY_SUBOWNER_MASK;
+	value &= ~SOR_STATE_ASY_OWNER_MASK;
+	tegra_sor_writel(sor, value, SOR_STATE1);
+	tegra_sor_update(sor);
+
+	/* switch to safe parent clock */
+	err = tegra_sor_set_parent_clock(sor, sor->clk_safe);
+	if (err < 0)
+		dev_err(sor->dev, "failed to set safe clock: %d\n", err);
+
+	err = tegra_sor_power_down(sor);
+	if (err < 0)
+		dev_err(sor->dev, "failed to power down SOR: %d\n", err);
+
+	err = tegra_io_pad_power_disable(sor->pad);
+	if (err < 0)
+		dev_err(sor->dev, "failed to power off I/O pad: %d\n", err);
+
+	err = drm_dp_aux_disable(sor->aux);
+	if (err < 0)
+		dev_err(sor->dev, "failed disable DPAUX: %d\n", err);
+
+	if (output->panel)
+		drm_panel_unprepare(output->panel);
+
+	host1x_client_suspend(&sor->client);
+}
+
+static void tegra_sor_dp_enable(struct drm_encoder *encoder)
+{
+	struct tegra_output *output = encoder_to_output(encoder);
+	struct tegra_dc *dc = to_tegra_dc(encoder->crtc);
+	struct tegra_sor *sor = to_sor(output);
+	struct tegra_sor_config config;
+	struct tegra_sor_state *state;
+	struct drm_display_mode *mode;
+	struct drm_display_info *info;
+	unsigned int i;
+	u32 value;
+	int err;
+
+	state = to_sor_state(output->connector.state);
+	mode = &encoder->crtc->state->adjusted_mode;
+	info = &output->connector.display_info;
+
+	err = host1x_client_resume(&sor->client);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to resume: %d\n", err);
+		return;
+	}
+
+	/* switch to safe parent clock */
+	err = tegra_sor_set_parent_clock(sor, sor->clk_safe);
+	if (err < 0)
+		dev_err(sor->dev, "failed to set safe parent clock: %d\n", err);
+
+	err = tegra_io_pad_power_enable(sor->pad);
+	if (err < 0)
+		dev_err(sor->dev, "failed to power on I/O pad: %d\n", err);
+
+	usleep_range(20, 100);
+
+	err = drm_dp_aux_enable(sor->aux);
+	if (err < 0)
+		dev_err(sor->dev, "failed to enable DPAUX: %d\n", err);
+
+	err = drm_dp_link_probe(sor->aux, &sor->link);
+	if (err < 0)
+		dev_err(sor->dev, "failed to probe DP link: %d\n", err);
+
+	tegra_sor_filter_rates(sor);
+
+	err = drm_dp_link_choose(&sor->link, mode, info);
+	if (err < 0)
+		dev_err(sor->dev, "failed to choose link: %d\n", err);
+
+	if (output->panel)
+		drm_panel_prepare(output->panel);
+
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
+	value &= ~SOR_PLL2_BANDGAP_POWERDOWN;
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
+
+	usleep_range(20, 40);
+
+	value = tegra_sor_readl(sor, sor->soc->regs->pll3);
+	value |= SOR_PLL3_PLL_VDD_MODE_3V3;
+	tegra_sor_writel(sor, value, sor->soc->regs->pll3);
+
+	value = tegra_sor_readl(sor, sor->soc->regs->pll0);
+	value &= ~(SOR_PLL0_VCOPD | SOR_PLL0_PWR);
+	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
+
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
+	value &= ~SOR_PLL2_SEQ_PLLCAPPD_ENFORCE;
+	value |= SOR_PLL2_SEQ_PLLCAPPD;
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
+
+	usleep_range(200, 400);
+
+	value = tegra_sor_readl(sor, sor->soc->regs->pll2);
+	value &= ~SOR_PLL2_POWERDOWN_OVERRIDE;
+	value &= ~SOR_PLL2_PORT_POWERDOWN;
+	tegra_sor_writel(sor, value, sor->soc->regs->pll2);
+
+	value = tegra_sor_readl(sor, SOR_CLK_CNTRL);
+	value &= ~SOR_CLK_CNTRL_DP_CLK_SEL_MASK;
+
+	if (output->panel)
+		value |= SOR_CLK_CNTRL_DP_CLK_SEL_SINGLE_DPCLK;
+	else
+		value |= SOR_CLK_CNTRL_DP_CLK_SEL_DIFF_DPCLK;
+
+	tegra_sor_writel(sor, value, SOR_CLK_CNTRL);
+
+	usleep_range(200, 400);
+
+	value = tegra_sor_readl(sor, SOR_DP_SPARE0);
+	/* XXX not in TRM */
+	if (output->panel)
+		value |= SOR_DP_SPARE_PANEL_INTERNAL;
+	else
+		value &= ~SOR_DP_SPARE_PANEL_INTERNAL;
+
+	value |= SOR_DP_SPARE_SEQ_ENABLE;
+	tegra_sor_writel(sor, value, SOR_DP_SPARE0);
+
+	/* XXX not in TRM */
+	tegra_sor_writel(sor, 0, SOR_LVDS);
+
+	value = tegra_sor_readl(sor, sor->soc->regs->pll0);
+	value &= ~SOR_PLL0_ICHPMP_MASK;
+	value &= ~SOR_PLL0_VCOCAP_MASK;
+	value |= SOR_PLL0_ICHPMP(0x1);
+	value |= SOR_PLL0_VCOCAP(0x3);
+	value |= SOR_PLL0_RESISTOR_EXT;
+	tegra_sor_writel(sor, value, sor->soc->regs->pll0);
+
+	/* XXX not in TRM; use the per-device crossbar config like HDMI does */
+	for (value = 0, i = 0; i < 5; i++)
+		value |= SOR_XBAR_CTRL_LINK0_XSEL(i, sor->xbar_cfg[i]) |
+			 SOR_XBAR_CTRL_LINK1_XSEL(i, i);
+
+	tegra_sor_writel(sor, 0x00000000, SOR_XBAR_POL);
+	tegra_sor_writel(sor, value, SOR_XBAR_CTRL);
+
+	/*
+	 * Switch the pad clock to the DP clock. Note that we cannot actually
+	 * do this because Tegra186 and later don't support clk_set_parent()
+	 * on the sorX_pad_clkout clocks. We already do the equivalent above
+	 * using the DP_CLK_SEL mux of the SOR_CLK_CNTRL register.
+	 */
+#if 0
+	err = clk_set_parent(sor->clk_pad, sor->clk_parent);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to select pad parent clock: %d\n",
+			err);
+		return;
+	}
+#endif
+
+	/* switch the SOR clock to the pad clock */
+	err = tegra_sor_set_parent_clock(sor, sor->clk_pad);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to select SOR parent clock: %d\n",
+			err);
+		return;
+	}
+
+	/* switch the output clock to the parent pixel clock */
+	err = clk_set_parent(sor->clk, sor->clk_parent);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to select output parent clock: %d\n",
+			err);
+		return;
+	}
+
+	/* use DP-A protocol */
+	value = tegra_sor_readl(sor, SOR_STATE1);
+	value &= ~SOR_STATE_ASY_PROTOCOL_MASK;
+	value |= SOR_STATE_ASY_PROTOCOL_DP_A;
+	tegra_sor_writel(sor, value, SOR_STATE1);
+
+	/* enable port */
+	value = tegra_sor_readl(sor, SOR_DP_LINKCTL0);
+	value |= SOR_DP_LINKCTL_ENABLE;
+	tegra_sor_writel(sor, value, SOR_DP_LINKCTL0);
+
+	tegra_sor_dp_term_calibrate(sor);
+
+	err = drm_dp_link_train(&sor->link);
+	if (err < 0)
+		dev_err(sor->dev, "link training failed: %d\n", err);
+	else
+		dev_dbg(sor->dev, "link training succeeded\n");
+
+	err = drm_dp_link_power_up(sor->aux, sor->link.revision);
+	if (err < 0)
+		dev_err(sor->dev, "failed to power up DP link: %d\n", err);
+
+	/* compute configuration */
+	memset(&config, 0, sizeof(config));
+	config.bits_per_pixel = state->bpc * 3;
+
+	err = tegra_sor_compute_config(sor, mode, &config, &sor->link);
+	if (err < 0)
+		dev_err(sor->dev, "failed to compute configuration: %d\n", err);
+
+	tegra_sor_apply_config(sor, &config);
+	tegra_sor_mode_set(sor, mode, state);
+
+	if (output->panel) {
+		/* CSTM (LVDS, link A/B, upper) */
+		value = SOR_CSTM_LVDS | SOR_CSTM_LINK_ACT_A | SOR_CSTM_LINK_ACT_B |
+			SOR_CSTM_UPPER;
+		tegra_sor_writel(sor, value, SOR_CSTM);
+
+		/* PWM setup */
+		err = tegra_sor_setup_pwm(sor, 250);
+		if (err < 0)
+			dev_err(sor->dev, "failed to setup PWM: %d\n", err);
+	}
+
+	tegra_sor_update(sor);
+
+	err = tegra_sor_power_up(sor, 250);
+	if (err < 0)
+		dev_err(sor->dev, "failed to power up SOR: %d\n", err);
+
+	/* attach and wake up */
+	err = tegra_sor_attach(sor);
+	if (err < 0)
+		dev_err(sor->dev, "failed to attach SOR: %d\n", err);
+
+	value = tegra_dc_readl(dc, DC_DISP_DISP_WIN_OPTIONS);
+	value |= SOR_ENABLE(sor->index);
+	tegra_dc_writel(dc, value, DC_DISP_DISP_WIN_OPTIONS);
+
+	tegra_dc_commit(dc);
+
+	err = tegra_sor_wakeup(sor);
+	if (err < 0)
+		dev_err(sor->dev, "failed to wakeup SOR: %d\n", err);
+
+	if (output->panel)
+		drm_panel_enable(output->panel);
+}
+
+static const struct drm_encoder_helper_funcs tegra_sor_dp_helpers = {
+	.disable = tegra_sor_dp_disable,
+	.enable = tegra_sor_dp_enable,
+	.atomic_check = tegra_sor_encoder_atomic_check,
+};
+
+static void tegra_sor_disable_regulator(void *data)
+{
+	struct regulator *reg = data;
+
+	regulator_disable(reg);
+}
+
+static int tegra_sor_enable_regulator(struct tegra_sor *sor, struct regulator *reg)
+{
+	int err;
+
+	err = regulator_enable(reg);
+	if (err)
+		return err;
+
+	return devm_add_action_or_reset(sor->dev, tegra_sor_disable_regulator, reg);
+}
+
+static int tegra_sor_hdmi_probe(struct tegra_sor *sor)
+{
+	int err;
+
+	sor->avdd_io_supply = devm_regulator_get(sor->dev, "avdd-io-hdmi-dp");
+	if (IS_ERR(sor->avdd_io_supply))
+		return dev_err_probe(sor->dev, PTR_ERR(sor->avdd_io_supply),
+				     "cannot get AVDD I/O supply\n");
+
+	err = tegra_sor_enable_regulator(sor, sor->avdd_io_supply);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to enable AVDD I/O supply: %d\n",
+			err);
+		return err;
+	}
+
+	sor->vdd_pll_supply = devm_regulator_get(sor->dev, "vdd-hdmi-dp-pll");
+	if (IS_ERR(sor->vdd_pll_supply))
+		return dev_err_probe(sor->dev, PTR_ERR(sor->vdd_pll_supply),
+				     "cannot get VDD PLL supply\n");
+
+	err = tegra_sor_enable_regulator(sor, sor->vdd_pll_supply);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to enable VDD PLL supply: %d\n",
+			err);
+		return err;
+	}
+
+	sor->hdmi_supply = devm_regulator_get(sor->dev, "hdmi");
+	if (IS_ERR(sor->hdmi_supply))
+		return dev_err_probe(sor->dev, PTR_ERR(sor->hdmi_supply),
+				     "cannot get HDMI supply\n");
+
+	err = tegra_sor_enable_regulator(sor, sor->hdmi_supply);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to enable HDMI supply: %d\n", err);
+		return err;
+	}
+
+	INIT_DELAYED_WORK(&sor->scdc, tegra_sor_hdmi_scdc_work);
+
+	return 0;
+}
+
+static const struct tegra_sor_ops tegra_sor_hdmi_ops = {
+	.name = "HDMI",
+	.probe = tegra_sor_hdmi_probe,
+	.audio_enable = tegra_sor_hdmi_audio_enable,
+	.audio_disable = tegra_sor_hdmi_audio_disable,
+};
+
+static int tegra_sor_dp_probe(struct tegra_sor *sor)
+{
+	int err;
+
+	sor->avdd_io_supply = devm_regulator_get(sor->dev, "avdd-io-hdmi-dp");
+	if (IS_ERR(sor->avdd_io_supply))
+		return PTR_ERR(sor->avdd_io_supply);
+
+	err = tegra_sor_enable_regulator(sor, sor->avdd_io_supply);
+	if (err < 0)
+		return err;
+
+	sor->vdd_pll_supply = devm_regulator_get(sor->dev, "vdd-hdmi-dp-pll");
+	if (IS_ERR(sor->vdd_pll_supply))
+		return PTR_ERR(sor->vdd_pll_supply);
+
+	err = tegra_sor_enable_regulator(sor, sor->vdd_pll_supply);
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+
+static const struct tegra_sor_ops tegra_sor_dp_ops = {
+	.name = "DP",
+	.probe = tegra_sor_dp_probe,
+};
+
 static int tegra_sor_init(struct host1x_client *client)
 {
-	struct drm_device *drm = dev_get_drvdata(client->parent);
+	struct drm_device *drm = dev_get_drvdata(client->host);
 	const struct drm_encoder_helper_funcs *helpers = NULL;
 	struct tegra_sor *sor = host1x_client_to_sor(client);
 	int connector = DRM_MODE_CONNECTOR_Unknown;
 	int encoder = DRM_MODE_ENCODER_NONE;
-	u32 value;
 	int err;
 
 	if (!sor->aux) {
-		if (sor->soc->supports_hdmi) {
+		if (sor->ops == &tegra_sor_hdmi_ops) {
 			connector = DRM_MODE_CONNECTOR_HDMIA;
 			encoder = DRM_MODE_ENCODER_TMDS;
 			helpers = &tegra_sor_hdmi_helpers;
@@ -2824,27 +3057,31 @@ static int tegra_sor_init(struct host1x_client *client)
 			encoder = DRM_MODE_ENCODER_LVDS;
 		}
 	} else {
-		if (sor->soc->supports_edp) {
+		if (sor->output.panel) {
 			connector = DRM_MODE_CONNECTOR_eDP;
 			encoder = DRM_MODE_ENCODER_TMDS;
-			helpers = &tegra_sor_edp_helpers;
-		} else if (sor->soc->supports_dp) {
+			helpers = &tegra_sor_dp_helpers;
+		} else {
 			connector = DRM_MODE_CONNECTOR_DisplayPort;
 			encoder = DRM_MODE_ENCODER_TMDS;
+			helpers = &tegra_sor_dp_helpers;
 		}
+
+		sor->link.ops = &tegra_sor_dp_link_ops;
+		sor->link.aux = sor->aux;
 	}
 
 	sor->output.dev = sor->dev;
 
-	drm_connector_init(drm, &sor->output.connector,
-			   &tegra_sor_connector_funcs,
-			   connector);
+	drm_connector_init_with_ddc(drm, &sor->output.connector,
+				    &tegra_sor_connector_funcs,
+				    connector,
+				    sor->output.ddc);
 	drm_connector_helper_add(&sor->output.connector,
 				 &tegra_sor_connector_helper_funcs);
 	sor->output.connector.dpms = DRM_MODE_DPMS_OFF;
 
-	drm_encoder_init(drm, &sor->output.encoder, &tegra_sor_encoder_funcs,
-			 encoder, NULL);
+	drm_simple_encoder_init(drm, &sor->output.encoder, encoder);
 	drm_encoder_helper_add(&sor->output.encoder, helpers);
 
 	drm_connector_attach_encoder(&sor->output.connector,
@@ -2872,18 +3109,31 @@ static int tegra_sor_init(struct host1x_client *client)
 	 * kernel is possible.
 	 */
 	if (sor->rst) {
+		err = pm_runtime_resume_and_get(sor->dev);
+		if (err < 0) {
+			dev_err(sor->dev, "failed to get runtime PM: %d\n", err);
+			return err;
+		}
+
+		err = reset_control_acquire(sor->rst);
+		if (err < 0) {
+			dev_err(sor->dev, "failed to acquire SOR reset: %d\n",
+				err);
+			goto rpm_put;
+		}
+
 		err = reset_control_assert(sor->rst);
 		if (err < 0) {
 			dev_err(sor->dev, "failed to assert SOR reset: %d\n",
 				err);
-			return err;
+			goto rpm_put;
 		}
 	}
 
 	err = clk_prepare_enable(sor->clk);
 	if (err < 0) {
 		dev_err(sor->dev, "failed to enable clock: %d\n", err);
-		return err;
+		goto rpm_put;
 	}
 
 	usleep_range(1000, 3000);
@@ -2893,28 +3143,34 @@ static int tegra_sor_init(struct host1x_client *client)
 		if (err < 0) {
 			dev_err(sor->dev, "failed to deassert SOR reset: %d\n",
 				err);
-			return err;
+			clk_disable_unprepare(sor->clk);
+			goto rpm_put;
 		}
+
+		reset_control_release(sor->rst);
+		pm_runtime_put(sor->dev);
 	}
 
 	err = clk_prepare_enable(sor->clk_safe);
-	if (err < 0)
+	if (err < 0) {
+		clk_disable_unprepare(sor->clk);
 		return err;
+	}
 
 	err = clk_prepare_enable(sor->clk_dp);
-	if (err < 0)
+	if (err < 0) {
+		clk_disable_unprepare(sor->clk_safe);
+		clk_disable_unprepare(sor->clk);
 		return err;
-
-	/*
-	 * Enable and unmask the HDA codec SCRATCH0 register interrupt. This
-	 * is used for interoperability between the HDA codec driver and the
-	 * HDMI/DP driver.
-	 */
-	value = SOR_INT_CODEC_SCRATCH1 | SOR_INT_CODEC_SCRATCH0;
-	tegra_sor_writel(sor, value, SOR_INT_ENABLE);
-	tegra_sor_writel(sor, value, SOR_INT_MASK);
+	}
 
 	return 0;
+
+rpm_put:
+	if (sor->rst)
+		pm_runtime_put(sor->dev);
+
+	return err;
 }
 
 static int tegra_sor_exit(struct host1x_client *client)
@@ -2922,9 +3178,6 @@ static int tegra_sor_exit(struct host1x_client *client)
 	struct tegra_sor *sor = host1x_client_to_sor(client);
 	int err;
 
-	tegra_sor_writel(sor, 0, SOR_INT_MASK);
-	tegra_sor_writel(sor, 0, SOR_INT_ENABLE);
-
 	tegra_output_exit(&sor->output);
 
 	if (sor->aux) {
@@ -2942,78 +3195,80 @@ static int tegra_sor_exit(struct host1x_client *client)
 	return 0;
 }
 
-static const struct host1x_client_ops sor_client_ops = {
-	.init = tegra_sor_init,
-	.exit = tegra_sor_exit,
-};
-
-static const struct tegra_sor_ops tegra_sor_edp_ops = {
-	.name = "eDP",
-};
-
-static int tegra_sor_hdmi_probe(struct tegra_sor *sor)
+static int tegra_sor_runtime_suspend(struct host1x_client *client)
 {
+	struct tegra_sor *sor = host1x_client_to_sor(client);
+	struct device *dev = client->dev;
 	int err;
 
-	sor->avdd_io_supply = devm_regulator_get(sor->dev, "avdd-io");
-	if (IS_ERR(sor->avdd_io_supply)) {
-		dev_err(sor->dev, "cannot get AVDD I/O supply: %ld\n",
-			PTR_ERR(sor->avdd_io_supply));
-		return PTR_ERR(sor->avdd_io_supply);
-	}
+	if (sor->rst) {
+		err = reset_control_assert(sor->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to assert reset: %d\n", err);
+			return err;
+		}
 
-	err = regulator_enable(sor->avdd_io_supply);
-	if (err < 0) {
-		dev_err(sor->dev, "failed to enable AVDD I/O supply: %d\n",
-			err);
-		return err;
+		reset_control_release(sor->rst);
 	}
 
-	sor->vdd_pll_supply = devm_regulator_get(sor->dev, "vdd-pll");
-	if (IS_ERR(sor->vdd_pll_supply)) {
-		dev_err(sor->dev, "cannot get VDD PLL supply: %ld\n",
-			PTR_ERR(sor->vdd_pll_supply));
-		return PTR_ERR(sor->vdd_pll_supply);
-	}
+	usleep_range(1000, 2000);
+
+	clk_disable_unprepare(sor->clk);
+	pm_runtime_put_sync(dev);
+
+	return 0;
+}
+
+static int tegra_sor_runtime_resume(struct host1x_client *client)
+{
+	struct tegra_sor *sor = host1x_client_to_sor(client);
+	struct device *dev = client->dev;
+	int err;
 
-	err = regulator_enable(sor->vdd_pll_supply);
+	err = pm_runtime_resume_and_get(dev);
 	if (err < 0) {
-		dev_err(sor->dev, "failed to enable VDD PLL supply: %d\n",
-			err);
+		dev_err(dev, "failed to get runtime PM: %d\n", err);
 		return err;
 	}
 
-	sor->hdmi_supply = devm_regulator_get(sor->dev, "hdmi");
-	if (IS_ERR(sor->hdmi_supply)) {
-		dev_err(sor->dev, "cannot get HDMI supply: %ld\n",
-			PTR_ERR(sor->hdmi_supply));
-		return PTR_ERR(sor->hdmi_supply);
-	}
-
-	err = regulator_enable(sor->hdmi_supply);
+	err = clk_prepare_enable(sor->clk);
 	if (err < 0) {
-		dev_err(sor->dev, "failed to enable HDMI supply: %d\n", err);
-		return err;
+		dev_err(dev, "failed to enable clock: %d\n", err);
+		goto put_rpm;
 	}
 
-	INIT_DELAYED_WORK(&sor->scdc, tegra_sor_hdmi_scdc_work);
+	usleep_range(1000, 2000);
 
-	return 0;
-}
+	if (sor->rst) {
+		err = reset_control_acquire(sor->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to acquire reset: %d\n", err);
+			goto disable_clk;
+		}
 
-static int tegra_sor_hdmi_remove(struct tegra_sor *sor)
-{
-	regulator_disable(sor->hdmi_supply);
-	regulator_disable(sor->vdd_pll_supply);
-	regulator_disable(sor->avdd_io_supply);
+		err = reset_control_deassert(sor->rst);
+		if (err < 0) {
+			dev_err(dev, "failed to deassert reset: %d\n", err);
+			goto release_reset;
+		}
+	}
 
 	return 0;
+
+release_reset:
+	reset_control_release(sor->rst);
+disable_clk:
+	clk_disable_unprepare(sor->clk);
+put_rpm:
+	pm_runtime_put_sync(dev);
+	return err;
 }
 
-static const struct tegra_sor_ops tegra_sor_hdmi_ops = {
-	.name = "HDMI",
-	.probe = tegra_sor_hdmi_probe,
-	.remove = tegra_sor_hdmi_remove,
+static const struct host1x_client_ops sor_client_ops = {
+	.init = tegra_sor_init,
+	.exit = tegra_sor_exit,
+	.suspend = tegra_sor_runtime_suspend,
+	.resume = tegra_sor_runtime_resume,
 };
 
 static const u8 tegra124_sor_xbar_cfg[5] = {
@@ -3035,14 +3290,161 @@ static const struct tegra_sor_regs tegra124_sor_regs = {
 	.dp_padctl2 = 0x73,
 };
 
+/* Tegra124 and Tegra132 have lanes 0 and 2 swapped. */
+static const u8 tegra124_sor_lane_map[4] = {
+	2, 1, 0, 3,
+};
+
+static const u8 tegra124_sor_voltage_swing[4][4][4] = {
+	{
+		{ 0x13, 0x19, 0x1e, 0x28 },
+		{ 0x1e, 0x25, 0x2d, },
+		{ 0x28, 0x32, },
+		{ 0x3c, },
+	}, {
+		{ 0x12, 0x17, 0x1b, 0x25 },
+		{ 0x1c, 0x23, 0x2a, },
+		{ 0x25, 0x2f, },
+		{ 0x39, }
+	}, {
+		{ 0x12, 0x16, 0x1a, 0x22 },
+		{ 0x1b, 0x20, 0x27, },
+		{ 0x24, 0x2d, },
+		{ 0x36, },
+	}, {
+		{ 0x11, 0x14, 0x17, 0x1f },
+		{ 0x19, 0x1e, 0x24, },
+		{ 0x22, 0x2a, },
+		{ 0x32, },
+	},
+};
+
+static const u8 tegra124_sor_pre_emphasis[4][4][4] = {
+	{
+		{ 0x00, 0x09, 0x13, 0x25 },
+		{ 0x00, 0x0f, 0x1e, },
+		{ 0x00, 0x14, },
+		{ 0x00, },
+	}, {
+		{ 0x00, 0x0a, 0x14, 0x28 },
+		{ 0x00, 0x0f, 0x1e, },
+		{ 0x00, 0x14, },
+		{ 0x00 },
+	}, {
+		{ 0x00, 0x0a, 0x14, 0x28 },
+		{ 0x00, 0x0f, 0x1e, },
+		{ 0x00, 0x14, },
+		{ 0x00, },
+	}, {
+		{ 0x00, 0x0a, 0x14, 0x28 },
+		{ 0x00, 0x0f, 0x1e, },
+		{ 0x00, 0x14, },
+		{ 0x00, },
+	},
+};
+
+static const u8 tegra124_sor_post_cursor[4][4][4] = {
+	{
+		{ 0x00, 0x00, 0x00, 0x00 },
+		{ 0x00, 0x00, 0x00, },
+		{ 0x00, 0x00, },
+		{ 0x00, },
+	}, {
+		{ 0x02, 0x02, 0x04, 0x05 },
+		{ 0x02, 0x04, 0x05, },
+		{ 0x04, 0x05, },
+		{ 0x05, },
+	}, {
+		{ 0x04, 0x05, 0x08, 0x0b },
+		{ 0x05, 0x09, 0x0b, },
+		{ 0x08, 0x0a, },
+		{ 0x0b, },
+	}, {
+		{ 0x05, 0x09, 0x0b, 0x12 },
+		{ 0x09, 0x0d, 0x12, },
+		{ 0x0b, 0x0f, },
+		{ 0x12, },
+	},
+};
+
+static const u8 tegra124_sor_tx_pu[4][4][4] = {
+	{
+		{ 0x20, 0x30, 0x40, 0x60 },
+		{ 0x30, 0x40, 0x60, },
+		{ 0x40, 0x60, },
+		{ 0x60, },
+	}, {
+		{ 0x20, 0x20, 0x30, 0x50 },
+		{ 0x30, 0x40, 0x50, },
+		{ 0x40, 0x50, },
+		{ 0x60, },
+	}, {
+		{ 0x20, 0x20, 0x30, 0x40, },
+		{ 0x30, 0x30, 0x40, },
+		{ 0x40, 0x50, },
+		{ 0x60, },
+	}, {
+		{ 0x20, 0x20, 0x20, 0x40, },
+		{ 0x30, 0x30, 0x40, },
+		{ 0x40, 0x40, },
+		{ 0x60, },
+	},
+};
+
 static const struct tegra_sor_soc tegra124_sor = {
-	.supports_edp = true,
 	.supports_lvds = true,
 	.supports_hdmi = false,
-	.supports_dp = false,
+	.supports_dp = true,
+	.supports_audio = false,
+	.supports_hdcp = false,
+	.regs = &tegra124_sor_regs,
+	.has_nvdisplay = false,
+	.xbar_cfg = tegra124_sor_xbar_cfg,
+	.lane_map = tegra124_sor_lane_map,
+	.voltage_swing = tegra124_sor_voltage_swing,
+	.pre_emphasis = tegra124_sor_pre_emphasis,
+	.post_cursor = tegra124_sor_post_cursor,
+	.tx_pu = tegra124_sor_tx_pu,
+};
+
+static const u8 tegra132_sor_pre_emphasis[4][4][4] = {
+	{
+		{ 0x00, 0x08, 0x12, 0x24 },
+		{ 0x01, 0x0e, 0x1d, },
+		{ 0x01, 0x13, },
+		{ 0x00, },
+	}, {
+		{ 0x00, 0x08, 0x12, 0x24 },
+		{ 0x00, 0x0e, 0x1d, },
+		{ 0x00, 0x13, },
+		{ 0x00 },
+	}, {
+		{ 0x00, 0x08, 0x12, 0x24 },
+		{ 0x00, 0x0e, 0x1d, },
+		{ 0x00, 0x13, },
+		{ 0x00, },
+	}, {
+		{ 0x00, 0x08, 0x12, 0x24 },
+		{ 0x00, 0x0e, 0x1d, },
+		{ 0x00, 0x13, },
+		{ 0x00, },
+	},
+};
+
+static const struct tegra_sor_soc tegra132_sor = {
+	.supports_lvds = true,
+	.supports_hdmi = false,
+	.supports_dp = true,
+	.supports_audio = false,
+	.supports_hdcp = false,
 	.regs = &tegra124_sor_regs,
 	.has_nvdisplay = false,
 	.xbar_cfg = tegra124_sor_xbar_cfg,
+	.lane_map = tegra124_sor_lane_map,
+	.voltage_swing = tegra124_sor_voltage_swing,
+	.pre_emphasis = tegra132_sor_pre_emphasis,
+	.post_cursor = tegra124_sor_post_cursor,
+	.tx_pu = tegra124_sor_tx_pu,
 };
 
 static const struct tegra_sor_regs tegra210_sor_regs = {
@@ -3060,33 +3462,50 @@ static const struct tegra_sor_regs tegra210_sor_regs = {
 	.dp_padctl2 = 0x73,
 };
 
+static const u8 tegra210_sor_xbar_cfg[5] = {
+	2, 1, 0, 3, 4
+};
+
+static const u8 tegra210_sor_lane_map[4] = {
+	0, 1, 2, 3,
+};
+
 static const struct tegra_sor_soc tegra210_sor = {
-	.supports_edp = true,
 	.supports_lvds = false,
 	.supports_hdmi = false,
-	.supports_dp = false,
+	.supports_dp = true,
+	.supports_audio = false,
+	.supports_hdcp = false,
+
 	.regs = &tegra210_sor_regs,
 	.has_nvdisplay = false,
-	.xbar_cfg = tegra124_sor_xbar_cfg,
-};
 
-static const u8 tegra210_sor_xbar_cfg[5] = {
-	2, 1, 0, 3, 4
+	.xbar_cfg = tegra210_sor_xbar_cfg,
+	.lane_map = tegra210_sor_lane_map,
+	.voltage_swing = tegra124_sor_voltage_swing,
+	.pre_emphasis = tegra124_sor_pre_emphasis,
+	.post_cursor = tegra124_sor_post_cursor,
+	.tx_pu = tegra124_sor_tx_pu,
 };
 
 static const struct tegra_sor_soc tegra210_sor1 = {
-	.supports_edp = false,
 	.supports_lvds = false,
 	.supports_hdmi = true,
 	.supports_dp = true,
+	.supports_audio = true,
+	.supports_hdcp = true,
 
 	.regs = &tegra210_sor_regs,
 	.has_nvdisplay = false,
 
 	.num_settings = ARRAY_SIZE(tegra210_sor_hdmi_defaults),
 	.settings = tegra210_sor_hdmi_defaults,
-
 	.xbar_cfg = tegra210_sor_xbar_cfg,
+	.lane_map = tegra210_sor_lane_map,
+	.voltage_swing = tegra124_sor_voltage_swing,
+	.pre_emphasis = tegra124_sor_pre_emphasis,
+	.post_cursor = tegra124_sor_post_cursor,
+	.tx_pu = tegra124_sor_tx_pu,
 };
 
 static const struct tegra_sor_regs tegra186_sor_regs = {
@@ -3104,31 +3523,72 @@ static const struct tegra_sor_regs tegra186_sor_regs = {
 	.dp_padctl2 = 0x16a,
 };
 
-static const struct tegra_sor_soc tegra186_sor = {
-	.supports_edp = false,
-	.supports_lvds = false,
-	.supports_hdmi = false,
-	.supports_dp = true,
-
-	.regs = &tegra186_sor_regs,
-	.has_nvdisplay = true,
+static const u8 tegra186_sor_voltage_swing[4][4][4] = {
+	{
+		{ 0x13, 0x19, 0x1e, 0x28 },
+		{ 0x1e, 0x25, 0x2d, },
+		{ 0x28, 0x32, },
+		{ 0x39, },
+	}, {
+		{ 0x12, 0x16, 0x1b, 0x25 },
+		{ 0x1c, 0x23, 0x2a, },
+		{ 0x25, 0x2f, },
+		{ 0x37, }
+	}, {
+		{ 0x12, 0x16, 0x1a, 0x22 },
+		{ 0x1b, 0x20, 0x27, },
+		{ 0x24, 0x2d, },
+		{ 0x35, },
+	}, {
+		{ 0x11, 0x14, 0x17, 0x1f },
+		{ 0x19, 0x1e, 0x24, },
+		{ 0x22, 0x2a, },
+		{ 0x32, },
+	},
+};
 
-	.xbar_cfg = tegra124_sor_xbar_cfg,
+static const u8 tegra186_sor_pre_emphasis[4][4][4] = {
+	{
+		{ 0x00, 0x08, 0x12, 0x24 },
+		{ 0x01, 0x0e, 0x1d, },
+		{ 0x01, 0x13, },
+		{ 0x00, },
+	}, {
+		{ 0x00, 0x08, 0x12, 0x24 },
+		{ 0x00, 0x0e, 0x1d, },
+		{ 0x00, 0x13, },
+		{ 0x00 },
+	}, {
+		{ 0x00, 0x08, 0x14, 0x24 },
+		{ 0x00, 0x0e, 0x1d, },
+		{ 0x00, 0x13, },
+		{ 0x00, },
+	}, {
+		{ 0x00, 0x08, 0x12, 0x24 },
+		{ 0x00, 0x0e, 0x1d, },
+		{ 0x00, 0x13, },
+		{ 0x00, },
+	},
 };
 
-static const struct tegra_sor_soc tegra186_sor1 = {
-	.supports_edp = false,
+static const struct tegra_sor_soc tegra186_sor = {
 	.supports_lvds = false,
 	.supports_hdmi = true,
 	.supports_dp = true,
+	.supports_audio = true,
+	.supports_hdcp = true,
 
 	.regs = &tegra186_sor_regs,
 	.has_nvdisplay = true,
 
 	.num_settings = ARRAY_SIZE(tegra186_sor_hdmi_defaults),
 	.settings = tegra186_sor_hdmi_defaults,
-
 	.xbar_cfg = tegra124_sor_xbar_cfg,
+	.lane_map = tegra124_sor_lane_map,
+	.voltage_swing = tegra186_sor_voltage_swing,
+	.pre_emphasis = tegra186_sor_pre_emphasis,
+	.post_cursor = tegra124_sor_post_cursor,
+	.tx_pu = tegra124_sor_tx_pu,
 };
 
 static const struct tegra_sor_regs tegra194_sor_regs = {
@@ -3147,10 +3607,11 @@ static const struct tegra_sor_regs tegra194_sor_regs = {
 };
 
 static const struct tegra_sor_soc tegra194_sor = {
-	.supports_edp = true,
 	.supports_lvds = false,
 	.supports_hdmi = true,
 	.supports_dp = true,
+	.supports_audio = true,
+	.supports_hdcp = true,
 
 	.regs = &tegra194_sor_regs,
 	.has_nvdisplay = true,
@@ -3159,14 +3620,19 @@ static const struct tegra_sor_soc tegra194_sor = {
 	.settings = tegra194_sor_hdmi_defaults,
 
 	.xbar_cfg = tegra210_sor_xbar_cfg,
+	.lane_map = tegra124_sor_lane_map,
+	.voltage_swing = tegra186_sor_voltage_swing,
+	.pre_emphasis = tegra186_sor_pre_emphasis,
+	.post_cursor = tegra124_sor_post_cursor,
+	.tx_pu = tegra124_sor_tx_pu,
 };
 
 static const struct of_device_id tegra_sor_of_match[] = {
 	{ .compatible = "nvidia,tegra194-sor", .data = &tegra194_sor },
-	{ .compatible = "nvidia,tegra186-sor1", .data = &tegra186_sor1 },
 	{ .compatible = "nvidia,tegra186-sor", .data = &tegra186_sor },
 	{ .compatible = "nvidia,tegra210-sor1", .data = &tegra210_sor1 },
 	{ .compatible = "nvidia,tegra210-sor", .data = &tegra210_sor },
+	{ .compatible = "nvidia,tegra132-sor", .data = &tegra132_sor },
 	{ .compatible = "nvidia,tegra124-sor", .data = &tegra124_sor },
 	{ },
 };
@@ -3175,6 +3641,8 @@ MODULE_DEVICE_TABLE(of, tegra_sor_of_match);
 static int tegra_sor_parse_dt(struct tegra_sor *sor)
 {
 	struct device_node *np = sor->dev->of_node;
+	u32 xbar_cfg[5];
+	unsigned int i;
 	u32 value;
 	int err;
 
@@ -3190,27 +3658,25 @@ static int tegra_sor_parse_dt(struct tegra_sor *sor)
 		 * earlier
 		 */
 		sor->pad = TEGRA_IO_PAD_HDMI_DP0 + sor->index;
+	} else {
+		if (!sor->soc->supports_audio)
+			sor->index = 0;
+		else
+			sor->index = 1;
 	}
 
-	return 0;
-}
-
-static void tegra_hda_parse_format(unsigned int format, unsigned int *rate,
-				   unsigned int *channels)
-{
-	unsigned int mul, div;
-
-	if (format & AC_FMT_BASE_44K)
-		*rate = 44100;
-	else
-		*rate = 48000;
-
-	mul = (format & AC_FMT_MULT_MASK) >> AC_FMT_MULT_SHIFT;
-	div = (format & AC_FMT_DIV_MASK) >> AC_FMT_DIV_SHIFT;
-
-	*rate = *rate * (mul + 1) / (div + 1);
+	err = of_property_read_u32_array(np, "nvidia,xbar-cfg", xbar_cfg, 5);
+	if (err < 0) {
+		/* fall back to default per-SoC XBAR configuration */
+		for (i = 0; i < 5; i++)
+			sor->xbar_cfg[i] = sor->soc->xbar_cfg[i];
+	} else {
+		/* copy cells to SOR XBAR configuration */
+		for (i = 0; i < 5; i++)
+			sor->xbar_cfg[i] = xbar_cfg[i];
+	}
 
-	*channels = (format & AC_FMT_CHAN_MASK) >> AC_FMT_CHAN_SHIFT;
+	return 0;
 }
 
 static irqreturn_t tegra_sor_irq(int irq, void *data)
@@ -3225,18 +3691,17 @@ static irqreturn_t tegra_sor_irq(int irq, void *data)
 		value = tegra_sor_readl(sor, SOR_AUDIO_HDA_CODEC_SCRATCH0);
 
 		if (value & SOR_AUDIO_HDA_CODEC_SCRATCH0_VALID) {
-			unsigned int format, sample_rate, channels;
+			unsigned int format;
 
 			format = value & SOR_AUDIO_HDA_CODEC_SCRATCH0_FMT_MASK;
 
-			tegra_hda_parse_format(format, &sample_rate, &channels);
-
-			sor->audio.sample_rate = sample_rate;
-			sor->audio.channels = channels;
+			tegra_hda_parse_format(format, &sor->format);
 
-			tegra_sor_hdmi_audio_enable(sor);
+			if (sor->ops->audio_enable)
+				sor->ops->audio_enable(sor);
 		} else {
-			tegra_sor_hdmi_audio_disable(sor);
+			if (sor->ops->audio_disable)
+				sor->ops->audio_disable(sor);
 		}
 	}
 
@@ -3247,7 +3712,6 @@ static int tegra_sor_probe(struct platform_device *pdev)
 {
 	struct device_node *np;
 	struct tegra_sor *sor;
-	struct resource *regs;
 	int err;
 
 	sor = devm_kzalloc(&pdev->dev, sizeof(*sor), GFP_KERNEL);
@@ -3273,6 +3737,9 @@ static int tegra_sor_probe(struct platform_device *pdev)
 
 		if (!sor->aux)
 			return -EPROBE_DEFER;
+
+		if (get_device(sor->aux->dev))
+			sor->output.ddc = &sor->aux->ddc;
 	}
 
 	if (!sor->aux) {
@@ -3287,26 +3754,25 @@ static int tegra_sor_probe(struct platform_device *pdev)
 			return -ENODEV;
 		}
 	} else {
-		if (sor->soc->supports_edp) {
-			sor->ops = &tegra_sor_edp_ops;
-			sor->pad = TEGRA_IO_PAD_LVDS;
-		} else if (sor->soc->supports_dp) {
-			dev_err(&pdev->dev, "DisplayPort not supported yet\n");
-			return -ENODEV;
-		} else {
-			dev_err(&pdev->dev, "unknown (DP) support\n");
-			return -ENODEV;
-		}
+		np = of_parse_phandle(pdev->dev.of_node, "nvidia,panel", 0);
+		/*
+		 * No need to keep this around since we only use it as a check
+		 * to see if a panel is connected (eDP) or not (DP).
+		 */
+		of_node_put(np);
+
+		sor->ops = &tegra_sor_dp_ops;
+		sor->pad = TEGRA_IO_PAD_LVDS;
 	}
 
 	err = tegra_sor_parse_dt(sor);
 	if (err < 0)
-		return err;
+		goto put_aux;
 
 	err = tegra_output_probe(&sor->output);
 	if (err < 0) {
-		dev_err(&pdev->dev, "failed to probe output: %d\n", err);
-		return err;
+		dev_err_probe(&pdev->dev, err, "failed to probe output\n");
+		goto put_aux;
 	}
 
 	if (sor->ops && sor->ops->probe) {
@@ -3314,22 +3780,19 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		if (err < 0) {
 			dev_err(&pdev->dev, "failed to probe %s: %d\n",
 				sor->ops->name, err);
-			goto output;
+			goto remove;
 		}
 	}
 
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	sor->regs = devm_ioremap_resource(&pdev->dev, regs);
+	sor->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(sor->regs)) {
 		err = PTR_ERR(sor->regs);
 		goto remove;
 	}
 
 	err = platform_get_irq(pdev, 0);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to get IRQ: %d\n", err);
+	if (err < 0)
 		goto remove;
-	}
 
 	sor->irq = err;
 
@@ -3340,7 +3803,7 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		goto remove;
 	}
 
-	sor->rst = devm_reset_control_get(&pdev->dev, "sor");
+	sor->rst = devm_reset_control_get_exclusive_released(&pdev->dev, "sor");
 	if (IS_ERR(sor->rst)) {
 		err = PTR_ERR(sor->rst);
 
@@ -3446,125 +3909,129 @@ static int tegra_sor_probe(struct platform_device *pdev)
 	platform_set_drvdata(pdev, sor);
 	pm_runtime_enable(&pdev->dev);
 
+	host1x_client_init(&sor->client);
+	sor->client.ops = &sor_client_ops;
+	sor->client.dev = &pdev->dev;
+
 	/*
 	 * On Tegra210 and earlier, provide our own implementation for the
 	 * pad output clock.
 	 */
 	if (!sor->clk_pad) {
-		err = pm_runtime_get_sync(&pdev->dev);
+		char *name;
+
+		name = devm_kasprintf(sor->dev, GFP_KERNEL, "sor%u_pad_clkout",
+				      sor->index);
+		if (!name) {
+			err = -ENOMEM;
+			goto uninit;
+		}
+
+		err = host1x_client_resume(&sor->client);
 		if (err < 0) {
-			dev_err(&pdev->dev, "failed to get runtime PM: %d\n",
-				err);
-			goto remove;
+			dev_err(sor->dev, "failed to resume: %d\n", err);
+			goto uninit;
 		}
 
-		sor->clk_pad = tegra_clk_sor_pad_register(sor,
-							  "sor1_pad_clkout");
-		pm_runtime_put(&pdev->dev);
+		sor->clk_pad = tegra_clk_sor_pad_register(sor, name);
+		host1x_client_suspend(&sor->client);
 	}
 
 	if (IS_ERR(sor->clk_pad)) {
 		err = PTR_ERR(sor->clk_pad);
-		dev_err(&pdev->dev, "failed to register SOR pad clock: %d\n",
+		dev_err(sor->dev, "failed to register SOR pad clock: %d\n",
 			err);
-		goto remove;
+		goto uninit;
 	}
 
-	INIT_LIST_HEAD(&sor->client.list);
-	sor->client.ops = &sor_client_ops;
-	sor->client.dev = &pdev->dev;
-
-	err = host1x_client_register(&sor->client);
+	err = __host1x_client_register(&sor->client);
 	if (err < 0) {
 		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
 			err);
-		goto remove;
+		goto uninit;
 	}
 
 	return 0;
 
+uninit:
+	host1x_client_exit(&sor->client);
+	pm_runtime_disable(&pdev->dev);
 remove:
-	if (sor->ops && sor->ops->remove)
-		sor->ops->remove(sor);
-output:
+	if (sor->aux)
+		sor->output.ddc = NULL;
+
 	tegra_output_remove(&sor->output);
+put_aux:
+	if (sor->aux)
+		put_device(sor->aux->dev);
+
 	return err;
 }
 
-static int tegra_sor_remove(struct platform_device *pdev)
+static void tegra_sor_remove(struct platform_device *pdev)
 {
 	struct tegra_sor *sor = platform_get_drvdata(pdev);
-	int err;
 
-	pm_runtime_disable(&pdev->dev);
+	host1x_client_unregister(&sor->client);
 
-	err = host1x_client_unregister(&sor->client);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
-			err);
-		return err;
-	}
+	pm_runtime_disable(&pdev->dev);
 
-	if (sor->ops && sor->ops->remove) {
-		err = sor->ops->remove(sor);
-		if (err < 0)
-			dev_err(&pdev->dev, "failed to remove SOR: %d\n", err);
+	if (sor->aux) {
+		put_device(sor->aux->dev);
+		sor->output.ddc = NULL;
 	}
 
 	tegra_output_remove(&sor->output);
-
-	return 0;
 }
 
-#ifdef CONFIG_PM
-static int tegra_sor_suspend(struct device *dev)
+static int __maybe_unused tegra_sor_suspend(struct device *dev)
 {
 	struct tegra_sor *sor = dev_get_drvdata(dev);
 	int err;
 
-	if (sor->rst) {
-		err = reset_control_assert(sor->rst);
+	err = tegra_output_suspend(&sor->output);
+	if (err < 0) {
+		dev_err(dev, "failed to suspend output: %d\n", err);
+		return err;
+	}
+
+	if (sor->hdmi_supply) {
+		err = regulator_disable(sor->hdmi_supply);
 		if (err < 0) {
-			dev_err(dev, "failed to assert reset: %d\n", err);
+			tegra_output_resume(&sor->output);
 			return err;
 		}
 	}
 
-	usleep_range(1000, 2000);
-
-	clk_disable_unprepare(sor->clk);
-
 	return 0;
 }
 
-static int tegra_sor_resume(struct device *dev)
+static int __maybe_unused tegra_sor_resume(struct device *dev)
 {
 	struct tegra_sor *sor = dev_get_drvdata(dev);
 	int err;
 
-	err = clk_prepare_enable(sor->clk);
-	if (err < 0) {
-		dev_err(dev, "failed to enable clock: %d\n", err);
-		return err;
+	if (sor->hdmi_supply) {
+		err = regulator_enable(sor->hdmi_supply);
+		if (err < 0)
+			return err;
 	}
 
-	usleep_range(1000, 2000);
+	err = tegra_output_resume(&sor->output);
+	if (err < 0) {
+		dev_err(dev, "failed to resume output: %d\n", err);
 
-	if (sor->rst) {
-		err = reset_control_deassert(sor->rst);
-		if (err < 0) {
-			dev_err(dev, "failed to deassert reset: %d\n", err);
-			clk_disable_unprepare(sor->clk);
-			return err;
-		}
+		if (sor->hdmi_supply)
+			regulator_disable(sor->hdmi_supply);
+
+		return err;
 	}
 
 	return 0;
 }
-#endif
 
 static const struct dev_pm_ops tegra_sor_pm_ops = {
-	SET_RUNTIME_PM_OPS(tegra_sor_suspend, tegra_sor_resume, NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(tegra_sor_suspend, tegra_sor_resume)
 };
 
 struct platform_driver tegra_sor_driver = {
diff --git a/drivers/gpu/drm/tegra/sor.h b/drivers/gpu/drm/tegra/sor.h
index 13f7e68bec42..00e09d5dca30 100644
--- a/drivers/gpu/drm/tegra/sor.h
+++ b/drivers/gpu/drm/tegra/sor.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (C) 2013 NVIDIA Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef DRM_TEGRA_SOR_H
@@ -42,6 +39,7 @@
 #define  SOR_STATE_ASY_CRC_MODE_NON_ACTIVE	(0x2 << 6)
 #define  SOR_STATE_ASY_CRC_MODE_COMPLETE	(0x1 << 6)
 #define  SOR_STATE_ASY_CRC_MODE_ACTIVE		(0x0 << 6)
+#define  SOR_STATE_ASY_SUBOWNER_MASK		(0x3 << 4)
 #define  SOR_STATE_ASY_OWNER_MASK		0xf
 #define  SOR_STATE_ASY_OWNER(x)			(((x) & 0xf) << 0)
 
@@ -286,10 +284,12 @@
 #define  SOR_DP_PADCTL_CM_TXD_2		(1 << 6)
 #define  SOR_DP_PADCTL_CM_TXD_1		(1 << 5)
 #define  SOR_DP_PADCTL_CM_TXD_0		(1 << 4)
+#define  SOR_DP_PADCTL_CM_TXD(x)	(1 << (4 + (x)))
 #define  SOR_DP_PADCTL_PD_TXD_3		(1 << 3)
 #define  SOR_DP_PADCTL_PD_TXD_0		(1 << 2)
 #define  SOR_DP_PADCTL_PD_TXD_1		(1 << 1)
 #define  SOR_DP_PADCTL_PD_TXD_2		(1 << 0)
+#define  SOR_DP_PADCTL_PD_TXD(x)	(1 << (0 + (x)))
 
 #define SOR_DP_PADCTL1 0x5d
 
diff --git a/drivers/gpu/drm/tegra/submit.c b/drivers/gpu/drm/tegra/submit.c
new file mode 100644
index 000000000000..2430fcc97448
--- /dev/null
+++ b/drivers/gpu/drm/tegra/submit.c
@@ -0,0 +1,684 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#include <linux/dma-fence-array.h>
+#include <linux/dma-mapping.h>
+#include <linux/file.h>
+#include <linux/host1x.h>
+#include <linux/iommu.h>
+#include <linux/kref.h>
+#include <linux/list.h>
+#include <linux/nospec.h>
+#include <linux/pm_runtime.h>
+#include <linux/scatterlist.h>
+#include <linux/slab.h>
+#include <linux/sync_file.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_syncobj.h>
+
+#include "drm.h"
+#include "gem.h"
+#include "submit.h"
+#include "uapi.h"
+
+/* Log a rate-limited submission failure against the context's client device. */
+#define SUBMIT_ERR(context, fmt, ...) \
+	dev_err_ratelimited((context)->client->base.dev, "%s: job submission failed: " fmt "\n", \
+			    current->comm, ##__VA_ARGS__)
+
+struct gather_bo {
+	struct host1x_bo base;		/* embedded host1x BO; container_of() anchor */
+
+	struct kref ref;		/* last put calls gather_bo_release() */
+
+	struct device *dev;		/* device the DMA buffer was allocated on */
+	u32 *gather_data;		/* CPU address of the gather words */
+	dma_addr_t gather_data_dma;	/* DMA address of the same buffer */
+	size_t gather_data_words;	/* buffer length in 32-bit words */
+};
+
+/* Take an additional reference on the gather BO wrapping @host_bo and hand @host_bo back. */
+static struct host1x_bo *gather_bo_get(struct host1x_bo *host_bo)
+{
+	struct gather_bo *gather = container_of(host_bo, struct gather_bo, base);
+
+	kref_get(&gather->ref);
+	return host_bo;
+}
+
+/* Final kref release: free the DMA gather buffer, then the wrapper itself. */
+static void gather_bo_release(struct kref *ref)
+{
+	struct gather_bo *g = container_of(ref, struct gather_bo, ref);
+
+	dma_free_attrs(g->dev, g->gather_data_words * 4, g->gather_data, g->gather_data_dma, 0);
+	kfree(g);
+}
+
+/* Drop one reference; the last put frees the gather BO via gather_bo_release(). */
+static void gather_bo_put(struct host1x_bo *host_bo)
+{
+	kref_put(&container_of(host_bo, struct gather_bo, base)->ref,
+		 gather_bo_release);
+}
+
+/* Map the gather buffer for @dev. Takes a BO reference that the failure paths drop again. */
+static struct host1x_bo_mapping *
+gather_bo_pin(struct device *dev, struct host1x_bo *bo, enum dma_data_direction direction)
+{
+	struct gather_bo *gather = container_of(bo, struct gather_bo, base);
+	struct host1x_bo_mapping *map;
+	int err;
+
+	map = kzalloc(sizeof(*map), GFP_KERNEL);
+	if (!map)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&map->ref);
+	map->bo = host1x_bo_get(bo);
+	map->direction = direction;
+	map->dev = dev;
+	map->sgt = kzalloc(sizeof(*map->sgt), GFP_KERNEL);
+	if (!map->sgt) {
+		err = -ENOMEM;
+		goto free;
+	}
+
+	err = dma_get_sgtable(gather->dev, map->sgt, gather->gather_data, gather->gather_data_dma,
+			      gather->gather_data_words * 4);
+	if (err)
+		goto free_sgt;
+
+	err = dma_map_sgtable(dev, map->sgt, direction, 0);
+	if (err)
+		goto free_sgt;
+
+	map->phys = sg_dma_address(map->sgt->sgl);
+	map->size = gather->gather_data_words * 4;
+	map->chunks = err; /* NOTE(review): err is 0 here, so chunks is 0; confirm intent */
+	return map;
+
+free_sgt:
+	sg_free_table(map->sgt);
+	kfree(map->sgt);
+free:
+	host1x_bo_put(map->bo); /* balance host1x_bo_get() above */
+	kfree(map);
+	return ERR_PTR(err);
+}
+
+/* Undo gather_bo_pin(): unmap and free the sg table, drop the BO ref, free @map. */
+static void gather_bo_unpin(struct host1x_bo_mapping *map)
+{
+	/* A NULL mapping is accepted and ignored. */
+	if (map) {
+		dma_unmap_sgtable(map->dev, map->sgt, map->direction, 0);
+		sg_free_table(map->sgt);
+		kfree(map->sgt);
+		host1x_bo_put(map->bo);
+		kfree(map);
+	}
+}
+
+/* Return the CPU address of the gather words; no separate mapping is created. */
+static void *
+gather_bo_mmap(struct host1x_bo *host_bo)
+{
+	return container_of(host_bo, struct gather_bo, base)->gather_data;
+}
+
+static void gather_bo_munmap(struct host1x_bo *host_bo, void *addr)
+{	/* empty: gather_bo_mmap() creates no mapping that needs undoing */
+}
+
+static const struct host1x_bo_ops gather_bo_ops = { /* callbacks for struct gather_bo */
+	.get = gather_bo_get,
+	.put = gather_bo_put,
+	.pin = gather_bo_pin,
+	.unpin = gather_bo_unpin,
+	.mmap = gather_bo_mmap,
+	.munmap = gather_bo_munmap,
+};
+
+/* Look up mapping @id under the xarray lock and take a reference; NULL if absent. */
+static struct tegra_drm_mapping *
+tegra_drm_mapping_get(struct tegra_drm_context *context, u32 id)
+{
+	struct xarray *mappings = &context->mappings;
+	struct tegra_drm_mapping *found;
+
+	xa_lock(mappings);
+	found = xa_load(mappings, id);
+	if (found)
+		kref_get(&found->ref);
+	xa_unlock(mappings);
+
+	return found;
+}
+
+/*
+ * Duplicate a userspace array of @count elements of @size bytes each.
+ * Returns the copy (callers release it with kvfree()) or an ERR_PTR: -EINVAL if
+ * count * size overflows, -E2BIG above 0x4000 bytes, else vmemdup_user()'s error.
+ */
+static void *alloc_copy_user_array(void __user *from, size_t count, size_t size)
+{
+	size_t bytes;
+
+	if (check_mul_overflow(count, size, &bytes))
+		return ERR_PTR(-EINVAL);
+
+	if (bytes > 0x4000)
+		return ERR_PTR(-E2BIG);
+
+	return vmemdup_user(from, bytes);
+}
+
+/* Allocate a gather BO on @dev and copy the userspace gather words from @args into it. */
+static int submit_copy_gather_data(struct gather_bo **pbo, struct device *dev,
+				   struct tegra_drm_context *context,
+				   struct drm_tegra_channel_submit *args)
+{
+	void __user *src = u64_to_user_ptr(args->gather_data_ptr);
+	struct gather_bo *gather;
+	size_t bytes;
+
+	if (!args->gather_data_words) {
+		SUBMIT_ERR(context, "gather_data_words cannot be zero");
+		return -EINVAL;
+	}
+
+	if (check_mul_overflow((size_t)args->gather_data_words, (size_t)4, &bytes)) {
+		SUBMIT_ERR(context, "gather_data_words is too large");
+		return -EINVAL;
+	}
+
+	gather = kzalloc(sizeof(*gather), GFP_KERNEL);
+	if (!gather) {
+		SUBMIT_ERR(context, "failed to allocate memory for bo info");
+		return -ENOMEM;
+	}
+
+	host1x_bo_init(&gather->base, &gather_bo_ops);
+	kref_init(&gather->ref);
+	gather->dev = dev;
+
+	gather->gather_data = dma_alloc_attrs(dev, bytes, &gather->gather_data_dma,
+					      GFP_KERNEL | __GFP_NOWARN, 0);
+	if (!gather->gather_data) {
+		SUBMIT_ERR(context, "failed to allocate memory for gather data");
+		kfree(gather);
+		return -ENOMEM;
+	}
+
+	if (copy_from_user(gather->gather_data, src, bytes)) {
+		SUBMIT_ERR(context, "failed to copy gather data from userspace");
+		dma_free_attrs(dev, bytes, gather->gather_data, gather->gather_data_dma, 0);
+		kfree(gather);
+		return -EFAULT;
+	}
+
+	gather->gather_data_words = args->gather_data_words;
+	*pbo = gather;
+	return 0;
+}
+
+/* Patch one relocation into the gather; rejects an out-of-range shift or gather offset. */
+static int submit_write_reloc(struct tegra_drm_context *context, struct gather_bo *bo,
+			      struct drm_tegra_submit_buf *buf, struct tegra_drm_mapping *mapping)
+{
+	/* TODO check that target_offset is within bounds */
+	dma_addr_t iova = mapping->iova + buf->reloc.target_offset;
+
+	/* Shifting by the type width or more is undefined; shift comes from userspace. */
+	if (buf->reloc.shift >= 8 * sizeof(iova)) {
+		SUBMIT_ERR(context, "relocation has too large shift (%u)", buf->reloc.shift);
+		return -EINVAL;
+	}
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+	if (buf->flags & DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT)
+		iova |= BIT_ULL(39);
+#endif
+	if (buf->reloc.gather_offset_words >= bo->gather_data_words) {
+		SUBMIT_ERR(context,
+			   "relocation has too large gather offset (%u vs gather length %zu)",
+			   buf->reloc.gather_offset_words, bo->gather_data_words);
+		return -EINVAL;
+	}
+	buf->reloc.gather_offset_words = array_index_nospec(buf->reloc.gather_offset_words,
+							    bo->gather_data_words);
+	bo->gather_data[buf->reloc.gather_offset_words] = iova >> buf->reloc.shift;
+	return 0;
+}
+
+/*
+ * Resolve each buffer in @args to its mapping (taking a reference) and patch its relocation
+ * into @bo. If a buffer is rejected, the references taken so far are dropped again.
+ */
+static int submit_process_bufs(struct tegra_drm_context *context, struct gather_bo *bo,
+			       struct drm_tegra_channel_submit *args,
+			       struct tegra_drm_submit_data *job_data)
+{
+	struct tegra_drm_used_mapping *used;
+	struct drm_tegra_submit_buf *user_bufs;
+	int err;
+	u32 i;
+
+	user_bufs = alloc_copy_user_array(u64_to_user_ptr(args->bufs_ptr), args->num_bufs,
+					  sizeof(*user_bufs));
+	if (IS_ERR(user_bufs)) {
+		SUBMIT_ERR(context, "failed to copy bufs array from userspace");
+		return PTR_ERR(user_bufs);
+	}
+
+	used = kcalloc(args->num_bufs, sizeof(*used), GFP_KERNEL);
+	if (!used) {
+		SUBMIT_ERR(context, "failed to allocate memory for mapping info");
+		err = -ENOMEM;
+		goto done;
+	}
+
+	for (i = 0; i < args->num_bufs; i++) {
+		struct drm_tegra_submit_buf *entry = user_bufs + i;
+		struct tegra_drm_mapping *map;
+
+		if (entry->flags & ~DRM_TEGRA_SUBMIT_RELOC_SECTOR_LAYOUT) {
+			SUBMIT_ERR(context, "invalid flag specified for buffer");
+			err = -EINVAL;
+			goto drop_refs;
+		}
+
+		map = tegra_drm_mapping_get(context, entry->mapping);
+		if (!map) {
+			SUBMIT_ERR(context, "invalid mapping ID '%u' for buffer", entry->mapping);
+			err = -EINVAL;
+			goto drop_refs;
+		}
+
+		err = submit_write_reloc(context, bo, entry, map);
+		if (err) {
+			tegra_drm_mapping_put(map); /* not recorded in used[] yet */
+			goto drop_refs;
+		}
+
+		used[i].flags = entry->flags;
+		used[i].mapping = map;
+	}
+
+	job_data->num_used_mappings = i;
+	job_data->used_mappings = used;
+	err = 0;
+	goto done;
+
+drop_refs:
+	while (i--) /* entries below i each hold a reference */
+		tegra_drm_mapping_put(used[i].mapping);
+	kfree(used);
+	job_data->used_mappings = NULL;
+
+done:
+	kvfree(user_bufs);
+	return err;
+}
+
+/* Attach the syncpoint named by args->syncpt to @job, along with its increment count. */
+static int submit_get_syncpt(struct tegra_drm_context *context, struct host1x_job *job,
+			     struct xarray *syncpoints, struct drm_tegra_channel_submit *args)
+{
+	struct host1x_syncpt *syncpt;
+
+	if (args->syncpt.flags != 0) {
+		SUBMIT_ERR(context, "invalid flag specified for syncpt");
+		return -EINVAL;
+	}
+
+	syncpt = xa_load(syncpoints, args->syncpt.id);
+	if (!syncpt) {
+		SUBMIT_ERR(context, "syncpoint specified in syncpt was not allocated");
+		return -EINVAL;
+	}
+
+	job->syncpt_incrs = args->syncpt.increments;
+	/* Syncpt ref will be dropped on job release */
+	job->syncpt = host1x_syncpt_get(syncpt);
+	return 0;
+}
+
+/*
+ * Validate one GATHER_UPTR command and append it to @job; advances *@offset by cmd->words.
+ */
+static int submit_job_add_gather(struct host1x_job *job, struct tegra_drm_context *context,
+				 struct drm_tegra_submit_cmd_gather_uptr *cmd,
+				 struct gather_bo *bo, u32 *offset,
+				 struct tegra_drm_submit_data *job_data, u32 *class)
+{
+	u32 end;
+
+	if (cmd->reserved[0] || cmd->reserved[1] || cmd->reserved[2]) {
+		SUBMIT_ERR(context, "non-zero reserved field in GATHER_UPTR command");
+		return -EINVAL;
+	}
+
+	/* Check for maximum gather size */
+	if (cmd->words > 16383) {
+		SUBMIT_ERR(context, "too many words in GATHER_UPTR command");
+		return -EINVAL;
+	}
+
+	if (check_add_overflow(*offset, cmd->words, &end)) {
+		SUBMIT_ERR(context, "too many total words in job");
+		return -EINVAL;
+	}
+
+	if (end > bo->gather_data_words) {
+		SUBMIT_ERR(context, "GATHER_UPTR command overflows gather data");
+		return -EINVAL;
+	}
+
+	if (tegra_drm_fw_validate(context->client, bo->gather_data, *offset,
+				  cmd->words, job_data, class)) {
+		SUBMIT_ERR(context, "job was rejected by firewall");
+		return -EINVAL;
+	}
+
+	host1x_job_add_gather(job, &bo->base, cmd->words, *offset * 4);
+	*offset = end;
+	return 0;
+}
+
+static struct host1x_job *
+submit_create_job(struct tegra_drm_context *context, struct gather_bo *bo,
+		  struct drm_tegra_channel_submit *args, struct tegra_drm_submit_data *job_data,
+		  struct xarray *syncpoints)
+{
+	struct drm_tegra_submit_cmd *cmds;
+	u32 i, gather_offset = 0, class;
+	struct host1x_job *job;
+	int err;
+	/* Build a host1x job from the command array in @args; returns it or an ERR_PTR. */
+	/* Set initial class for firewall. */
+	class = context->client->base.class;
+
+	cmds = alloc_copy_user_array(u64_to_user_ptr(args->cmds_ptr), args->num_cmds,
+				     sizeof(*cmds));
+	if (IS_ERR(cmds)) {
+		SUBMIT_ERR(context, "failed to copy cmds array from userspace");
+		return ERR_CAST(cmds);
+	}
+
+	job = host1x_job_alloc(context->channel, args->num_cmds, 0, true);
+	if (!job) {
+		SUBMIT_ERR(context, "failed to allocate memory for job");
+		job = ERR_PTR(-ENOMEM);
+		goto done;
+	}
+
+	err = submit_get_syncpt(context, job, syncpoints, args);
+	if (err < 0)
+		goto free_job;
+
+	job->client = &context->client->base;
+	job->class = context->client->base.class;
+	job->serialize = true;
+	/* Each command adds either a gather or a syncpoint wait; anything else is rejected. */
+	for (i = 0; i < args->num_cmds; i++) {
+		struct drm_tegra_submit_cmd *cmd = &cmds[i];
+
+		if (cmd->flags) {
+			SUBMIT_ERR(context, "unknown flags given for cmd");
+			err = -EINVAL;
+			goto free_job;
+		}
+
+		if (cmd->type == DRM_TEGRA_SUBMIT_CMD_GATHER_UPTR) {
+			err = submit_job_add_gather(job, context, &cmd->gather_uptr, bo,
+						    &gather_offset, job_data, &class);
+			if (err)
+				goto free_job;
+		} else if (cmd->type == DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT) {
+			if (cmd->wait_syncpt.reserved[0] || cmd->wait_syncpt.reserved[1]) {
+				SUBMIT_ERR(context, "non-zero reserved value");
+				err = -EINVAL;
+				goto free_job;
+			}
+
+			host1x_job_add_wait(job, cmd->wait_syncpt.id, cmd->wait_syncpt.value,
+					    false, class);
+		} else if (cmd->type == DRM_TEGRA_SUBMIT_CMD_WAIT_SYNCPT_RELATIVE) {
+			if (cmd->wait_syncpt.reserved[0] || cmd->wait_syncpt.reserved[1]) {
+				SUBMIT_ERR(context, "non-zero reserved value");
+				err = -EINVAL;
+				goto free_job;
+			}
+
+			if (cmd->wait_syncpt.id != args->syncpt.id) {
+				SUBMIT_ERR(context, "syncpoint ID in CMD_WAIT_SYNCPT_RELATIVE is not used by the job");
+				err = -EINVAL;
+				goto free_job;
+			}
+
+			host1x_job_add_wait(job, cmd->wait_syncpt.id, cmd->wait_syncpt.value,
+					    true, class);
+		} else {
+			SUBMIT_ERR(context, "unknown cmd type");
+			err = -EINVAL;
+			goto free_job;
+		}
+	}
+	/* gather_offset stays 0 unless a gather advanced it in submit_job_add_gather(). */
+	if (gather_offset == 0) {
+		SUBMIT_ERR(context, "job must have at least one gather");
+		err = -EINVAL;
+		goto free_job;
+	}
+
+	goto done;
+
+free_job:
+	host1x_job_put(job);
+	job = ERR_PTR(err);
+
+done:
+	kvfree(cmds); /* the kernel copy of the command array is freed on every path */
+
+	return job;
+}
+
+/* Release what the job holds: memory context, mapping refs, job data, runtime PM ref. */
+static void release_job(struct host1x_job *job)
+{
+	struct tegra_drm_submit_data *data = job->user_data;
+	struct device *dev = job->client->dev;
+	u32 n;
+
+	if (job->memory_context)
+		host1x_memory_context_put(job->memory_context);
+
+	for (n = 0; n < data->num_used_mappings; n++)
+		tegra_drm_mapping_put(data->used_mappings[n].mapping);
+	kfree(data->used_mappings);
+	kfree(data);
+
+	pm_runtime_mark_last_busy(dev);
+	pm_runtime_put_autosuspend(dev);
+}
+
+/*
+ * Submit ioctl: optionally wait for syncobj_in, build and pin a host1x job from
+ * the gather data, power up the engine and submit. Returns 0 or negative errno.
+ */
+int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data,
+				   struct drm_file *file)
+{
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct drm_tegra_channel_submit *args = data;
+	struct tegra_drm_submit_data *job_data;
+	struct drm_syncobj *syncobj = NULL;
+	struct tegra_drm_context *context;
+	struct host1x_job *job;
+	struct gather_bo *bo;
+	u32 i;
+	int err;
+
+	mutex_lock(&fpriv->lock);
+
+	context = xa_load(&fpriv->contexts, args->context);
+	if (!context) {
+		mutex_unlock(&fpriv->lock);
+		pr_err_ratelimited("%s: %s: invalid channel context '%#x'", __func__,
+				   current->comm, args->context);
+		return -EINVAL;
+	}
+
+	if (args->syncobj_in) {
+		struct dma_fence *fence;
+		long remaining;
+
+		err = drm_syncobj_find_fence(file, args->syncobj_in, 0, 0, &fence);
+		if (err) {
+			SUBMIT_ERR(context, "invalid syncobj_in '%#x'", args->syncobj_in);
+			goto unlock;
+		}
+
+		remaining = dma_fence_wait_timeout(fence, true, msecs_to_jiffies(10000));
+		dma_fence_put(fence);
+		if (remaining <= 0) {
+			/* 0 means the wait timed out; negative is an error code. */
+			err = remaining ? remaining : -ETIMEDOUT;
+			SUBMIT_ERR(context, "wait for syncobj_in timed out");
+			goto unlock;
+		}
+	}
+
+	if (args->syncobj_out) {
+		syncobj = drm_syncobj_find(file, args->syncobj_out);
+		if (!syncobj) {
+			SUBMIT_ERR(context, "invalid syncobj_out '%#x'", args->syncobj_out);
+			err = -ENOENT;
+			goto unlock;
+		}
+	}
+
+	/* Allocate gather BO and copy gather words in. */
+	err = submit_copy_gather_data(&bo, drm->dev, context, args);
+	if (err)
+		goto unlock;
+
+	job_data = kzalloc(sizeof(*job_data), GFP_KERNEL);
+	if (!job_data) {
+		SUBMIT_ERR(context, "failed to allocate memory for job data");
+		err = -ENOMEM;
+		goto put_bo;
+	}
+
+	/* Get data buffer mappings and do relocation patching. */
+	err = submit_process_bufs(context, bo, args, job_data);
+	if (err)
+		goto free_job_data;
+
+	/* Allocate host1x_job and add gathers and waits to it. */
+	job = submit_create_job(context, bo, args, job_data, &fpriv->syncpoints);
+	if (IS_ERR(job)) {
+		err = PTR_ERR(job);
+		goto free_job_data;
+	}
+
+	/* Map gather data for Host1x. */
+	err = host1x_job_pin(job, context->client->base.dev);
+	if (err) {
+		SUBMIT_ERR(context, "failed to pin job: %d", err);
+		goto put_job;
+	}
+
+	if (context->client->ops->get_streamid_offset) {
+		err = context->client->ops->get_streamid_offset(
+			context->client, &job->engine_streamid_offset);
+		if (err) {
+			SUBMIT_ERR(context, "failed to get streamid offset: %d", err);
+			goto unpin_job;
+		}
+	}
+
+	if (context->memory_context && context->client->ops->can_use_memory_ctx) {
+		bool supported;
+
+		err = context->client->ops->can_use_memory_ctx(context->client, &supported);
+		if (err) {
+			SUBMIT_ERR(context, "failed to detect if engine can use memory context: %d", err);
+			goto unpin_job;
+		}
+
+		if (supported) {
+			job->memory_context = context->memory_context;
+			host1x_memory_context_get(job->memory_context);
+		}
+	} else if (context->client->ops->get_streamid_offset) {
+		/* The job temporarily changes the stream ID; tell it what to restore. */
+		if (!tegra_dev_iommu_get_stream_id(context->client->base.dev,
+						   &job->engine_fallback_streamid))
+			job->engine_fallback_streamid = TEGRA_STREAM_ID_BYPASS;
+	}
+
+	/* Boot engine. */
+	err = pm_runtime_resume_and_get(context->client->base.dev);
+	if (err < 0) {
+		SUBMIT_ERR(context, "could not power up engine: %d", err);
+		goto put_memory_context;
+	}
+
+	job->user_data = job_data;
+	job->release = release_job;
+	job->timeout = 10000;
+
+	/* job_data is now owned by the job and freed by release_job(). */
+	job_data = NULL;
+
+	/* Submit job to hardware. */
+	err = host1x_job_submit(job);
+	if (err) {
+		SUBMIT_ERR(context, "host1x job submission failed: %d", err);
+		goto unpin_job;
+	}
+
+	/* Return postfences to userspace and add fences to DMA reservations. */
+	args->syncpt.value = job->syncpt_end;
+
+	if (syncobj) {
+		struct dma_fence *fence = host1x_fence_create(job->syncpt, job->syncpt_end, true);
+		if (IS_ERR(fence)) {
+			err = PTR_ERR(fence);
+			SUBMIT_ERR(context, "failed to create postfence: %d", err);
+		} else {
+			/* The syncobj takes its own reference; drop ours. */
+			drm_syncobj_replace_fence(syncobj, fence);
+			dma_fence_put(fence);
+		}
+	}
+
+	goto put_job;
+
+put_memory_context:
+	if (job->memory_context)
+		host1x_memory_context_put(job->memory_context);
+unpin_job:
+	host1x_job_unpin(job);
+put_job:
+	host1x_job_put(job);
+free_job_data:
+	if (job_data && job_data->used_mappings) {
+		for (i = 0; i < job_data->num_used_mappings; i++)
+			tegra_drm_mapping_put(job_data->used_mappings[i].mapping);
+		kfree(job_data->used_mappings);
+	}
+	kfree(job_data);
+put_bo:
+	gather_bo_put(&bo->base);
+unlock:
+	if (syncobj)
+		drm_syncobj_put(syncobj);
+	mutex_unlock(&fpriv->lock);
+	return err;
+}
diff --git a/drivers/gpu/drm/tegra/submit.h b/drivers/gpu/drm/tegra/submit.h
new file mode 100644
index 000000000000..cf6a2f0a29fc
--- /dev/null
+++ b/drivers/gpu/drm/tegra/submit.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#ifndef _TEGRA_DRM_UAPI_SUBMIT_H
+#define _TEGRA_DRM_UAPI_SUBMIT_H
+
+struct tegra_drm_used_mapping {
+	struct tegra_drm_mapping *mapping; /* put with tegra_drm_mapping_put() on job release */
+	u32 flags;
+};
+
+struct tegra_drm_submit_data {
+	struct tegra_drm_used_mapping *used_mappings; /* array, released with kfree() */
+	u32 num_used_mappings; /* number of valid entries in used_mappings */
+};
+
+int tegra_drm_fw_validate(struct tegra_drm_client *client, u32 *data, u32 start,
+			  u32 words, struct tegra_drm_submit_data *submit,
+			  u32 *job_class);
+
+#endif
diff --git a/drivers/gpu/drm/tegra/uapi.c b/drivers/gpu/drm/tegra/uapi.c
new file mode 100644
index 000000000000..d0b6a1fa6efa
--- /dev/null
+++ b/drivers/gpu/drm/tegra/uapi.c
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#include <linux/host1x.h>
+#include <linux/iommu.h>
+#include <linux/list.h>
+
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_utils.h>
+
+#include "drm.h"
+#include "uapi.h"
+
+/* kref release callback: unpin the BO, drop the BO reference, free the mapping. */
+static void tegra_drm_mapping_release(struct kref *ref)
+{
+	struct tegra_drm_mapping *m = container_of(ref, struct tegra_drm_mapping, ref);
+
+	host1x_bo_unpin(m->map);
+	host1x_bo_put(m->bo);
+	kfree(m);
+}
+
+/* Drop one reference to @mapping; the final put releases it. */
+void tegra_drm_mapping_put(struct tegra_drm_mapping *mapping)
+{
+	kref_put(&mapping->ref, tegra_drm_mapping_release);
+}
+
+/* Tear down @context: drop its memory context, mappings and channel, then free it. */
+static void tegra_drm_channel_context_close(struct tegra_drm_context *context)
+{
+	struct tegra_drm_mapping *entry;
+	unsigned long index;
+
+	if (context->memory_context)
+		host1x_memory_context_put(context->memory_context);
+
+	/* Release every mapping still registered, then the xarray itself. */
+	xa_for_each(&context->mappings, index, entry)
+		tegra_drm_mapping_put(entry);
+	xa_destroy(&context->mappings);
+
+	host1x_channel_put(context->channel);
+	kfree(context);
+}
+
+/* Close every channel context and drop every syncpoint still held by @file. */
+void tegra_drm_uapi_close_file(struct tegra_drm_file *file)
+{
+	struct tegra_drm_context *ctx;
+	struct host1x_syncpt *syncpt;
+	unsigned long index;
+
+	xa_for_each(&file->contexts, index, ctx)
+		tegra_drm_channel_context_close(ctx);
+	xa_destroy(&file->contexts);
+
+	xa_for_each(&file->syncpoints, index, syncpt)
+		host1x_syncpt_put(syncpt);
+	xa_destroy(&file->syncpoints);
+}
+
+/* Return the first registered client whose host1x class is @class, or NULL. */
+static struct tegra_drm_client *tegra_drm_find_client(struct tegra_drm *tegra, u32 class)
+{
+	struct tegra_drm_client *pos;
+
+	list_for_each_entry(pos, &tegra->clients, list)
+		if (class == pos->base.class)
+			return pos;
+	return NULL;
+}
+
+/*
+ * Open a channel context on the engine whose host1x class is args->host1x_class.
+ * On success the new context ID is stored in args->context and 0 is returned.
+ */
+int tegra_drm_ioctl_channel_open(struct drm_device *drm, void *data, struct drm_file *file)
+{
+	struct host1x *host = tegra_drm_to_host1x(drm->dev_private);
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct tegra_drm *tegra = drm->dev_private;
+	struct drm_tegra_channel_open *args = data;
+	struct tegra_drm_client *client = NULL;
+	struct tegra_drm_context *context;
+	int err;
+
+	if (args->flags)
+		return -EINVAL;
+
+	context = kzalloc(sizeof(*context), GFP_KERNEL);
+	if (!context)
+		return -ENOMEM;
+
+	client = tegra_drm_find_client(tegra, args->host1x_class);
+	if (!client) {
+		err = -ENODEV;
+		goto free;
+	}
+
+	if (client->shared_channel) {
+		context->channel = host1x_channel_get(client->shared_channel);
+	} else {
+		context->channel = host1x_channel_request(&client->base);
+		if (!context->channel) {
+			err = -EBUSY;
+			goto free;
+		}
+	}
+
+	/* Only allocate context if the engine supports context isolation. */
+	if (device_iommu_mapped(client->base.dev) && client->ops->can_use_memory_ctx) {
+		bool supported;
+
+		err = client->ops->can_use_memory_ctx(client, &supported);
+		if (err)
+			goto put_channel;
+
+		if (supported) {
+			struct pid *pid = get_task_pid(current, PIDTYPE_TGID);
+			context->memory_context = host1x_memory_context_alloc(
+				host, client->base.dev, pid);
+			put_pid(pid);
+		}
+
+		if (IS_ERR(context->memory_context)) {
+			if (PTR_ERR(context->memory_context) != -EOPNOTSUPP) {
+				err = PTR_ERR(context->memory_context);
+				goto put_channel;
+			}
+
+			/* OK, HW does not support contexts or contexts are disabled. */
+			context->memory_context = NULL;
+		}
+	}
+
+	/* Fully initialize the context before xa_alloc() exposes it to other ioctls. */
+	context->client = client;
+	xa_init_flags(&context->mappings, XA_FLAGS_ALLOC1);
+
+	err = xa_alloc(&fpriv->contexts, &args->context, context, XA_LIMIT(1, U32_MAX),
+		       GFP_KERNEL);
+	if (err < 0)
+		goto put_memctx;
+
+	args->version = client->version;
+	args->capabilities = 0;
+	if (device_get_dma_attr(client->base.dev) == DEV_DMA_COHERENT)
+		args->capabilities |= DRM_TEGRA_CHANNEL_CAP_CACHE_COHERENT;
+
+	return 0;
+
+put_memctx:
+	if (context->memory_context)
+		host1x_memory_context_put(context->memory_context);
+put_channel:
+	host1x_channel_put(context->channel);
+free:
+	kfree(context);
+	return err;
+}
+
+/* Remove the context named by args->context from @file and tear it down. */
+int tegra_drm_ioctl_channel_close(struct drm_device *drm, void *data, struct drm_file *file)
+{
+	struct drm_tegra_channel_close *args = data;
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct tegra_drm_context *ctx;
+	int ret = -EINVAL;
+
+	mutex_lock(&fpriv->lock);
+	ctx = xa_load(&fpriv->contexts, args->context);
+	if (ctx) {
+		xa_erase(&fpriv->contexts, args->context);
+		ret = 0;
+	}
+	mutex_unlock(&fpriv->lock);
+
+	/* The context has been removed from the xarray; close it outside the lock. */
+	if (ctx)
+		tegra_drm_channel_context_close(ctx);
+
+	return ret;
+}
+
+/*
+ * Pin the GEM object args->handle for DMA by the context's device and register
+ * the resulting mapping; its ID is returned in args->mapping.
+ */
+int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data, struct drm_file *file)
+{
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct drm_tegra_channel_map *args = data;
+	struct tegra_drm_mapping *mapping;
+	enum dma_data_direction dir;
+	struct tegra_drm_context *ctx;
+	struct device *dev;
+	int err;
+
+	if (args->flags & ~DRM_TEGRA_CHANNEL_MAP_READ_WRITE)
+		return -EINVAL;
+
+	mutex_lock(&fpriv->lock);
+
+	ctx = xa_load(&fpriv->contexts, args->context);
+	if (!ctx) {
+		err = -EINVAL;
+		goto unlock;
+	}
+
+	/* Map through the memory context's device when the context has one. */
+	dev = ctx->memory_context ? &ctx->memory_context->dev : ctx->client->base.dev;
+
+	mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
+	if (!mapping) {
+		err = -ENOMEM;
+		goto unlock;
+	}
+
+	kref_init(&mapping->ref);
+
+	mapping->bo = tegra_gem_lookup(file, args->handle);
+	if (!mapping->bo) {
+		err = -EINVAL;
+		goto free;
+	}
+
+	switch (args->flags & DRM_TEGRA_CHANNEL_MAP_READ_WRITE) {
+	case DRM_TEGRA_CHANNEL_MAP_READ_WRITE:
+		dir = DMA_BIDIRECTIONAL;
+		break;
+
+	case DRM_TEGRA_CHANNEL_MAP_WRITE:
+		dir = DMA_FROM_DEVICE;
+		break;
+
+	case DRM_TEGRA_CHANNEL_MAP_READ:
+		dir = DMA_TO_DEVICE;
+		break;
+
+	default:
+		err = -EINVAL;
+		goto put_gem;
+	}
+
+	mapping->map = host1x_bo_pin(dev, mapping->bo, dir, NULL);
+	if (IS_ERR(mapping->map)) {
+		err = PTR_ERR(mapping->map);
+		goto put_gem;
+	}
+
+	mapping->iova = mapping->map->phys;
+	mapping->iova_end = mapping->iova + host1x_to_tegra_bo(mapping->bo)->gem.size;
+
+	err = xa_alloc(&ctx->mappings, &args->mapping, mapping, XA_LIMIT(1, U32_MAX), GFP_KERNEL);
+	if (err < 0)
+		goto unpin;
+
+	mutex_unlock(&fpriv->lock);
+	return 0;
+
+unpin:
+	host1x_bo_unpin(mapping->map);
+put_gem:
+	host1x_bo_put(mapping->bo);
+free:
+	kfree(mapping);
+unlock:
+	mutex_unlock(&fpriv->lock);
+	return err;
+}
+
+/* Unregister mapping args->mapping from its context and drop the registry's reference. */
+int tegra_drm_ioctl_channel_unmap(struct drm_device *drm, void *data, struct drm_file *file)
+{
+	struct drm_tegra_channel_unmap *args = data;
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct tegra_drm_mapping *victim = NULL;
+	struct tegra_drm_context *ctx;
+
+	mutex_lock(&fpriv->lock);
+
+	ctx = xa_load(&fpriv->contexts, args->context);
+	if (ctx)
+		victim = xa_erase(&ctx->mappings, args->mapping);
+
+	mutex_unlock(&fpriv->lock);
+
+	/* Both an unknown context and an unknown mapping are reported as -EINVAL. */
+	if (!victim)
+		return -EINVAL;
+
+	/* Drop the reference that was held on behalf of the xarray entry. */
+	tegra_drm_mapping_put(victim);
+
+	return 0;
+}
+
+/* Allocate a client-managed syncpoint for @file and return its ID in args->id. */
+int tegra_drm_ioctl_syncpoint_allocate(struct drm_device *drm, void *data, struct drm_file *file)
+{
+	struct host1x *host1x = tegra_drm_to_host1x(drm->dev_private);
+	struct drm_tegra_syncpoint_allocate *args = data;
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct host1x_syncpt *syncpt;
+	int ret;
+
+	/* The ID is an output only; a non-zero value on entry is rejected. */
+	if (args->id != 0)
+		return -EINVAL;
+
+	syncpt = host1x_syncpt_alloc(host1x, HOST1X_SYNCPT_CLIENT_MANAGED, current->comm);
+	if (!syncpt)
+		return -EBUSY;
+
+	args->id = host1x_syncpt_id(syncpt);
+
+	ret = xa_insert(&fpriv->syncpoints, args->id, syncpt, GFP_KERNEL);
+	if (ret != 0)
+		host1x_syncpt_put(syncpt);
+
+	return ret;
+}
+
+/* Forget syncpoint args->id for @file and drop the reference held for it. */
+int tegra_drm_ioctl_syncpoint_free(struct drm_device *drm, void *data, struct drm_file *file)
+{
+	struct drm_tegra_syncpoint_allocate *args = data;
+	struct tegra_drm_file *fpriv = file->driver_priv;
+	struct host1x_syncpt *syncpt;
+
+	mutex_lock(&fpriv->lock);
+	syncpt = xa_erase(&fpriv->syncpoints, args->id);
+	mutex_unlock(&fpriv->lock);
+
+	if (syncpt)
+		host1x_syncpt_put(syncpt);
+
+	/* An ID that was not registered for this file is an error. */
+	return syncpt ? 0 : -EINVAL;
+}
+
+/* Wait for syncpoint args->id to reach args->threshold, bounded by args->timeout_ns. */
+int tegra_drm_ioctl_syncpoint_wait(struct drm_device *drm, void *data, struct drm_file *file)
+{
+	struct drm_tegra_syncpoint_wait *args = data;
+	struct host1x *host = tegra_drm_to_host1x(drm->dev_private);
+	struct host1x_syncpt *syncpt;
+	signed long timeout;
+
+	if (args->padding)
+		return -EINVAL;
+
+	syncpt = host1x_syncpt_get_by_id_noref(host, args->id);
+	if (!syncpt)
+		return -EINVAL;
+
+	timeout = drm_timeout_abs_to_jiffies(args->timeout_ns);
+	return host1x_syncpt_wait(syncpt, args->threshold, timeout, &args->value);
+}
diff --git a/drivers/gpu/drm/tegra/uapi.h b/drivers/gpu/drm/tegra/uapi.h
new file mode 100644
index 000000000000..92ff1e44ff15
--- /dev/null
+++ b/drivers/gpu/drm/tegra/uapi.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2020 NVIDIA Corporation */
+
+#ifndef _TEGRA_DRM_UAPI_H
+#define _TEGRA_DRM_UAPI_H
+
+#include <linux/dma-mapping.h>
+#include <linux/idr.h>
+#include <linux/kref.h>
+#include <linux/xarray.h>
+
+#include <drm/drm.h>
+
+struct drm_file;
+struct drm_device;
+
+struct tegra_drm_file {
+	/* Legacy UAPI state */
+	struct idr legacy_contexts;
+	struct mutex lock; /* also taken by the new UAPI ioctls around context lookups */
+
+	/* New UAPI state: channel contexts and syncpoints, each looked up by ID */
+	struct xarray contexts;
+	struct xarray syncpoints;
+};
+
+struct tegra_drm_mapping {
+	struct kref ref; /* dropped with tegra_drm_mapping_put() */
+
+	struct host1x_bo_mapping *map; /* result of host1x_bo_pin() */
+	struct host1x_bo *bo; /* buffer object that was pinned */
+
+	dma_addr_t iova; /* map->phys */
+	dma_addr_t iova_end; /* iova plus the size of the GEM object */
+};
+
+int tegra_drm_ioctl_channel_open(struct drm_device *drm, void *data,
+				 struct drm_file *file);
+int tegra_drm_ioctl_channel_close(struct drm_device *drm, void *data,
+				  struct drm_file *file);
+int tegra_drm_ioctl_channel_map(struct drm_device *drm, void *data,
+				struct drm_file *file);
+int tegra_drm_ioctl_channel_unmap(struct drm_device *drm, void *data,
+				  struct drm_file *file);
+int tegra_drm_ioctl_channel_submit(struct drm_device *drm, void *data,
+				   struct drm_file *file);
+int tegra_drm_ioctl_syncpoint_allocate(struct drm_device *drm, void *data,
+				       struct drm_file *file);
+int tegra_drm_ioctl_syncpoint_free(struct drm_device *drm, void *data,
+				   struct drm_file *file);
+int tegra_drm_ioctl_syncpoint_wait(struct drm_device *drm, void *data,
+				   struct drm_file *file);
+
+void tegra_drm_uapi_close_file(struct tegra_drm_file *file);
+void tegra_drm_mapping_put(struct tegra_drm_mapping *mapping);
+
+#endif
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index d47983deb1cf..332c9b563d3f 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -1,18 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2015, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
 #include <linux/host1x.h>
 #include <linux/iommu.h>
 #include <linux/module.h>
 #include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/reset.h>
@@ -26,20 +23,21 @@
 struct vic_config {
 	const char *firmware;
 	unsigned int version;
+	bool supports_sid;
 };
 
 struct vic {
 	struct falcon falcon;
-	bool booted;
 
 	void __iomem *regs;
 	struct tegra_drm_client client;
 	struct host1x_channel *channel;
-	struct iommu_domain *domain;
 	struct device *dev;
 	struct clk *clk;
 	struct reset_control *rst;
 
+	bool can_use_context;
+
 	/* Platform configuration */
 	const struct vic_config *config;
 };
@@ -54,56 +52,32 @@ static void vic_writel(struct vic *vic, u32 value, unsigned int offset)
 	writel(value, vic->regs + offset);
 }
 
-static int vic_runtime_resume(struct device *dev)
-{
-	struct vic *vic = dev_get_drvdata(dev);
-	int err;
-
-	err = clk_prepare_enable(vic->clk);
-	if (err < 0)
-		return err;
-
-	usleep_range(10, 20);
-
-	err = reset_control_deassert(vic->rst);
-	if (err < 0)
-		goto disable;
-
-	usleep_range(10, 20);
-
-	return 0;
-
-disable:
-	clk_disable_unprepare(vic->clk);
-	return err;
-}
-
-static int vic_runtime_suspend(struct device *dev)
-{
-	struct vic *vic = dev_get_drvdata(dev);
-	int err;
-
-	err = reset_control_assert(vic->rst);
-	if (err < 0)
-		return err;
-
-	usleep_range(2000, 4000);
-
-	clk_disable_unprepare(vic->clk);
-
-	vic->booted = false;
-
-	return 0;
-}
-
 static int vic_boot(struct vic *vic)
 {
-	u32 fce_ucode_size, fce_bin_data_offset;
+	u32 fce_ucode_size, fce_bin_data_offset, stream_id;
 	void *hdr;
 	int err = 0;
 
-	if (vic->booted)
-		return 0;
+	if (vic->config->supports_sid && tegra_dev_iommu_get_stream_id(vic->dev, &stream_id)) {
+		u32 value;
+
+		value = TRANSCFG_ATT(1, TRANSCFG_SID_FALCON) |
+			TRANSCFG_ATT(0, TRANSCFG_SID_HW);
+		vic_writel(vic, value, VIC_TFBIF_TRANSCFG);
+
+		/*
+		 * STREAMID0 is used for input/output buffers. Initialize it to SID_VIC in case
+		 * context isolation is not enabled, and SID_VIC is used for both firmware and
+		 * data buffers.
+		 *
+		 * If context isolation is enabled, it will be overridden by the SETSTREAMID
+		 * opcode as part of each job.
+		 */
+		vic_writel(vic, stream_id, VIC_THI_STREAMID0);
+
+		/* STREAMID1 is used for firmware loading. */
+		vic_writel(vic, stream_id, VIC_THI_STREAMID1);
+	}
 
 	/* setup clockgating registers */
 	vic_writel(vic, CG_IDLE_CG_DLY_CNT(4) |
@@ -115,18 +89,21 @@ static int vic_boot(struct vic *vic)
 	if (err < 0)
 		return err;
 
-	hdr = vic->falcon.firmware.vaddr;
+	hdr = vic->falcon.firmware.virt;
 	fce_bin_data_offset = *(u32 *)(hdr + VIC_UCODE_FCE_DATA_OFFSET);
-	hdr = vic->falcon.firmware.vaddr +
-		*(u32 *)(hdr + VIC_UCODE_FCE_HEADER_OFFSET);
-	fce_ucode_size = *(u32 *)(hdr + FCE_UCODE_SIZE_OFFSET);
 
-	falcon_execute_method(&vic->falcon, VIC_SET_APPLICATION_ID, 1);
-	falcon_execute_method(&vic->falcon, VIC_SET_FCE_UCODE_SIZE,
-			      fce_ucode_size);
-	falcon_execute_method(&vic->falcon, VIC_SET_FCE_UCODE_OFFSET,
-			      (vic->falcon.firmware.paddr + fce_bin_data_offset)
-				>> 8);
+	/* Old VIC firmware needs kernel help with setting up FCE microcode. */
+	if (fce_bin_data_offset != 0x0 && fce_bin_data_offset != 0xa5a5a5a5) {
+		hdr = vic->falcon.firmware.virt +
+			*(u32 *)(hdr + VIC_UCODE_FCE_HEADER_OFFSET);
+		fce_ucode_size = *(u32 *)(hdr + FCE_UCODE_SIZE_OFFSET);
+
+		falcon_execute_method(&vic->falcon, VIC_SET_FCE_UCODE_SIZE,
+				      fce_ucode_size);
+		falcon_execute_method(
+			&vic->falcon, VIC_SET_FCE_UCODE_OFFSET,
+			(vic->falcon.firmware.iova + fce_bin_data_offset) >> 8);
+	}
 
 	err = falcon_wait_idle(&vic->falcon);
 	if (err < 0) {
@@ -135,60 +112,24 @@ static int vic_boot(struct vic *vic)
 		return err;
 	}
 
-	vic->booted = true;
-
 	return 0;
 }
 
-static void *vic_falcon_alloc(struct falcon *falcon, size_t size,
-			      dma_addr_t *iova)
-{
-	struct tegra_drm *tegra = falcon->data;
-
-	return tegra_drm_alloc(tegra, size, iova);
-}
-
-static void vic_falcon_free(struct falcon *falcon, size_t size,
-			    dma_addr_t iova, void *va)
-{
-	struct tegra_drm *tegra = falcon->data;
-
-	return tegra_drm_free(tegra, size, va, iova);
-}
-
-static const struct falcon_ops vic_falcon_ops = {
-	.alloc = vic_falcon_alloc,
-	.free = vic_falcon_free
-};
-
 static int vic_init(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct iommu_group *group = iommu_group_get(client->dev);
-	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct drm_device *dev = dev_get_drvdata(client->host);
 	struct tegra_drm *tegra = dev->dev_private;
 	struct vic *vic = to_vic(drm);
 	int err;
 
-	if (group && tegra->domain) {
-		err = iommu_attach_group(tegra->domain, group);
-		if (err < 0) {
-			dev_err(vic->dev, "failed to attach to domain: %d\n",
-				err);
-			return err;
-		}
-
-		vic->domain = tegra->domain;
-	}
-
-	if (!vic->falcon.data) {
-		vic->falcon.data = tegra;
-		err = falcon_load_firmware(&vic->falcon);
-		if (err < 0)
-			goto detach;
+	err = host1x_client_iommu_attach(client);
+	if (err < 0 && err != -ENODEV) {
+		dev_err(vic->dev, "failed to attach to domain: %d\n", err);
+		return err;
 	}
 
-	vic->channel = host1x_channel_request(client->dev);
+	vic->channel = host1x_channel_request(client);
 	if (!vic->channel) {
 		err = -ENOMEM;
 		goto detach;
@@ -204,15 +145,20 @@ static int vic_init(struct host1x_client *client)
 	if (err < 0)
 		goto free_syncpt;
 
+	/*
+	 * Inherit the DMA parameters (such as maximum segment size) from the
+	 * parent host1x device.
+	 */
+	client->dev->dma_parms = client->host->dma_parms;
+
 	return 0;
 
 free_syncpt:
-	host1x_syncpt_free(client->syncpts[0]);
+	host1x_syncpt_put(client->syncpts[0]);
 free_channel:
 	host1x_channel_put(vic->channel);
 detach:
-	if (group && tegra->domain)
-		iommu_detach_group(tegra->domain, group);
+	host1x_client_iommu_detach(client);
 
 	return err;
 }
@@ -220,22 +166,37 @@ detach:
 static int vic_exit(struct host1x_client *client)
 {
 	struct tegra_drm_client *drm = host1x_to_drm_client(client);
-	struct iommu_group *group = iommu_group_get(client->dev);
-	struct drm_device *dev = dev_get_drvdata(client->parent);
+	struct drm_device *dev = dev_get_drvdata(client->host);
 	struct tegra_drm *tegra = dev->dev_private;
 	struct vic *vic = to_vic(drm);
 	int err;
 
+	/* avoid a dangling pointer just in case this disappears */
+	client->dev->dma_parms = NULL;
+
 	err = tegra_drm_unregister_client(tegra, drm);
 	if (err < 0)
 		return err;
 
-	host1x_syncpt_free(client->syncpts[0]);
-	host1x_channel_put(vic->channel);
+	pm_runtime_dont_use_autosuspend(client->dev);
+	pm_runtime_force_suspend(client->dev);
 
-	if (vic->domain) {
-		iommu_detach_group(vic->domain, group);
-		vic->domain = NULL;
+	host1x_syncpt_put(client->syncpts[0]);
+	host1x_channel_put(vic->channel);
+	host1x_client_iommu_detach(client);
+
+	vic->channel = NULL;
+
+	if (client->group) {
+		dma_unmap_single(vic->dev, vic->falcon.firmware.phys,
+				 vic->falcon.firmware.size, DMA_TO_DEVICE);
+		tegra_drm_free(tegra, vic->falcon.firmware.size,
+			       vic->falcon.firmware.virt,
+			       vic->falcon.firmware.iova);
+	} else {
+		dma_free_coherent(vic->dev, vic->falcon.firmware.size,
+				  vic->falcon.firmware.virt,
+				  vic->falcon.firmware.iova);
 	}
 
 	return 0;
@@ -246,44 +207,192 @@ static const struct host1x_client_ops vic_client_ops = {
 	.exit = vic_exit,
 };
 
-static int vic_open_channel(struct tegra_drm_client *client,
-			    struct tegra_drm_context *context)
+/*
+ * Load the VIC firmware once (serialized by a function-local mutex) and record
+ * whether context isolation can be used. Returns 0 or a negative error code.
+ */
+static int vic_load_firmware(struct vic *vic)
 {
-	struct vic *vic = to_vic(client);
+	struct host1x_client *client = &vic->client.base;
+	struct tegra_drm *tegra = vic->client.drm;
+	static DEFINE_MUTEX(lock);
+	u32 fce_bin_data_offset;
+	dma_addr_t iova;
+	size_t size;
+	void *virt;
+	int err;
+
+	mutex_lock(&lock);
+
+	if (vic->falcon.firmware.virt) {
+		err = 0;
+		goto unlock;
+	}
+
+	err = falcon_read_firmware(&vic->falcon, vic->config->firmware);
+	if (err < 0)
+		goto unlock;
+
+	size = vic->falcon.firmware.size;
+
+	if (!client->group) {
+		virt = dma_alloc_coherent(vic->dev, size, &iova, GFP_KERNEL);
+		if (!virt) {
+			err = -ENOMEM;
+			goto unlock;
+		}
+	} else {
+		virt = tegra_drm_alloc(tegra, size, &iova);
+		if (IS_ERR(virt)) {
+			err = PTR_ERR(virt);
+			goto unlock;
+		}
+	}
+
+	vic->falcon.firmware.virt = virt;
+	vic->falcon.firmware.iova = iova;
+
+	err = falcon_load_firmware(&vic->falcon);
+	if (err < 0)
+		goto cleanup;
+
+	/*
+	 * In this case we have received an IOVA from the shared domain, so we
+	 * need to make sure to get the physical address so that the DMA API
+	 * knows what memory pages to flush the cache for.
+	 */
+	if (client->group) {
+		dma_addr_t phys;
+
+		phys = dma_map_single(vic->dev, virt, size, DMA_TO_DEVICE);
+		err = dma_mapping_error(vic->dev, phys);
+		if (err < 0)
+			goto cleanup;
+
+		vic->falcon.firmware.phys = phys;
+	}
+
+	/* Check if firmware is new enough to not require mapping it to data buffer domains. */
+	fce_bin_data_offset = *(u32 *)(virt + VIC_UCODE_FCE_DATA_OFFSET);
+
+	if (!vic->config->supports_sid) {
+		vic->can_use_context = false;
+	} else if (fce_bin_data_offset != 0x0 && fce_bin_data_offset != 0xa5a5a5a5) {
+		/*
+		 * Firmware will access FCE through STREAMID0, so context
+		 * isolation cannot be used.
+		 */
+		vic->can_use_context = false;
+		dev_warn_once(vic->dev, "context isolation disabled due to old firmware\n");
+	} else {
+		vic->can_use_context = true;
+	}
+
+unlock:
+	mutex_unlock(&lock);
+	return err;
+
+cleanup:
+	if (!client->group)
+		dma_free_coherent(vic->dev, size, virt, iova);
+	else
+		tegra_drm_free(tegra, size, virt, iova);
+	/* Forget the freed buffer so that a later call retries instead of reusing it. */
+	vic->falcon.firmware.virt = NULL;
+	mutex_unlock(&lock);
+	return err;
+}
+
+static int __maybe_unused vic_runtime_resume(struct device *dev)
+{
+	struct vic *vic = dev_get_drvdata(dev);
 	int err;
 
-	err = pm_runtime_get_sync(vic->dev);
+	err = clk_prepare_enable(vic->clk);
 	if (err < 0)
 		return err;
 
+	usleep_range(10, 20);
+
+	err = reset_control_deassert(vic->rst);
+	if (err < 0)
+		goto disable;
+
+	usleep_range(10, 20);
+
+	err = vic_load_firmware(vic);
+	if (err < 0)
+		goto assert;
+
 	err = vic_boot(vic);
-	if (err < 0) {
-		pm_runtime_put(vic->dev);
+	if (err < 0)
+		goto assert;
+
+	return 0;
+
+assert:
+	reset_control_assert(vic->rst);
+disable:
+	clk_disable_unprepare(vic->clk);
+	return err;
+}
+
+static int __maybe_unused vic_runtime_suspend(struct device *dev)
+{
+	struct vic *vic = dev_get_drvdata(dev);
+	int err;
+
+	host1x_channel_stop(vic->channel);
+
+	err = reset_control_assert(vic->rst);
+	if (err < 0)
 		return err;
-	}
+
+	usleep_range(2000, 4000);
+
+	clk_disable_unprepare(vic->clk);
+
+	return 0;
+}
+
+static int vic_open_channel(struct tegra_drm_client *client,
+			    struct tegra_drm_context *context)
+{
+	struct vic *vic = to_vic(client);
 
 	context->channel = host1x_channel_get(vic->channel);
-	if (!context->channel) {
-		pm_runtime_put(vic->dev);
+	if (!context->channel)
 		return -ENOMEM;
-	}
 
 	return 0;
 }
 
 static void vic_close_channel(struct tegra_drm_context *context)
 {
-	struct vic *vic = to_vic(context->client);
-
 	host1x_channel_put(context->channel);
+}
 
-	pm_runtime_put(vic->dev);
+/* Report through @supported whether this VIC can use a memory context. */
+static int vic_can_use_memory_ctx(struct tegra_drm_client *client, bool *supported)
+{
+	struct vic *vic = to_vic(client);
+	int ret;
+
+	/* This doesn't access HW so it's safe to call without powering up. */
+	ret = vic_load_firmware(vic);
+	if (ret >= 0) {
+		*supported = vic->can_use_context;
+		ret = 0;
+	}
+	return ret;
 }
 
 static const struct tegra_drm_client_ops vic_ops = {
 	.open_channel = vic_open_channel,
 	.close_channel = vic_close_channel,
 	.submit = tegra_drm_submit,
+	.get_streamid_offset = tegra_drm_get_streamid_offset_thi,
+	.can_use_memory_ctx = vic_can_use_memory_ctx,
 };
 
 #define NVIDIA_TEGRA_124_VIC_FIRMWARE "nvidia/tegra124/vic03_ucode.bin"
@@ -291,6 +400,7 @@ static const struct tegra_drm_client_ops vic_ops = {
 static const struct vic_config vic_t124_config = {
 	.firmware = NVIDIA_TEGRA_124_VIC_FIRMWARE,
 	.version = 0x40,
+	.supports_sid = false,
 };
 
 #define NVIDIA_TEGRA_210_VIC_FIRMWARE "nvidia/tegra210/vic04_ucode.bin"
@@ -298,6 +408,7 @@ static const struct vic_config vic_t124_config = {
 static const struct vic_config vic_t210_config = {
 	.firmware = NVIDIA_TEGRA_210_VIC_FIRMWARE,
 	.version = 0x21,
+	.supports_sid = false,
 };
 
 #define NVIDIA_TEGRA_186_VIC_FIRMWARE "nvidia/tegra186/vic04_ucode.bin"
@@ -305,6 +416,7 @@ static const struct vic_config vic_t210_config = {
 static const struct vic_config vic_t186_config = {
 	.firmware = NVIDIA_TEGRA_186_VIC_FIRMWARE,
 	.version = 0x18,
+	.supports_sid = true,
 };
 
 #define NVIDIA_TEGRA_194_VIC_FIRMWARE "nvidia/tegra194/vic.bin"
@@ -312,24 +424,41 @@ static const struct vic_config vic_t186_config = {
 static const struct vic_config vic_t194_config = {
 	.firmware = NVIDIA_TEGRA_194_VIC_FIRMWARE,
 	.version = 0x19,
+	.supports_sid = true,
+};
+
+#define NVIDIA_TEGRA_234_VIC_FIRMWARE "nvidia/tegra234/vic.bin"
+
+static const struct vic_config vic_t234_config = {
+	.firmware = NVIDIA_TEGRA_234_VIC_FIRMWARE,
+	.version = 0x23,
+	.supports_sid = true,
 };
 
-static const struct of_device_id vic_match[] = {
+static const struct of_device_id tegra_vic_of_match[] = {
 	{ .compatible = "nvidia,tegra124-vic", .data = &vic_t124_config },
 	{ .compatible = "nvidia,tegra210-vic", .data = &vic_t210_config },
 	{ .compatible = "nvidia,tegra186-vic", .data = &vic_t186_config },
 	{ .compatible = "nvidia,tegra194-vic", .data = &vic_t194_config },
+	{ .compatible = "nvidia,tegra234-vic", .data = &vic_t234_config },
 	{ },
 };
+MODULE_DEVICE_TABLE(of, tegra_vic_of_match);
 
 static int vic_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct host1x_syncpt **syncpts;
-	struct resource *regs;
 	struct vic *vic;
 	int err;
 
+	/* inherit DMA mask from host1x parent */
+	err = dma_coerce_mask_and_coherent(dev, *dev->parent->dma_mask);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to set DMA mask: %d\n", err);
+		return err;
+	}
+
 	vic = devm_kzalloc(dev, sizeof(*vic), GFP_KERNEL);
 	if (!vic)
 		return -ENOMEM;
@@ -340,13 +469,7 @@ static int vic_probe(struct platform_device *pdev)
 	if (!syncpts)
 		return -ENOMEM;
 
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!regs) {
-		dev_err(&pdev->dev, "failed to get registers\n");
-		return -ENXIO;
-	}
-
-	vic->regs = devm_ioremap_resource(dev, regs);
+	vic->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(vic->regs))
 		return PTR_ERR(vic->regs);
 
@@ -356,6 +479,12 @@ static int vic_probe(struct platform_device *pdev)
 		return PTR_ERR(vic->clk);
 	}
 
+	err = clk_set_rate(vic->clk, ULONG_MAX);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to set clock rate\n");
+		return err;
+	}
+
 	if (!dev->pm_domain) {
 		vic->rst = devm_reset_control_get(dev, "vic");
 		if (IS_ERR(vic->rst)) {
@@ -366,16 +495,11 @@ static int vic_probe(struct platform_device *pdev)
 
 	vic->falcon.dev = dev;
 	vic->falcon.regs = vic->regs;
-	vic->falcon.ops = &vic_falcon_ops;
 
 	err = falcon_init(&vic->falcon);
 	if (err < 0)
 		return err;
 
-	err = falcon_read_firmware(&vic->falcon, vic->config->firmware);
-	if (err < 0)
-		goto exit_falcon;
-
 	platform_set_drvdata(pdev, vic);
 
 	INIT_LIST_HEAD(&vic->client.base.list);
@@ -393,57 +517,41 @@ static int vic_probe(struct platform_device *pdev)
 	err = host1x_client_register(&vic->client.base);
 	if (err < 0) {
 		dev_err(dev, "failed to register host1x client: %d\n", err);
-		platform_set_drvdata(pdev, NULL);
 		goto exit_falcon;
 	}
 
-	pm_runtime_enable(&pdev->dev);
-	if (!pm_runtime_enabled(&pdev->dev)) {
-		err = vic_runtime_resume(&pdev->dev);
-		if (err < 0)
-			goto unregister_client;
-	}
+	pm_runtime_enable(dev);
+	pm_runtime_use_autosuspend(dev);
+	pm_runtime_set_autosuspend_delay(dev, 500);
 
 	return 0;
 
-unregister_client:
-	host1x_client_unregister(&vic->client.base);
 exit_falcon:
 	falcon_exit(&vic->falcon);
 
 	return err;
 }
 
-static int vic_remove(struct platform_device *pdev)
+static void vic_remove(struct platform_device *pdev)
 {
 	struct vic *vic = platform_get_drvdata(pdev);
-	int err;
-
-	err = host1x_client_unregister(&vic->client.base);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
-			err);
-		return err;
-	}
-
-	if (pm_runtime_enabled(&pdev->dev))
-		pm_runtime_disable(&pdev->dev);
-	else
-		vic_runtime_suspend(&pdev->dev);
 
+	/* undo pm_runtime_use_autosuspend() from vic_probe() before disabling */
+	pm_runtime_dont_use_autosuspend(&pdev->dev);
+	pm_runtime_disable(&pdev->dev);
+	host1x_client_unregister(&vic->client.base);
 	falcon_exit(&vic->falcon);
-
-	return 0;
 }
 
 static const struct dev_pm_ops vic_pm_ops = {
-	SET_RUNTIME_PM_OPS(vic_runtime_suspend, vic_runtime_resume, NULL)
+	RUNTIME_PM_OPS(vic_runtime_suspend, vic_runtime_resume, NULL)
+	SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
 };
 
 struct platform_driver tegra_vic_driver = {
 	.driver = {
 		.name = "tegra-vic",
-		.of_match_table = vic_match,
+		.of_match_table = tegra_vic_of_match,
 		.pm = &vic_pm_ops
 	},
 	.probe = vic_probe,
@@ -462,3 +570,6 @@ MODULE_FIRMWARE(NVIDIA_TEGRA_186_VIC_FIRMWARE);
 #if IS_ENABLED(CONFIG_ARCH_TEGRA_194_SOC)
 MODULE_FIRMWARE(NVIDIA_TEGRA_194_VIC_FIRMWARE);
 #endif
+#if IS_ENABLED(CONFIG_ARCH_TEGRA_234_SOC)
+MODULE_FIRMWARE(NVIDIA_TEGRA_234_VIC_FIRMWARE);
+#endif
diff --git a/drivers/gpu/drm/tegra/vic.h b/drivers/gpu/drm/tegra/vic.h
index 21844817a7e1..acf35aac948b 100644
--- a/drivers/gpu/drm/tegra/vic.h
+++ b/drivers/gpu/drm/tegra/vic.h
@@ -1,9 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2015, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
  */
 
 #ifndef TEGRA_VIC_H
@@ -11,17 +8,25 @@
 
 /* VIC methods */
 
-#define VIC_SET_APPLICATION_ID			0x00000200
 #define VIC_SET_FCE_UCODE_SIZE			0x0000071C
 #define VIC_SET_FCE_UCODE_OFFSET		0x0000072C
 
 /* VIC registers */
 
+#define VIC_THI_STREAMID0	0x00000030
+#define VIC_THI_STREAMID1	0x00000034
+
 #define NV_PVIC_MISC_PRI_VIC_CG			0x000016d0
 #define CG_IDLE_CG_DLY_CNT(val)			((val & 0x3f) << 0)
 #define CG_IDLE_CG_EN				(1 << 6)
 #define CG_WAKEUP_DLY_CNT(val)			((val & 0xf) << 16)
 
+#define VIC_TFBIF_TRANSCFG	0x00002044
+#define  TRANSCFG_ATT(i, v)	(((v) & 0x3) << ((i) * 4)) /* low 2 bits of v at bit offset 4*i */
+#define  TRANSCFG_SID_HW	0
+#define  TRANSCFG_SID_PHY	1
+#define  TRANSCFG_SID_FALCON	2
+
 /* Firmware offsets */
 
 #define VIC_UCODE_FCE_HEADER_OFFSET		(6*4)