95 files changed, 8035 insertions, 8043 deletions
diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig
index ccfd42b23606..e6c78ae2003a 100644
--- a/drivers/gpu/host1x/Kconfig
+++ b/drivers/gpu/host1x/Kconfig
@@ -1,6 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config TEGRA_HOST1X_CONTEXT_BUS
+	bool
+
 config TEGRA_HOST1X
 	tristate "NVIDIA Tegra host1x driver"
-	depends on ARCH_TEGRA || ARCH_MULTIPLATFORM
+	depends on ARCH_TEGRA || COMPILE_TEST
+	select DMA_SHARED_BUFFER
+	select TEGRA_HOST1X_CONTEXT_BUS
+	select IOMMU_IOVA
 	help
 	  Driver for the NVIDIA Tegra host1x hardware.
 
@@ -19,6 +27,4 @@ config TEGRA_HOST1X_FIREWALL
 
 	  If unsure, choose Y.
 
-source "drivers/gpu/host1x/drm/Kconfig"
-
 endif
diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index 3b037b6e0298..ee5286ffe08d 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -1,6 +1,6 @@
-ccflags-y = -Idrivers/gpu/host1x
-
+# SPDX-License-Identifier: GPL-2.0
 host1x-y = \
+	bus.o \
 	syncpt.o \
 	dev.o \
 	intr.o \
@@ -8,13 +8,18 @@ host1x-y = \
 	channel.o \
 	job.o \
 	debug.o \
-	hw/host1x01.o
+	mipi.o \
+	fence.o \
+	hw/host1x01.o \
+	hw/host1x02.o \
+	hw/host1x04.o \
+	hw/host1x05.o \
+	hw/host1x06.o \
+	hw/host1x07.o \
+	hw/host1x08.o
 
-ccflags-y += -Iinclude/drm
-ccflags-$(CONFIG_DRM_TEGRA_DEBUG) += -DDEBUG
+host1x-$(CONFIG_IOMMU_API) += \
+	context.o
 
-host1x-$(CONFIG_DRM_TEGRA) += drm/drm.o drm/fb.o drm/dc.o
-host1x-$(CONFIG_DRM_TEGRA) += drm/output.o drm/rgb.o drm/hdmi.o
-host1x-$(CONFIG_DRM_TEGRA) += drm/gem.o
-host1x-$(CONFIG_DRM_TEGRA) += drm/gr2d.o
 obj-$(CONFIG_TEGRA_HOST1X) += host1x.o
+obj-$(CONFIG_TEGRA_HOST1X_CONTEXT_BUS) += context_bus.o
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
new file mode 100644
index 000000000000..723a80895cd4
--- /dev/null
+++ b/drivers/gpu/host1x/bus.c
@@ -0,0 +1,964 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012-2013, NVIDIA Corporation
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/host1x.h>
+#include <linux/of.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/of_device.h>
+
+#include "bus.h"
+#include "dev.h"
+
+static DEFINE_MUTEX(clients_lock);
+static LIST_HEAD(clients);
+
+static DEFINE_MUTEX(drivers_lock);
+static LIST_HEAD(drivers);
+
+static DEFINE_MUTEX(devices_lock);
+static LIST_HEAD(devices);
+
+struct host1x_subdev {
+	struct host1x_client *client;
+	struct device_node *np;
+	struct list_head list;
+};
+
+/**
+ * host1x_subdev_add() - add a new subdevice with an associated device node
+ * @device: host1x device to add the subdevice to
+ * @driver: host1x driver containing the subdevices
+ * @np: device node
+ */
+static int host1x_subdev_add(struct host1x_device *device,
+			     struct host1x_driver *driver,
+			     struct device_node *np)
+{
+	struct host1x_subdev *subdev;
+	int err;
+
+	subdev = kzalloc(sizeof(*subdev), GFP_KERNEL);
+	if (!subdev)
+		return -ENOMEM;
+
+	INIT_LIST_HEAD(&subdev->list);
+	subdev->np = of_node_get(np);
+
+	mutex_lock(&device->subdevs_lock);
+	list_add_tail(&subdev->list, &device->subdevs);
+	mutex_unlock(&device->subdevs_lock);
+
+	/* recursively add children */
+	for_each_child_of_node_scoped(np, child) {
+		if (of_match_node(driver->subdevs, child) &&
+		    of_device_is_available(child)) {
+			err = host1x_subdev_add(device, driver, child);
+			if (err < 0) {
+				/* XXX cleanup? */
+				return err;
+			}
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * host1x_subdev_del() - remove subdevice
+ * @subdev: subdevice to remove
+ */
+static void host1x_subdev_del(struct host1x_subdev *subdev)
+{
+	list_del(&subdev->list);
+	of_node_put(subdev->np);
+	kfree(subdev);
+}
+
+/**
+ * host1x_device_parse_dt() - scan device tree and add matching subdevices
+ * @device: host1x logical device
+ * @driver: host1x driver
+ */
+static int host1x_device_parse_dt(struct host1x_device *device,
+				  struct host1x_driver *driver)
+{
+	int err;
+
+	for_each_child_of_node_scoped(device->dev.parent->of_node, np) {
+		if (of_match_node(driver->subdevs, np) &&
+		    of_device_is_available(np)) {
+			err = host1x_subdev_add(device, driver, np);
+			if (err < 0)
+				return err;
+		}
+	}
+
+	return 0;
+}
+
+static void host1x_subdev_register(struct host1x_device *device,
+				   struct host1x_subdev *subdev,
+				   struct host1x_client *client)
+{
+	int err;
+
+	/*
+	 * Move the subdevice to the list of active (registered) subdevices
+	 * and associate it with a client. At the same time, associate the
+	 * client with its parent device.
+	 */
+	mutex_lock(&device->subdevs_lock);
+	mutex_lock(&device->clients_lock);
+	list_move_tail(&client->list, &device->clients);
+	list_move_tail(&subdev->list, &device->active);
+	client->host = &device->dev;
+	subdev->client = client;
+	mutex_unlock(&device->clients_lock);
+	mutex_unlock(&device->subdevs_lock);
+
+	if (list_empty(&device->subdevs)) {
+		err = device_add(&device->dev);
+		if (err < 0)
+			dev_err(&device->dev, "failed to add: %d\n", err);
+		else
+			device->registered = true;
+	}
+}
+
+static void __host1x_subdev_unregister(struct host1x_device *device,
+				       struct host1x_subdev *subdev)
+{
+	struct host1x_client *client = subdev->client;
+
+	/*
+	 * If all subdevices have been activated, we're about to remove the
+	 * first active subdevice, so unload the driver first.
+	 */
+	if (list_empty(&device->subdevs)) {
+		if (device->registered) {
+			device->registered = false;
+			device_del(&device->dev);
+		}
+	}
+
+	/*
+	 * Move the subdevice back to the list of idle subdevices and remove
+	 * it from list of clients.
+	 */
+	mutex_lock(&device->clients_lock);
+	subdev->client = NULL;
+	client->host = NULL;
+	list_move_tail(&subdev->list, &device->subdevs);
+	/*
+	 * XXX: Perhaps don't do this here, but rather explicitly remove it
+	 * when the device is about to be deleted.
+	 *
+	 * This is somewhat complicated by the fact that this function is
+	 * used to remove the subdevice when a client is unregistered but
+	 * also when the composite device is about to be removed.
+	 */
+	list_del_init(&client->list);
+	mutex_unlock(&device->clients_lock);
+}
+
+static void host1x_subdev_unregister(struct host1x_device *device,
+				     struct host1x_subdev *subdev)
+{
+	mutex_lock(&device->subdevs_lock);
+	__host1x_subdev_unregister(device, subdev);
+	mutex_unlock(&device->subdevs_lock);
+}
+
+/**
+ * host1x_device_init() - initialize a host1x logical device
+ * @device: host1x logical device
+ *
+ * The driver for the host1x logical device can call this during execution of
+ * its &host1x_driver.probe implementation to initialize each of its clients.
+ * The client drivers access the subsystem specific driver data using the
+ * &host1x_client.parent field and driver data associated with it (usually by
+ * calling dev_get_drvdata()).
+ */
+int host1x_device_init(struct host1x_device *device)
+{
+	struct host1x_client *client;
+	int err;
+
+	mutex_lock(&device->clients_lock);
+
+	list_for_each_entry(client, &device->clients, list) {
+		if (client->ops && client->ops->early_init) {
+			err = client->ops->early_init(client);
+			if (err < 0) {
+				dev_err(&device->dev, "failed to early initialize %s: %d\n",
+					dev_name(client->dev), err);
+				goto teardown_late;
+			}
+		}
+	}
+
+	list_for_each_entry(client, &device->clients, list) {
+		if (client->ops && client->ops->init) {
+			err = client->ops->init(client);
+			if (err < 0) {
+				dev_err(&device->dev,
+					"failed to initialize %s: %d\n",
+					dev_name(client->dev), err);
+				goto teardown;
+			}
+		}
+	}
+
+	mutex_unlock(&device->clients_lock);
+
+	return 0;
+
+teardown:
+	list_for_each_entry_continue_reverse(client, &device->clients, list)
+		if (client->ops->exit)
+			client->ops->exit(client);
+
+	/* reset client to end of list for late teardown */
+	client = list_entry(&device->clients, struct host1x_client, list);
+
+teardown_late:
+	list_for_each_entry_continue_reverse(client, &device->clients, list)
+		if (client->ops->late_exit)
+			client->ops->late_exit(client);
+
+	mutex_unlock(&device->clients_lock);
+	return err;
+}
+EXPORT_SYMBOL(host1x_device_init);
+
+/**
+ * host1x_device_exit() - uninitialize host1x logical device
+ * @device: host1x logical device
+ *
+ * When the driver for a host1x logical device is unloaded, it can call this
+ * function to tear down each of its clients. Typically this is done after a
+ * subsystem-specific data structure is removed and the functionality can no
+ * longer be used.
+ */
+int host1x_device_exit(struct host1x_device *device)
+{
+	struct host1x_client *client;
+	int err;
+
+	mutex_lock(&device->clients_lock);
+
+	list_for_each_entry_reverse(client, &device->clients, list) {
+		if (client->ops && client->ops->exit) {
+			err = client->ops->exit(client);
+			if (err < 0) {
+				dev_err(&device->dev,
+					"failed to cleanup %s: %d\n",
+					dev_name(client->dev), err);
+				mutex_unlock(&device->clients_lock);
+				return err;
+			}
+		}
+	}
+
+	list_for_each_entry_reverse(client, &device->clients, list) {
+		if (client->ops && client->ops->late_exit) {
+			err = client->ops->late_exit(client);
+			if (err < 0) {
+				dev_err(&device->dev, "failed to late cleanup %s: %d\n",
+					dev_name(client->dev), err);
+				mutex_unlock(&device->clients_lock);
+				return err;
+			}
+		}
+	}
+
+	mutex_unlock(&device->clients_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(host1x_device_exit);
+
+static int host1x_add_client(struct host1x *host1x,
+			     struct host1x_client *client)
+{
+	struct host1x_device *device;
+	struct host1x_subdev *subdev;
+
+	mutex_lock(&host1x->devices_lock);
+
+	list_for_each_entry(device, &host1x->devices, list) {
+		list_for_each_entry(subdev, &device->subdevs, list) {
+			if (subdev->np == client->dev->of_node) {
+				host1x_subdev_register(device, subdev, client);
+				mutex_unlock(&host1x->devices_lock);
+				return 0;
+			}
+		}
+	}
+
+	mutex_unlock(&host1x->devices_lock);
+	return -ENODEV;
+}
+
+static int host1x_del_client(struct host1x *host1x,
+			     struct host1x_client *client)
+{
+	struct host1x_device *device, *dt;
+	struct host1x_subdev *subdev;
+
+	mutex_lock(&host1x->devices_lock);
+
+	list_for_each_entry_safe(device, dt, &host1x->devices, list) {
+		list_for_each_entry(subdev, &device->active, list) {
+			if (subdev->client == client) {
+				host1x_subdev_unregister(device, subdev);
+				mutex_unlock(&host1x->devices_lock);
+				return 0;
+			}
+		}
+	}
+
+	mutex_unlock(&host1x->devices_lock);
+	return -ENODEV;
+}
+
+static int host1x_device_match(struct device *dev, const struct device_driver *drv)
+{
+	return strcmp(dev_name(dev), drv->name) == 0;
+}
+
+/*
+ * Note that this is really only needed for backwards compatibility
+ * with libdrm, which parses this information from sysfs and will
+ * fail if it can't find the OF_FULLNAME, specifically.
+ */
+static int host1x_device_uevent(const struct device *dev,
+				struct kobj_uevent_env *env)
+{
+	of_device_uevent(dev->parent, env);
+
+	return 0;
+}
+
+static const struct dev_pm_ops host1x_device_pm_ops = {
+	.suspend = pm_generic_suspend,
+	.resume = pm_generic_resume,
+	.freeze = pm_generic_freeze,
+	.thaw = pm_generic_thaw,
+	.poweroff = pm_generic_poweroff,
+	.restore = pm_generic_restore,
+};
+
+const struct bus_type host1x_bus_type = {
+	.name = "host1x",
+	.match = host1x_device_match,
+	.uevent = host1x_device_uevent,
+	.pm = &host1x_device_pm_ops,
+};
+
+static void __host1x_device_del(struct host1x_device *device)
+{
+	struct host1x_subdev *subdev, *sd;
+	struct host1x_client *client, *cl;
+
+	mutex_lock(&device->subdevs_lock);
+
+	/* unregister subdevices */
+	list_for_each_entry_safe(subdev, sd, &device->active, list) {
+		/*
+		 * host1x_subdev_unregister() will remove the client from
+		 * any lists, so we'll need to manually add it back to the
+		 * list of idle clients.
+		 *
+		 * XXX: Alternatively, perhaps don't remove the client from
+		 * any lists in host1x_subdev_unregister() and instead do
+		 * that explicitly from host1x_unregister_client()?
+		 */
+		client = subdev->client;
+
+		__host1x_subdev_unregister(device, subdev);
+
+		/* add the client to the list of idle clients */
+		mutex_lock(&clients_lock);
+		list_add_tail(&client->list, &clients);
+		mutex_unlock(&clients_lock);
+	}
+
+	/* remove subdevices */
+	list_for_each_entry_safe(subdev, sd, &device->subdevs, list)
+		host1x_subdev_del(subdev);
+
+	mutex_unlock(&device->subdevs_lock);
+
+	/* move clients to idle list */
+	mutex_lock(&clients_lock);
+	mutex_lock(&device->clients_lock);
+
+	list_for_each_entry_safe(client, cl, &device->clients, list)
+		list_move_tail(&client->list, &clients);
+
+	mutex_unlock(&device->clients_lock);
+	mutex_unlock(&clients_lock);
+
+	/* finally remove the device */
+	list_del_init(&device->list);
+}
+
+static void host1x_device_release(struct device *dev)
+{
+	struct host1x_device *device = to_host1x_device(dev);
+
+	__host1x_device_del(device);
+	kfree(device);
+}
+
+static int host1x_device_add(struct host1x *host1x,
+			     struct host1x_driver *driver)
+{
+	struct host1x_client *client, *tmp;
+	struct host1x_subdev *subdev;
+	struct host1x_device *device;
+	int err;
+
+	device = kzalloc(sizeof(*device), GFP_KERNEL);
+	if (!device)
+		return -ENOMEM;
+
+	device_initialize(&device->dev);
+
+	mutex_init(&device->subdevs_lock);
+	INIT_LIST_HEAD(&device->subdevs);
+	INIT_LIST_HEAD(&device->active);
+	mutex_init(&device->clients_lock);
+	INIT_LIST_HEAD(&device->clients);
+	INIT_LIST_HEAD(&device->list);
+	device->driver = driver;
+
+	device->dev.coherent_dma_mask = host1x->dev->coherent_dma_mask;
+	device->dev.dma_mask = &device->dev.coherent_dma_mask;
+	dev_set_name(&device->dev, "%s", driver->driver.name);
+	device->dev.release = host1x_device_release;
+	device->dev.bus = &host1x_bus_type;
+	device->dev.parent = host1x->dev;
+
+	device->dev.dma_parms = &device->dma_parms;
+	dma_set_max_seg_size(&device->dev, UINT_MAX);
+
+	err = host1x_device_parse_dt(device, driver);
+	if (err < 0) {
+		kfree(device);
+		return err;
+	}
+
+	list_add_tail(&device->list, &host1x->devices);
+
+	mutex_lock(&clients_lock);
+
+	list_for_each_entry_safe(client, tmp, &clients, list) {
+		list_for_each_entry(subdev, &device->subdevs, list) {
+			if (subdev->np == client->dev->of_node) {
+				host1x_subdev_register(device, subdev, client);
+				break;
+			}
+		}
+	}
+
+	mutex_unlock(&clients_lock);
+
+	/*
+	 * Add device even if there are no subdevs to ensure syncpoint functionality
+	 * is available regardless of whether any engine subdevices are present
+	 */
+	if (list_empty(&device->subdevs)) {
+		err = device_add(&device->dev);
+		if (err < 0)
+			dev_err(&device->dev, "failed to add device: %d\n", err);
+		else
+			device->registered = true;
+	}
+
+	return 0;
+}
+
+/*
+ * Removes a device by first unregistering any subdevices and then removing
+ * itself from the list of devices.
+ *
+ * This function must be called with the host1x->devices_lock held.
+ */
+static void host1x_device_del(struct host1x *host1x,
+			      struct host1x_device *device)
+{
+	if (device->registered) {
+		device->registered = false;
+		device_del(&device->dev);
+	}
+
+	put_device(&device->dev);
+}
+
+static void host1x_attach_driver(struct host1x *host1x,
+				 struct host1x_driver *driver)
+{
+	struct host1x_device *device;
+	int err;
+
+	mutex_lock(&host1x->devices_lock);
+
+	list_for_each_entry(device, &host1x->devices, list) {
+		if (device->driver == driver) {
+			mutex_unlock(&host1x->devices_lock);
+			return;
+		}
+	}
+
+	err = host1x_device_add(host1x, driver);
+	if (err < 0)
+		dev_err(host1x->dev, "failed to allocate device: %d\n", err);
+
+	mutex_unlock(&host1x->devices_lock);
+}
+
+static void host1x_detach_driver(struct host1x *host1x,
+				 struct host1x_driver *driver)
+{
+	struct host1x_device *device, *tmp;
+
+	mutex_lock(&host1x->devices_lock);
+
+	list_for_each_entry_safe(device, tmp, &host1x->devices, list)
+		if (device->driver == driver)
+			host1x_device_del(host1x, device);
+
+	mutex_unlock(&host1x->devices_lock);
+}
+
+static int host1x_devices_show(struct seq_file *s, void *data)
+{
+	struct host1x *host1x = s->private;
+	struct host1x_device *device;
+
+	mutex_lock(&host1x->devices_lock);
+
+	list_for_each_entry(device, &host1x->devices, list) {
+		struct host1x_subdev *subdev;
+
+		seq_printf(s, "%s\n", dev_name(&device->dev));
+
+		mutex_lock(&device->subdevs_lock);
+
+		list_for_each_entry(subdev, &device->active, list)
+			seq_printf(s, "  %pOFf: %s\n", subdev->np,
+				   dev_name(subdev->client->dev));
+
+		list_for_each_entry(subdev, &device->subdevs, list)
+			seq_printf(s, "  %pOFf:\n", subdev->np);
+
+		mutex_unlock(&device->subdevs_lock);
+	}
+
+	mutex_unlock(&host1x->devices_lock);
+
+	return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(host1x_devices);
+
+/**
+ * host1x_register() - register a host1x controller
+ * @host1x: host1x controller
+ *
+ * The host1x controller driver uses this to register a host1x controller with
+ * the infrastructure. Note that all Tegra SoC generations have only ever come
+ * with a single host1x instance, so this function is somewhat academic.
+ */
+int host1x_register(struct host1x *host1x)
+{
+	struct host1x_driver *driver;
+
+	mutex_lock(&devices_lock);
+	list_add_tail(&host1x->list, &devices);
+	mutex_unlock(&devices_lock);
+
+	mutex_lock(&drivers_lock);
+
+	list_for_each_entry(driver, &drivers, list)
+		host1x_attach_driver(host1x, driver);
+
+	mutex_unlock(&drivers_lock);
+
+	debugfs_create_file("devices", S_IRUGO, host1x->debugfs, host1x,
+			    &host1x_devices_fops);
+
+	return 0;
+}
+
+/**
+ * host1x_unregister() - unregister a host1x controller
+ * @host1x: host1x controller
+ *
+ * The host1x controller driver uses this to remove a host1x controller from
+ * the infrastructure.
+ */
+int host1x_unregister(struct host1x *host1x)
+{
+	struct host1x_driver *driver;
+
+	mutex_lock(&drivers_lock);
+
+	list_for_each_entry(driver, &drivers, list)
+		host1x_detach_driver(host1x, driver);
+
+	mutex_unlock(&drivers_lock);
+
+	mutex_lock(&devices_lock);
+	list_del_init(&host1x->list);
+	mutex_unlock(&devices_lock);
+
+	return 0;
+}
+
+static int host1x_device_probe(struct device *dev)
+{
+	struct host1x_driver *driver = to_host1x_driver(dev->driver);
+	struct host1x_device *device = to_host1x_device(dev);
+
+	if (driver->probe)
+		return driver->probe(device);
+
+	return 0;
+}
+
+static int host1x_device_remove(struct device *dev)
+{
+	struct host1x_driver *driver = to_host1x_driver(dev->driver);
+	struct host1x_device *device = to_host1x_device(dev);
+
+	if (driver->remove)
+		return driver->remove(device);
+
+	return 0;
+}
+
+static void host1x_device_shutdown(struct device *dev)
+{
+	struct host1x_driver *driver = to_host1x_driver(dev->driver);
+	struct host1x_device *device = to_host1x_device(dev);
+
+	if (driver->shutdown)
+		driver->shutdown(device);
+}
+
+/**
+ * host1x_driver_register_full() - register a host1x driver
+ * @driver: host1x driver
+ * @owner: owner module
+ *
+ * Drivers for host1x logical devices call this function to register a driver
+ * with the infrastructure. Note that since these drive logical devices, the
+ * registration of the driver actually triggers tho logical device creation.
+ * A logical device will be created for each host1x instance.
+ */
+int host1x_driver_register_full(struct host1x_driver *driver,
+				struct module *owner)
+{
+	struct host1x *host1x;
+
+	INIT_LIST_HEAD(&driver->list);
+
+	mutex_lock(&drivers_lock);
+	list_add_tail(&driver->list, &drivers);
+	mutex_unlock(&drivers_lock);
+
+	mutex_lock(&devices_lock);
+
+	list_for_each_entry(host1x, &devices, list)
+		host1x_attach_driver(host1x, driver);
+
+	mutex_unlock(&devices_lock);
+
+	driver->driver.bus = &host1x_bus_type;
+	driver->driver.owner = owner;
+	driver->driver.probe = host1x_device_probe;
+	driver->driver.remove = host1x_device_remove;
+	driver->driver.shutdown = host1x_device_shutdown;
+
+	return driver_register(&driver->driver);
+}
+EXPORT_SYMBOL(host1x_driver_register_full);
+
+/**
+ * host1x_driver_unregister() - unregister a host1x driver
+ * @driver: host1x driver
+ *
+ * Unbinds the driver from each of the host1x logical devices that it is
+ * bound to, effectively removing the subsystem devices that they represent.
+ */
+void host1x_driver_unregister(struct host1x_driver *driver)
+{
+	struct host1x *host1x;
+
+	driver_unregister(&driver->driver);
+
+	mutex_lock(&devices_lock);
+
+	list_for_each_entry(host1x, &devices, list)
+		host1x_detach_driver(host1x, driver);
+
+	mutex_unlock(&devices_lock);
+
+	mutex_lock(&drivers_lock);
+	list_del_init(&driver->list);
+	mutex_unlock(&drivers_lock);
+}
+EXPORT_SYMBOL(host1x_driver_unregister);
+
+/**
+ * __host1x_client_init() - initialize a host1x client
+ * @client: host1x client
+ * @key: lock class key for the client-specific mutex
+ */
+void __host1x_client_init(struct host1x_client *client, struct lock_class_key *key)
+{
+	host1x_bo_cache_init(&client->cache);
+	INIT_LIST_HEAD(&client->list);
+	__mutex_init(&client->lock, "host1x client lock", key);
+	client->usecount = 0;
+}
+EXPORT_SYMBOL(__host1x_client_init);
+
+/**
+ * host1x_client_exit() - uninitialize a host1x client
+ * @client: host1x client
+ */
+void host1x_client_exit(struct host1x_client *client)
+{
+	mutex_destroy(&client->lock);
+}
+EXPORT_SYMBOL(host1x_client_exit);
+
+/**
+ * __host1x_client_register() - register a host1x client
+ * @client: host1x client
+ *
+ * Registers a host1x client with each host1x controller instance. Note that
+ * each client will only match their parent host1x controller and will only be
+ * associated with that instance. Once all clients have been registered with
+ * their parent host1x controller, the infrastructure will set up the logical
+ * device and call host1x_device_init(), which will in turn call each client's
+ * &host1x_client_ops.init implementation.
+ */
+int __host1x_client_register(struct host1x_client *client)
+{
+	struct host1x *host1x;
+	int err;
+
+	mutex_lock(&devices_lock);
+
+	list_for_each_entry(host1x, &devices, list) {
+		err = host1x_add_client(host1x, client);
+		if (!err) {
+			mutex_unlock(&devices_lock);
+			return 0;
+		}
+	}
+
+	mutex_unlock(&devices_lock);
+
+	mutex_lock(&clients_lock);
+	list_add_tail(&client->list, &clients);
+	mutex_unlock(&clients_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(__host1x_client_register);
+
+/**
+ * host1x_client_unregister() - unregister a host1x client
+ * @client: host1x client
+ *
+ * Removes a host1x client from its host1x controller instance. If a logical
+ * device has already been initialized, it will be torn down.
+ */
+void host1x_client_unregister(struct host1x_client *client)
+{
+	struct host1x_client *c;
+	struct host1x *host1x;
+	int err;
+
+	mutex_lock(&devices_lock);
+
+	list_for_each_entry(host1x, &devices, list) {
+		err = host1x_del_client(host1x, client);
+		if (!err) {
+			mutex_unlock(&devices_lock);
+			return;
+		}
+	}
+
+	mutex_unlock(&devices_lock);
+	mutex_lock(&clients_lock);
+
+	list_for_each_entry(c, &clients, list) {
+		if (c == client) {
+			list_del_init(&c->list);
+			break;
+		}
+	}
+
+	mutex_unlock(&clients_lock);
+
+	host1x_bo_cache_destroy(&client->cache);
+}
+EXPORT_SYMBOL(host1x_client_unregister);
+
+int host1x_client_suspend(struct host1x_client *client)
+{
+	int err = 0;
+
+	mutex_lock(&client->lock);
+
+	if (client->usecount == 1) {
+		if (client->ops && client->ops->suspend) {
+			err = client->ops->suspend(client);
+			if (err < 0)
+				goto unlock;
+		}
+	}
+
+	client->usecount--;
+	dev_dbg(client->dev, "use count: %u\n", client->usecount);
+
+	if (client->parent) {
+		err = host1x_client_suspend(client->parent);
+		if (err < 0)
+			goto resume;
+	}
+
+	goto unlock;
+
+resume:
+	if (client->usecount == 0)
+		if (client->ops && client->ops->resume)
+			client->ops->resume(client);
+
+	client->usecount++;
+unlock:
+	mutex_unlock(&client->lock);
+	return err;
+}
+EXPORT_SYMBOL(host1x_client_suspend);
+
+int host1x_client_resume(struct host1x_client *client)
+{
+	int err = 0;
+
+	mutex_lock(&client->lock);
+
+	if (client->parent) {
+		err = host1x_client_resume(client->parent);
+		if (err < 0)
+			goto unlock;
+	}
+
+	if (client->usecount == 0) {
+		if (client->ops && client->ops->resume) {
+			err = client->ops->resume(client);
+			if (err < 0)
+				goto suspend;
+		}
+	}
+
+	client->usecount++;
+	dev_dbg(client->dev, "use count: %u\n", client->usecount);
+
+	goto unlock;
+
+suspend:
+	if (client->parent)
+		host1x_client_suspend(client->parent);
+unlock:
+	mutex_unlock(&client->lock);
+	return err;
+}
+EXPORT_SYMBOL(host1x_client_resume);
+
+struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo,
+					enum dma_data_direction dir,
+					struct host1x_bo_cache *cache)
+{
+	struct host1x_bo_mapping *mapping;
+
+	if (cache) {
+		mutex_lock(&cache->lock);
+
+		list_for_each_entry(mapping, &cache->mappings, entry) {
+			if (mapping->bo == bo && mapping->direction == dir) {
+				kref_get(&mapping->ref);
+				goto unlock;
+			}
+		}
+	}
+
+	mapping = bo->ops->pin(dev, bo, dir);
+	if (IS_ERR(mapping))
+		goto unlock;
+
+	spin_lock(&mapping->bo->lock);
+	list_add_tail(&mapping->list, &bo->mappings);
+	spin_unlock(&mapping->bo->lock);
+
+	if (cache) {
+		INIT_LIST_HEAD(&mapping->entry);
+		mapping->cache = cache;
+
+		list_add_tail(&mapping->entry, &cache->mappings);
+
+		/* bump reference count to track the copy in the cache */
+		kref_get(&mapping->ref);
+	}
+
+unlock:
+	if (cache)
+		mutex_unlock(&cache->lock);
+
+	return mapping;
+}
+EXPORT_SYMBOL(host1x_bo_pin);
+
+static void __host1x_bo_unpin(struct kref *ref)
+{
+	struct host1x_bo_mapping *mapping = to_host1x_bo_mapping(ref);
+
+	/*
+	 * When the last reference of the mapping goes away, make sure to remove the mapping from
+	 * the cache.
+	 */
+	if (mapping->cache)
+		list_del(&mapping->entry);
+
+	spin_lock(&mapping->bo->lock);
+	list_del(&mapping->list);
+	spin_unlock(&mapping->bo->lock);
+
+	mapping->bo->ops->unpin(mapping);
+}
+
+void host1x_bo_unpin(struct host1x_bo_mapping *mapping)
+{
+	struct host1x_bo_cache *cache = mapping->cache;
+
+	if (cache)
+		mutex_lock(&cache->lock);
+
+	kref_put(&mapping->ref, __host1x_bo_unpin);
+
+	if (cache)
+		mutex_unlock(&cache->lock);
+}
+EXPORT_SYMBOL(host1x_bo_unpin);
diff --git a/drivers/gpu/host1x/bus.h b/drivers/gpu/host1x/bus.h
new file mode 100644
index 000000000000..a80ceadfeb34
--- /dev/null
+++ b/drivers/gpu/host1x/bus.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012 Avionic Design GmbH
+ * Copyright (C) 2012-2013, NVIDIA Corporation
+ */
+
+#ifndef HOST1X_BUS_H
+#define HOST1X_BUS_H
+
+struct bus_type;
+struct host1x;
+
+extern const struct bus_type host1x_bus_type;
+
+int host1x_register(struct host1x *host1x);
+int host1x_unregister(struct host1x *host1x);
+
+#endif
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index de72172d3b5f..ba2e572567c0 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -1,25 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Tegra host1x Command DMA
  *
  * Copyright (c) 2010-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 
 #include <asm/cacheflush.h>
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
+#include <linux/host1x.h>
 #include <linux/interrupt.h>
 #include <linux/kernel.h>
 #include <linux/kfifo.h>
@@ -30,7 +20,6 @@
 #include "channel.h"
 #include "dev.h"
 #include "debug.h"
-#include "host1x_bo.h"
 #include "job.h"
 
 /*
@@ -41,7 +30,17 @@
  * means that the push buffer is full, not empty.
  */
 
-#define HOST1X_PUSHBUFFER_SLOTS	512
+/*
+ * Typically the commands written into the push buffer are a pair of words. We
+ * use slots to represent each of these pairs and to simplify things. Note the
+ * strange number of slots allocated here. 512 slots will fit exactly within a
+ * single memory page. We also need one additional word at the end of the push
+ * buffer for the RESTART opcode that will instruct the CDMA to jump back to
+ * the beginning of the push buffer. With 512 slots, this means that we'll use
+ * 2 memory pages and waste 4092 bytes of the second page that will never be
+ * used.
+ */
+#define HOST1X_PUSHBUFFER_SLOTS	511
 
 /*
  * Clean up push buffer resources
@@ -51,9 +50,15 @@ static void host1x_pushbuffer_destroy(struct push_buffer *pb)
 	struct host1x_cdma *cdma = pb_to_cdma(pb);
 	struct host1x *host1x = cdma_to_host1x(cdma);
 
-	if (pb->phys != 0)
-		dma_free_writecombine(host1x->dev, pb->size_bytes + 4,
-				      pb->mapped, pb->phys);
+	if (!pb->mapped)
+		return;
+
+	if (host1x->domain) {
+		iommu_unmap(host1x->domain, pb->dma, pb->alloc_size);
+		free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma));
+	}
+
+	dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys);
 
 	pb->mapped = NULL;
 	pb->phys = 0;
@@ -66,28 +71,64 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
 {
 	struct host1x_cdma *cdma = pb_to_cdma(pb);
 	struct host1x *host1x = cdma_to_host1x(cdma);
+	struct iova *alloc;
+	u32 size;
+	int err;
 
 	pb->mapped = NULL;
 	pb->phys = 0;
-	pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8;
+	pb->size = HOST1X_PUSHBUFFER_SLOTS * 8;
+
+	size = pb->size + 4;
 
 	/* initialize buffer pointers */
-	pb->fence = pb->size_bytes - 8;
+	pb->fence = pb->size - 8;
 	pb->pos = 0;
 
-	/* allocate and map pushbuffer memory */
-	pb->mapped = dma_alloc_writecombine(host1x->dev, pb->size_bytes + 4,
-					    &pb->phys, GFP_KERNEL);
-	if (!pb->mapped)
-		goto fail;
+	if (host1x->domain) {
+		unsigned long shift;
+
+		size = iova_align(&host1x->iova, size);
+
+		pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
+					  GFP_KERNEL);
+		if (!pb->mapped)
+			return -ENOMEM;
+
+		shift = iova_shift(&host1x->iova);
+		alloc = alloc_iova(&host1x->iova, size >> shift,
+				   host1x->iova_end >> shift, true);
+		if (!alloc) {
+			err = -ENOMEM;
+			goto iommu_free_mem;
+		}
+
+		pb->dma = iova_dma_addr(&host1x->iova, alloc);
+		err = iommu_map(host1x->domain, pb->dma, pb->phys, size,
+				IOMMU_READ, GFP_KERNEL);
+		if (err)
+			goto iommu_free_iova;
+	} else {
+		pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys,
+					  GFP_KERNEL);
+		if (!pb->mapped)
+			return -ENOMEM;
+
+		pb->dma = pb->phys;
+	}
+
+	pb->alloc_size = size;
 
 	host1x_hw_pushbuffer_init(host1x, pb);
 
 	return 0;
 
-fail:
-	host1x_pushbuffer_destroy(pb);
-	return -ENOMEM;
+iommu_free_iova:
+	__free_iova(&host1x->iova, alloc);
+iommu_free_mem:
+	dma_free_wc(host1x->dev, size, pb->mapped, pb->phys);
+
+	return err;
 }
 
 /*
@@ -96,12 +137,15 @@ fail:
  */
 static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
 {
-	u32 pos = pb->pos;
-	u32 *p = (u32 *)((u32)pb->mapped + pos);
-	WARN_ON(pos == pb->fence);
+	u32 *p = (u32 *)((void *)pb->mapped + pb->pos);
+
+	WARN_ON(pb->pos == pb->fence);
 	*(p++) = op1;
 	*(p++) = op2;
-	pb->pos = (pos + 8) & (pb->size_bytes - 1);
+	pb->pos += 8;
+
+	if (pb->pos >= pb->size)
+		pb->pos -= pb->size;
 }
 
 /*
@@ -111,7 +155,10 @@ static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2)
 static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots)
 {
 	/* Advance the next write position */
-	pb->fence = (pb->fence + slots * 8) & (pb->size_bytes - 1);
+	pb->fence += slots * 8;
+
+	if (pb->fence >= pb->size)
+		pb->fence -= pb->size;
 }
 
 /*
@@ -119,7 +166,12 @@ static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots)
  */
 static u32 host1x_pushbuffer_space(struct push_buffer *pb)
 {
-	return ((pb->fence - pb->pos) & (pb->size_bytes - 1)) / 8;
+	unsigned int fence = pb->fence;
+
+	if (pb->fence < pb->pos)
+		fence += pb->size;
+
+	return (fence - pb->pos) / 8;
 }
 
 /*
@@ -134,14 +186,19 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma,
 				     enum cdma_event event)
 {
 	for (;;) {
+		struct push_buffer *pb = &cdma->push_buffer;
 		unsigned int space;
 
-		if (event == CDMA_EVENT_SYNC_QUEUE_EMPTY)
+		switch (event) {
+		case CDMA_EVENT_SYNC_QUEUE_EMPTY:
 			space = list_empty(&cdma->sync_queue) ? 1 : 0;
-		else if (event == CDMA_EVENT_PUSH_BUFFER_SPACE) {
-			struct push_buffer *pb = &cdma->push_buffer;
+			break;
+
+		case CDMA_EVENT_PUSH_BUFFER_SPACE:
 			space = host1x_pushbuffer_space(pb);
-		} else {
+			break;
+
+		default:
 			WARN_ON(1);
 			return -EINVAL;
 		}
@@ -159,31 +216,68 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma,
 			mutex_lock(&cdma->lock);
 			continue;
 		}
+
 		cdma->event = event;
 
 		mutex_unlock(&cdma->lock);
-		down(&cdma->sem);
+		wait_for_completion(&cdma->complete);
 		mutex_lock(&cdma->lock);
 	}
+
 	return 0;
 }
 
 /*
+ * Sleep (if necessary) until the push buffer has enough free space.
+ *
+ * Must be called with the cdma lock held.
+ */
+static int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x,
+					     struct host1x_cdma *cdma,
+					     unsigned int needed)
+{
+	while (true) {
+		struct push_buffer *pb = &cdma->push_buffer;
+		unsigned int space;
+
+		space = host1x_pushbuffer_space(pb);
+		if (space >= needed)
+			break;
+
+		trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev),
+				       CDMA_EVENT_PUSH_BUFFER_SPACE);
+
+		/* If somebody has managed to already start waiting, yield */
+		if (cdma->event != CDMA_EVENT_NONE) {
+			mutex_unlock(&cdma->lock);
+			schedule();
+			mutex_lock(&cdma->lock);
+			continue;
+		}
+
+		cdma->event = CDMA_EVENT_PUSH_BUFFER_SPACE;
+
+		mutex_unlock(&cdma->lock);
+		wait_for_completion(&cdma->complete);
+		mutex_lock(&cdma->lock);
+	}
+
+	return 0;
+}
+/*
  * Start timer that tracks the time spent by the job.
  * Must be called with the cdma lock held.
  */
 static void cdma_start_timer_locked(struct host1x_cdma *cdma,
 				    struct host1x_job *job)
 {
-	struct host1x *host = cdma_to_host1x(cdma);
-
 	if (cdma->timeout.client) {
 		/* timer already started */
 		return;
 	}
 
 	cdma->timeout.client = job->client;
-	cdma->timeout.syncpt = host1x_syncpt_get(host, job->syncpt_id);
+	cdma->timeout.syncpt = job->syncpt;
 	cdma->timeout.syncpt_val = job->syncpt_end;
 	cdma->timeout.start_ktime = ktime_get();
 
@@ -198,7 +292,7 @@ static void cdma_start_timer_locked(struct host1x_cdma *cdma,
 static void stop_cdma_timer_locked(struct host1x_cdma *cdma)
 {
 	cancel_delayed_work(&cdma->timeout.wq);
-	cdma->timeout.client = 0;
+	cdma->timeout.client = NULL;
 }
 
 /*
@@ -214,26 +308,22 @@ static void stop_cdma_timer_locked(struct host1x_cdma *cdma)
 static void update_cdma_locked(struct host1x_cdma *cdma)
 {
 	bool signal = false;
-	struct host1x *host1x = cdma_to_host1x(cdma);
 	struct host1x_job *job, *n;
 
-	/* If CDMA is stopped, queue is cleared and we can return */
-	if (!cdma->running)
-		return;
-
 	/*
 	 * Walk the sync queue, reading the sync point registers as necessary,
 	 * to consume as many sync queue entries as possible without blocking
 	 */
 	list_for_each_entry_safe(job, n, &cdma->sync_queue, list) {
-		struct host1x_syncpt *sp =
-			host1x_syncpt_get(host1x, job->syncpt_id);
+		struct host1x_syncpt *sp = job->syncpt;
 
 		/* Check whether this syncpt has completed, and bail if not */
-		if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) {
+		if (!host1x_syncpt_is_expired(sp, job->syncpt_end) &&
+		    !job->cancelled) {
 			/* Start timer on next pending syncpt */
 			if (job->timeout)
 				cdma_start_timer_locked(cdma, job);
+
 			break;
 		}
 
@@ -247,7 +337,9 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
 		/* Pop push buffer slots */
 		if (job->num_slots) {
 			struct push_buffer *pb = &cdma->push_buffer;
+
 			host1x_pushbuffer_pop(pb, job->num_slots);
+
 			if (cdma->event == CDMA_EVENT_PUSH_BUFFER_SPACE)
 				signal = true;
 		}
@@ -262,18 +354,16 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
 
 	if (signal) {
 		cdma->event = CDMA_EVENT_NONE;
-		up(&cdma->sem);
+		complete(&cdma->complete);
 	}
 }
 
 void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
 				   struct device *dev)
 {
-	u32 restart_addr;
-	u32 syncpt_incrs;
-	struct host1x_job *job = NULL;
-	u32 syncpt_val;
 	struct host1x *host1x = cdma_to_host1x(cdma);
+	u32 restart_addr, syncpt_incrs, syncpt_val;
+	struct host1x_job *job, *next_job = NULL;
 
 	syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt);
 
@@ -291,40 +381,40 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
 		__func__);
 
 	list_for_each_entry(job, &cdma->sync_queue, list) {
-		if (syncpt_val < job->syncpt_end)
-			break;
+		if (syncpt_val < job->syncpt_end) {
+
+			if (!list_is_last(&job->list, &cdma->sync_queue))
+				next_job = list_next_entry(job, list);
+
+			goto syncpt_incr;
+		}
 
 		host1x_job_dump(dev, job);
 	}
 
+	/* all jobs have been completed */
+	job = NULL;
+
+syncpt_incr:
+
 	/*
-	 * Walk the sync_queue, first incrementing with the CPU syncpts that
-	 * are partially executed (the first buffer) or fully skipped while
-	 * still in the current context (slots are also NOP-ed).
+	 * Increment with CPU the remaining syncpts of a partially executed job.
 	 *
-	 * At the point contexts are interleaved, syncpt increments must be
-	 * done inline with the pushbuffer from a GATHER buffer to maintain
-	 * the order (slots are modified to be a GATHER of syncpt incrs).
-	 *
-	 * Note: save in restart_addr the location where the timed out buffer
-	 * started in the PB, so we can start the refetch from there (with the
-	 * modified NOP-ed PB slots). This lets things appear to have completed
-	 * properly for this buffer and resources are freed.
+	 * CDMA will continue execution starting with the next job or will get
+	 * into idle state.
 	 */
-
-	dev_dbg(dev, "%s: perform CPU incr on pending same ctx buffers\n",
-		__func__);
-
-	if (!list_empty(&cdma->sync_queue))
-		restart_addr = job->first_get;
+	if (next_job)
+		restart_addr = next_job->first_get;
 	else
 		restart_addr = cdma->last_pos;
 
-	/* do CPU increments as long as this context continues */
-	list_for_each_entry_from(job, &cdma->sync_queue, list) {
-		/* different context, gets us out of this loop */
-		if (job->client != cdma->timeout.client)
-			break;
+	if (!job)
+		goto resume;
+
+	/* do CPU increments for the remaining syncpts */
+	if (job->syncpt_recovery) {
+		dev_dbg(dev, "%s: perform CPU incr on pending buffers\n",
+			__func__);
 
 		/* won't need a timeout when replayed */
 		job->timeout = 0;
@@ -339,23 +429,74 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
 						syncpt_incrs, job->syncpt_end,
 						job->num_slots);
 
-		syncpt_val += syncpt_incrs;
-	}
+		dev_dbg(dev, "%s: finished sync_queue modification\n",
+			__func__);
+	} else {
+		struct host1x_job *failed_job = job;
+
+		host1x_job_dump(dev, job);
+
+		host1x_syncpt_set_locked(job->syncpt);
+		failed_job->cancelled = true;
+
+		list_for_each_entry_continue(job, &cdma->sync_queue, list) {
+			unsigned int i;
+
+			if (job->syncpt != failed_job->syncpt)
+				continue;
+
+			for (i = 0; i < job->num_slots; i++) {
+				unsigned int slot = (job->first_get/8 + i) %
+						    HOST1X_PUSHBUFFER_SLOTS;
+				u32 *mapped = cdma->push_buffer.mapped;
+
+				/*
+				 * Overwrite opcodes with 0 word writes
+				 * to offset 0xbad. This does nothing but
+				 * has a easily detected signature in debug
+				 * traces.
+				 *
+				 * On systems with MLOCK enforcement enabled,
+				 * the above 0 word writes would fall foul of
+				 * the enforcement. As such, in the first slot
+				 * put a RESTART_W opcode to the beginning
+				 * of the next job. We don't use this for older
+				 * chips since those only support the RESTART
+				 * opcode with inconvenient alignment requirements.
+				 */
+				if (i == 0 && host1x->info->has_wide_gather) {
+					unsigned int next_job = (job->first_get/8 + job->num_slots)
+						% HOST1X_PUSHBUFFER_SLOTS;
+					mapped[2*slot+0] = (0xd << 28) | (next_job * 2);
+					mapped[2*slot+1] = 0x0;
+				} else {
+					mapped[2*slot+0] = 0x1bad0000;
+					mapped[2*slot+1] = 0x1bad0000;
+				}
+			}
 
-	/* The following sumbits from the same client may be dependent on the
-	 * failed submit and therefore they may fail. Force a small timeout
-	 * to make the queue cleanup faster */
+			job->cancelled = true;
+		}
 
-	list_for_each_entry_from(job, &cdma->sync_queue, list)
-		if (job->client == cdma->timeout.client)
-			job->timeout = min_t(unsigned int, job->timeout, 500);
+		wmb();
 
-	dev_dbg(dev, "%s: finished sync_queue modification\n", __func__);
+		update_cdma_locked(cdma);
+	}
 
+resume:
 	/* roll back DMAGET and start up channel again */
 	host1x_hw_cdma_resume(host1x, cdma, restart_addr);
 }
 
+static void cdma_update_work(struct work_struct *work)
+{
+	struct host1x_cdma *cdma = container_of(work, struct host1x_cdma, update_work);
+
+	mutex_lock(&cdma->lock);
+	update_cdma_locked(cdma);
+	mutex_unlock(&cdma->lock);
+}
+
 /*
  * Create a cdma
  */
@@ -364,7 +505,8 @@ int host1x_cdma_init(struct host1x_cdma *cdma)
 	int err;
 
 	mutex_init(&cdma->lock);
-	sema_init(&cdma->sem, 0);
+	init_completion(&cdma->complete);
+	INIT_WORK(&cdma->update_work, cdma_update_work);
 
 	INIT_LIST_HEAD(&cdma->sync_queue);
 
@@ -375,6 +517,7 @@ int host1x_cdma_init(struct host1x_cdma *cdma)
 	err = host1x_pushbuffer_init(&cdma->push_buffer);
 	if (err)
 		return err;
+
 	return 0;
 }
 
@@ -406,18 +549,29 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)
 
 	mutex_lock(&cdma->lock);
 
+	/*
+	 * Check if syncpoint was locked due to previous job timeout.
+	 * This needs to be done within the cdma lock to avoid a race
+	 * with the timeout handler.
+	 */
+	if (job->syncpt->locked) {
+		mutex_unlock(&cdma->lock);
+		return -EPERM;
+	}
+
 	if (job->timeout) {
 		/* init state on first submit with timeout value */
 		if (!cdma->timeout.initialized) {
 			int err;
-			err = host1x_hw_cdma_timeout_init(host1x, cdma,
-							  job->syncpt_id);
+
+			err = host1x_hw_cdma_timeout_init(host1x, cdma);
 			if (err) {
 				mutex_unlock(&cdma->lock);
 				return err;
 			}
 		}
 	}
+
 	if (!cdma->running)
 		host1x_hw_cdma_start(host1x, cdma);
 
@@ -435,7 +589,6 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)
  */
 void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2)
 {
-	struct host1x *host1x = cdma_to_host1x(cdma);
 	struct push_buffer *pb = &cdma->push_buffer;
 	u32 slots_free = cdma->slots_free;
 
@@ -443,17 +596,62 @@ void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2)
 		trace_host1x_cdma_push(dev_name(cdma_to_channel(cdma)->dev),
 				       op1, op2);
 
-	if (slots_free == 0) {
-		host1x_hw_cdma_flush(host1x, cdma);
+	if (slots_free == 0)
 		slots_free = host1x_cdma_wait_locked(cdma,
 						CDMA_EVENT_PUSH_BUFFER_SPACE);
-	}
+
 	cdma->slots_free = slots_free - 1;
 	cdma->slots_used++;
 	host1x_pushbuffer_push(pb, op1, op2);
 }
 
 /*
+ * Push four words into two consecutive push buffer slots. Note that extra
+ * care needs to be taken not to split the two slots across the end of the
+ * push buffer. Otherwise the RESTART opcode at the end of the push buffer
+ * that ensures processing will restart at the beginning will break up the
+ * four words.
+ *
+ * Blocks as necessary if the push buffer is full.
+ */
+void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2,
+			   u32 op3, u32 op4)
+{
+	struct host1x_channel *channel = cdma_to_channel(cdma);
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	struct push_buffer *pb = &cdma->push_buffer;
+	unsigned int space, needed = 2, extra = 0;
+
+	if (host1x_debug_trace_cmdbuf)
+		trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2,
+					    op3, op4);
+
+	/* compute number of extra slots needed for padding */
+	if (pb->pos + 16 > pb->size) {
+		extra = (pb->size - pb->pos) / 8;
+		needed += extra;
+	}
+
+	host1x_cdma_wait_pushbuffer_space(host1x, cdma, needed);
+	space = host1x_pushbuffer_space(pb);
+
+	cdma->slots_free = space - needed;
+	cdma->slots_used += needed;
+
+	if (extra > 0) {
+		/*
+		 * If there isn't enough space at the tail of the pushbuffer,
+		 * insert a RESTART(0) here to go back to the beginning.
+		 * The code above adjusted the indexes appropriately.
+		 */
+		host1x_pushbuffer_push(pb, (0x5 << 28), 0xdead0000);
+	}
+
+	host1x_pushbuffer_push(pb, op1, op2);
+	host1x_pushbuffer_push(pb, op3, op4);
+}
+
+/*
  * End a cdma submit
  * Kick off DMA, add job to the sync queue, and a number of slots to be freed
  * from the pushbuffer. The handles for a submit must all be pinned at the same
@@ -485,7 +683,5 @@ void host1x_cdma_end(struct host1x_cdma *cdma,
  */
 void host1x_cdma_update(struct host1x_cdma *cdma)
 {
-	mutex_lock(&cdma->lock);
-	update_cdma_locked(cdma);
-	mutex_unlock(&cdma->lock);
+	schedule_work(&cdma->update_work);
 }
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index 313c4b784348..7fd8168af4f9 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -1,27 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Tegra host1x Command DMA
  *
  * Copyright (c) 2010-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __HOST1X_CDMA_H
 #define __HOST1X_CDMA_H
 
 #include <linux/sched.h>
-#include <linux/semaphore.h>
+#include <linux/completion.h>
 #include <linux/list.h>
+#include <linux/workqueue.h>
 
 struct host1x_syncpt;
 struct host1x_userctx_timeout;
@@ -42,11 +32,13 @@ struct host1x_job;
  */
 
 struct push_buffer {
-	u32 *mapped;			/* mapped pushbuffer memory */
+	void *mapped;			/* mapped pushbuffer memory */
+	dma_addr_t dma;			/* device address of pushbuffer */
 	dma_addr_t phys;		/* physical address of pushbuffer */
 	u32 fence;			/* index we've written */
 	u32 pos;			/* index to write to */
-	u32 size_bytes;
+	u32 size;
+	u32 alloc_size;
 };
 
 struct buffer_timeout {
@@ -56,7 +48,7 @@ struct buffer_timeout {
 	u32 syncpt_val;			/* syncpt value when completed */
 	ktime_t start_ktime;		/* starting time */
 	/* context timeout information */
-	int client;
+	struct host1x_client *client;
 };
 
 enum cdma_event {
@@ -67,8 +59,8 @@ enum cdma_event {
 
 struct host1x_cdma {
 	struct mutex lock;		/* controls access to shared state */
-	struct semaphore sem;		/* signalled when event occurs */
-	enum cdma_event event;		/* event that sem is waiting for */
+	struct completion complete;	/* signalled when event occurs */
+	enum cdma_event event;		/* event that complete is waiting for */
 	unsigned int slots_used;	/* pb slots used in current submit */
 	unsigned int slots_free;	/* pb slots free in current submit */
 	unsigned int first_get;		/* DMAGET value, where submit begins */
@@ -78,6 +70,7 @@ struct host1x_cdma {
 	struct buffer_timeout timeout;	/* channel's timeout state/wq */
 	bool running;
 	bool torndown;
+	struct work_struct update_work;
 };
 
 #define cdma_to_channel(cdma) container_of(cdma, struct host1x_channel, cdma)
@@ -86,9 +79,10 @@ struct host1x_cdma {
 
 int host1x_cdma_init(struct host1x_cdma *cdma);
 int host1x_cdma_deinit(struct host1x_cdma *cdma);
-void host1x_cdma_stop(struct host1x_cdma *cdma);
 int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job);
 void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2);
+void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2,
+			   u32 op3, u32 op4);
 void host1x_cdma_end(struct host1x_cdma *cdma, struct host1x_job *job);
 void host1x_cdma_update(struct host1x_cdma *cdma);
 void host1x_cdma_peek(struct host1x_cdma *cdma, u32 dmaget, int slot,
diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 83ea51b9f0fc..08077afe4cde 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -1,19 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Tegra host1x Channel
  *
  * Copyright (c) 2010-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/slab.h>
@@ -24,103 +13,174 @@
 #include "job.h"
 
 /* Constructor for the host1x device list */
-int host1x_channel_list_init(struct host1x *host)
+int host1x_channel_list_init(struct host1x_channel_list *chlist,
+			     unsigned int num_channels)
 {
-	INIT_LIST_HEAD(&host->chlist.list);
-	mutex_init(&host->chlist_mutex);
-
-	if (host->info->nb_channels > BITS_PER_LONG) {
-		WARN(1, "host1x hardware has more channels than supported by the driver\n");
-		return -ENOSYS;
+	chlist->channels = kcalloc(num_channels, sizeof(struct host1x_channel),
+				   GFP_KERNEL);
+	if (!chlist->channels)
+		return -ENOMEM;
+
+	chlist->allocated_channels = bitmap_zalloc(num_channels, GFP_KERNEL);
+	if (!chlist->allocated_channels) {
+		kfree(chlist->channels);
+		return -ENOMEM;
 	}
 
+	mutex_init(&chlist->lock);
+
 	return 0;
 }
 
+void host1x_channel_list_free(struct host1x_channel_list *chlist)
+{
+	bitmap_free(chlist->allocated_channels);
+	kfree(chlist->channels);
+}
+
 int host1x_job_submit(struct host1x_job *job)
 {
 	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
 
 	return host1x_hw_channel_submit(host, job);
 }
+EXPORT_SYMBOL(host1x_job_submit);
 
 struct host1x_channel *host1x_channel_get(struct host1x_channel *channel)
 {
-	int err = 0;
+	kref_get(&channel->refcount);
+
+	return channel;
+}
+EXPORT_SYMBOL(host1x_channel_get);
+
+/**
+ * host1x_channel_get_index() - Attempt to get channel reference by index
+ * @host: Host1x device object
+ * @index: Index of channel
+ *
+ * If channel number @index is currently allocated, increase its refcount
+ * and return a pointer to it. Otherwise, return NULL.
+ */
+struct host1x_channel *host1x_channel_get_index(struct host1x *host,
+						unsigned int index)
+{
+	struct host1x_channel *ch = &host->channel_list.channels[index];
+
+	if (!kref_get_unless_zero(&ch->refcount))
+		return NULL;
+
+	return ch;
+}
+
+void host1x_channel_stop(struct host1x_channel *channel)
+{
+	struct host1x *host = dev_get_drvdata(channel->dev->parent);
+
+	host1x_hw_cdma_stop(host, &channel->cdma);
+}
+EXPORT_SYMBOL(host1x_channel_stop);
+
+/**
+ * host1x_channel_stop_all() - disable CDMA on allocated channels
+ * @host: host1x instance
+ *
+ * Stop CDMA on allocated channels
+ */
+void host1x_channel_stop_all(struct host1x *host)
+{
+	struct host1x_channel_list *chlist = &host->channel_list;
+	int bit;
 
-	mutex_lock(&channel->reflock);
+	mutex_lock(&chlist->lock);
 
-	if (channel->refcount == 0)
-		err = host1x_cdma_init(&channel->cdma);
+	for_each_set_bit(bit, chlist->allocated_channels, host->info->nb_channels)
+		host1x_channel_stop(&chlist->channels[bit]);
 
-	if (!err)
-		channel->refcount++;
+	mutex_unlock(&chlist->lock);
+}
 
-	mutex_unlock(&channel->reflock);
+static void release_channel(struct kref *kref)
+{
+	struct host1x_channel *channel =
+		container_of(kref, struct host1x_channel, refcount);
+	struct host1x *host = dev_get_drvdata(channel->dev->parent);
+	struct host1x_channel_list *chlist = &host->channel_list;
+
+	host1x_hw_cdma_stop(host, &channel->cdma);
+	host1x_cdma_deinit(&channel->cdma);
 
-	return err ? NULL : channel;
+	clear_bit(channel->id, chlist->allocated_channels);
 }
 
 void host1x_channel_put(struct host1x_channel *channel)
 {
-	mutex_lock(&channel->reflock);
+	kref_put(&channel->refcount, release_channel);
+}
+EXPORT_SYMBOL(host1x_channel_put);
 
-	if (channel->refcount == 1) {
-		struct host1x *host = dev_get_drvdata(channel->dev->parent);
+static struct host1x_channel *acquire_unused_channel(struct host1x *host)
+{
+	struct host1x_channel_list *chlist = &host->channel_list;
+	unsigned int max_channels = host->info->nb_channels;
+	unsigned int index;
+
+	mutex_lock(&chlist->lock);
 
-		host1x_hw_cdma_stop(host, &channel->cdma);
-		host1x_cdma_deinit(&channel->cdma);
+	index = find_first_zero_bit(chlist->allocated_channels, max_channels);
+	if (index >= max_channels) {
+		mutex_unlock(&chlist->lock);
+		dev_err(host->dev, "failed to find free channel\n");
+		return NULL;
 	}
 
-	channel->refcount--;
+	chlist->channels[index].id = index;
+
+	set_bit(index, chlist->allocated_channels);
 
-	mutex_unlock(&channel->reflock);
+	mutex_unlock(&chlist->lock);
+
+	return &chlist->channels[index];
 }
 
-struct host1x_channel *host1x_channel_request(struct device *dev)
+/**
+ * host1x_channel_request() - Allocate a channel
+ * @client: Host1x client this channel will be used to send commands to
+ *
+ * Allocates a new host1x channel for @client. May return NULL if CDMA
+ * initialization fails.
+ */
+struct host1x_channel *host1x_channel_request(struct host1x_client *client)
 {
-	struct host1x *host = dev_get_drvdata(dev->parent);
-	int max_channels = host->info->nb_channels;
-	struct host1x_channel *channel = NULL;
-	int index, err;
+	struct host1x *host = dev_get_drvdata(client->dev->parent);
+	struct host1x_channel_list *chlist = &host->channel_list;
+	struct host1x_channel *channel;
+	int err;
 
-	mutex_lock(&host->chlist_mutex);
+	channel = acquire_unused_channel(host);
+	if (!channel)
+		return NULL;
 
-	index = find_first_zero_bit(&host->allocated_channels, max_channels);
-	if (index >= max_channels)
-		goto fail;
+	kref_init(&channel->refcount);
+	mutex_init(&channel->submitlock);
+	channel->client = client;
+	channel->dev = client->dev;
 
-	channel = kzalloc(sizeof(*channel), GFP_KERNEL);
-	if (!channel)
+	err = host1x_hw_channel_init(host, channel, channel->id);
+	if (err < 0)
 		goto fail;
 
-	err = host1x_hw_channel_init(host, channel, index);
+	err = host1x_cdma_init(&channel->cdma);
 	if (err < 0)
 		goto fail;
 
-	/* Link device to host1x_channel */
-	channel->dev = dev;
-
-	/* Add to channel list */
-	list_add_tail(&channel->list, &host->chlist.list);
-
-	host->allocated_channels |= BIT(index);
-
-	mutex_unlock(&host->chlist_mutex);
 	return channel;
 
 fail:
-	dev_err(dev, "failed to init channel\n");
-	kfree(channel);
-	mutex_unlock(&host->chlist_mutex);
-	return NULL;
-}
+	clear_bit(channel->id, chlist->allocated_channels);
 
-void host1x_channel_free(struct host1x_channel *channel)
-{
-	struct host1x *host = dev_get_drvdata(channel->dev->parent);
+	dev_err(client->dev, "failed to initialize channel\n");
 
-	host->allocated_channels &= ~BIT(channel->id);
-	list_del(&channel->list);
-	kfree(channel);
+	return NULL;
 }
+EXPORT_SYMBOL(host1x_channel_request);
diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h
index 48723b8eea42..d7aede204d83 100644
--- a/drivers/gpu/host1x/channel.h
+++ b/drivers/gpu/host1x/channel.h
@@ -1,52 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Tegra host1x Channel
  *
  * Copyright (c) 2010-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __HOST1X_CHANNEL_H
 #define __HOST1X_CHANNEL_H
 
 #include <linux/io.h>
+#include <linux/kref.h>
+#include <linux/mutex.h>
 
 #include "cdma.h"
 
 struct host1x;
+struct host1x_channel;
 
-struct host1x_channel {
-	struct list_head list;
+struct host1x_channel_list {
+	struct host1x_channel *channels;
 
-	unsigned int refcount;
+	struct mutex lock;
+	unsigned long *allocated_channels;
+};
+
+struct host1x_channel {
+	struct kref refcount;
 	unsigned int id;
-	struct mutex reflock;
 	struct mutex submitlock;
 	void __iomem *regs;
+	struct host1x_client *client;
 	struct device *dev;
 	struct host1x_cdma cdma;
 };
 
 /* channel list operations */
-int host1x_channel_list_init(struct host1x *host);
-
-struct host1x_channel *host1x_channel_request(struct device *dev);
-void host1x_channel_free(struct host1x_channel *channel);
-struct host1x_channel *host1x_channel_get(struct host1x_channel *channel);
-void host1x_channel_put(struct host1x_channel *channel);
-int host1x_job_submit(struct host1x_job *job);
-
-#define host1x_for_each_channel(host, channel)				\
-	list_for_each_entry(channel, &host->chlist.list, list)
+int host1x_channel_list_init(struct host1x_channel_list *chlist,
+			     unsigned int num_channels);
+void host1x_channel_list_free(struct host1x_channel_list *chlist);
+struct host1x_channel *host1x_channel_get_index(struct host1x *host,
+						unsigned int index);
+void host1x_channel_stop_all(struct host1x *host);
 
 #endif
diff --git a/drivers/gpu/host1x/context.c b/drivers/gpu/host1x/context.c
new file mode 100644
index 000000000000..a6f6779662a3
--- /dev/null
+++ b/drivers/gpu/host1x/context.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA Corporation.
+ */
+
+#include <linux/device.h>
+#include <linux/kref.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/pid.h>
+#include <linux/slab.h>
+
+#include "context.h"
+#include "dev.h"
+
+static void host1x_memory_context_release(struct device *dev)
+{
+	/* context device is freed in host1x_memory_context_list_free() */
+}
+
+int host1x_memory_context_list_init(struct host1x *host1x)
+{
+	struct host1x_memory_context_list *cdl = &host1x->context_list;
+	struct device_node *node = host1x->dev->of_node;
+	struct host1x_memory_context *ctx;
+	unsigned int i;
+	int err;
+
+	cdl->devs = NULL;
+	cdl->len = 0;
+	mutex_init(&cdl->lock);
+
+	err = of_property_count_u32_elems(node, "iommu-map");
+	if (err < 0)
+		return 0;
+
+	cdl->len = err / 4;
+	cdl->devs = kcalloc(cdl->len, sizeof(*cdl->devs), GFP_KERNEL);
+	if (!cdl->devs)
+		return -ENOMEM;
+
+	for (i = 0; i < cdl->len; i++) {
+		ctx = &cdl->devs[i];
+
+		ctx->host = host1x;
+
+		device_initialize(&ctx->dev);
+
+		/*
+		 * Due to an issue with T194 NVENC, only 38 bits can be used.
+		 * Anyway, 256GiB of IOVA ought to be enough for anyone.
+		 */
+		ctx->dma_mask = DMA_BIT_MASK(38);
+		ctx->dev.dma_mask = &ctx->dma_mask;
+		ctx->dev.coherent_dma_mask = ctx->dma_mask;
+		dev_set_name(&ctx->dev, "host1x-ctx.%d", i);
+		ctx->dev.bus = &host1x_context_device_bus_type;
+		ctx->dev.parent = host1x->dev;
+		ctx->dev.release = host1x_memory_context_release;
+
+		ctx->dev.dma_parms = &ctx->dma_parms;
+		dma_set_max_seg_size(&ctx->dev, UINT_MAX);
+
+		err = device_add(&ctx->dev);
+		if (err) {
+			dev_err(host1x->dev, "could not add context device %d: %d\n", i, err);
+			put_device(&ctx->dev);
+			goto unreg_devices;
+		}
+
+		err = of_dma_configure_id(&ctx->dev, node, true, &i);
+		if (err) {
+			dev_err(host1x->dev, "IOMMU configuration failed for context device %d: %d\n",
+				i, err);
+			device_unregister(&ctx->dev);
+			goto unreg_devices;
+		}
+
+		if (!tegra_dev_iommu_get_stream_id(&ctx->dev, &ctx->stream_id) ||
+		    !device_iommu_mapped(&ctx->dev)) {
+			dev_err(host1x->dev, "Context device %d has no IOMMU!\n", i);
+			device_unregister(&ctx->dev);
+
+			/*
+			 * This means that if IOMMU is disabled but context devices
+			 * are defined in the device tree, Host1x will fail to probe.
+			 * That's probably OK in this time and age.
+			 */
+			err = -EINVAL;
+
+			goto unreg_devices;
+		}
+	}
+
+	return 0;
+
+unreg_devices:
+	while (i--)
+		device_unregister(&cdl->devs[i].dev);
+
+	kfree(cdl->devs);
+	cdl->devs = NULL;
+	cdl->len = 0;
+
+	return err;
+}
+
+void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl)
+{
+	unsigned int i;
+
+	for (i = 0; i < cdl->len; i++)
+		device_unregister(&cdl->devs[i].dev);
+
+	kfree(cdl->devs);
+	cdl->len = 0;
+}
+
+struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x,
+							  struct device *dev,
+							  struct pid *pid)
+{
+	struct host1x_memory_context_list *cdl = &host1x->context_list;
+	struct host1x_memory_context *free = NULL;
+	int i;
+
+	if (!cdl->len)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	mutex_lock(&cdl->lock);
+
+	for (i = 0; i < cdl->len; i++) {
+		struct host1x_memory_context *cd = &cdl->devs[i];
+
+		if (cd->dev.iommu->iommu_dev != dev->iommu->iommu_dev)
+			continue;
+
+		if (cd->owner == pid) {
+			refcount_inc(&cd->ref);
+			mutex_unlock(&cdl->lock);
+			return cd;
+		} else if (!cd->owner && !free) {
+			free = cd;
+		}
+	}
+
+	if (!free) {
+		mutex_unlock(&cdl->lock);
+		return ERR_PTR(-EBUSY);
+	}
+
+	refcount_set(&free->ref, 1);
+	free->owner = get_pid(pid);
+
+	mutex_unlock(&cdl->lock);
+
+	return free;
+}
+EXPORT_SYMBOL_GPL(host1x_memory_context_alloc);
+
+void host1x_memory_context_get(struct host1x_memory_context *cd)
+{
+	refcount_inc(&cd->ref);
+}
+EXPORT_SYMBOL_GPL(host1x_memory_context_get);
+
+void host1x_memory_context_put(struct host1x_memory_context *cd)
+{
+	struct host1x_memory_context_list *cdl = &cd->host->context_list;
+
+	if (refcount_dec_and_mutex_lock(&cd->ref, &cdl->lock)) {
+		put_pid(cd->owner);
+		cd->owner = NULL;
+		mutex_unlock(&cdl->lock);
+	}
+}
+EXPORT_SYMBOL_GPL(host1x_memory_context_put);
diff --git a/drivers/gpu/host1x/context.h b/drivers/gpu/host1x/context.h
new file mode 100644
index 000000000000..3e03bc1d3bac
--- /dev/null
+++ b/drivers/gpu/host1x/context.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x context devices
+ *
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_CONTEXT_H
+#define __HOST1X_CONTEXT_H
+
+#include <linux/mutex.h>
+#include <linux/refcount.h>
+
+struct host1x;
+
+extern struct bus_type host1x_context_device_bus_type;
+
+struct host1x_memory_context_list {
+	struct mutex lock;
+	struct host1x_memory_context *devs;
+	unsigned int len;
+};
+
+#ifdef CONFIG_IOMMU_API
+int host1x_memory_context_list_init(struct host1x *host1x);
+void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl);
+#else
+static inline int host1x_memory_context_list_init(struct host1x *host1x)
+{
+	return 0;
+}
+
+static inline void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl)
+{
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/host1x/context_bus.c b/drivers/gpu/host1x/context_bus.c
new file mode 100644
index 000000000000..7cd0e1a5edd1
--- /dev/null
+++ b/drivers/gpu/host1x/context_bus.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA Corporation.
+ */
+
+#include <linux/device.h>
+#include <linux/of.h>
+
+const struct bus_type host1x_context_device_bus_type = {
+	.name = "host1x-context",
+};
+EXPORT_SYMBOL_GPL(host1x_context_device_bus_type);
+
+static int __init host1x_context_device_bus_init(void)
+{
+	int err;
+
+	err = bus_register(&host1x_context_device_bus_type);
+	if (err < 0) {
+		pr_err("bus type registration failed: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+postcore_initcall(host1x_context_device_bus_init);
diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index 3ec7d77de24d..6433c00d5d7e 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -1,21 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2010 Google, Inc.
  * Author: Erik Gilling <konkers@android.com>
  *
  * Copyright (C) 2011-2013 NVIDIA Corporation
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
 #include <linux/debugfs.h>
+#include <linux/pm_runtime.h>
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
 
@@ -25,6 +17,8 @@
 #include "debug.h"
 #include "channel.h"
 
+static DEFINE_MUTEX(debug_lock);
+
 unsigned int host1x_debug_trace_cmdbuf;
 
 static pid_t host1x_debug_force_timeout_pid;
@@ -39,85 +33,126 @@ void host1x_debug_output(struct output *o, const char *fmt, ...)
 	va_start(args, fmt);
 	len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
 	va_end(args);
-	o->fn(o->ctx, o->buf, len);
+
+	o->fn(o->ctx, o->buf, len, false);
+}
+
+void host1x_debug_cont(struct output *o, const char *fmt, ...)
+{
+	va_list args;
+	int len;
+
+	va_start(args, fmt);
+	len = vsnprintf(o->buf, sizeof(o->buf), fmt, args);
+	va_end(args);
+
+	o->fn(o->ctx, o->buf, len, true);
 }
 
-static int show_channels(struct host1x_channel *ch, void *data, bool show_fifo)
+static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
 {
 	struct host1x *m = dev_get_drvdata(ch->dev->parent);
 	struct output *o = data;
+	int err;
 
-	mutex_lock(&ch->reflock);
-	if (ch->refcount) {
-		mutex_lock(&ch->cdma.lock);
-		if (show_fifo)
-			host1x_hw_show_channel_fifo(m, ch, o);
-		host1x_hw_show_channel_cdma(m, ch, o);
-		mutex_unlock(&ch->cdma.lock);
-	}
-	mutex_unlock(&ch->reflock);
+	err = pm_runtime_resume_and_get(m->dev);
+	if (err < 0)
+		return err;
+
+	mutex_lock(&ch->cdma.lock);
+	mutex_lock(&debug_lock);
+
+	if (show_fifo)
+		host1x_hw_show_channel_fifo(m, ch, o);
+
+	host1x_hw_show_channel_cdma(m, ch, o);
+
+	mutex_unlock(&debug_lock);
+	mutex_unlock(&ch->cdma.lock);
+
+	pm_runtime_put(m->dev);
 
 	return 0;
 }
 
-static void show_syncpts(struct host1x *m, struct output *o)
+static void show_syncpts(struct host1x *m, struct output *o, bool show_all)
 {
-	int i;
+	unsigned long irqflags;
+	struct list_head *pos;
+	unsigned int i;
+	int err;
+
 	host1x_debug_output(o, "---- syncpts ----\n");
+
+	err = pm_runtime_resume_and_get(m->dev);
+	if (err < 0)
+		return;
+
 	for (i = 0; i < host1x_syncpt_nb_pts(m); i++) {
 		u32 max = host1x_syncpt_read_max(m->syncpt + i);
 		u32 min = host1x_syncpt_load(m->syncpt + i);
-		if (!min && !max)
+		unsigned int waiters = 0;
+
+		spin_lock_irqsave(&m->syncpt[i].fences.lock, irqflags);
+		list_for_each(pos, &m->syncpt[i].fences.list)
+			waiters++;
+		spin_unlock_irqrestore(&m->syncpt[i].fences.lock, irqflags);
+
+		if (!kref_read(&m->syncpt[i].ref))
 			continue;
-		host1x_debug_output(o, "id %d (%s) min %d max %d\n",
-				    i, m->syncpt[i].name, min, max);
+
+		if (!show_all && !min && !max && !waiters)
+			continue;
+
+		host1x_debug_output(o,
+				    "id %u (%s) min %d max %d (%d waiters)\n",
+				    i, m->syncpt[i].name, min, max, waiters);
 	}
 
 	for (i = 0; i < host1x_syncpt_nb_bases(m); i++) {
 		u32 base_val;
+
 		base_val = host1x_syncpt_load_wait_base(m->syncpt + i);
 		if (base_val)
-			host1x_debug_output(o, "waitbase id %d val %d\n", i,
+			host1x_debug_output(o, "waitbase id %u val %d\n", i,
 					    base_val);
 	}
 
+	pm_runtime_put(m->dev);
+
 	host1x_debug_output(o, "\n");
 }
 
-static void show_all(struct host1x *m, struct output *o)
+static void show_all(struct host1x *m, struct output *o, bool show_fifo)
 {
-	struct host1x_channel *ch;
+	unsigned int i;
 
 	host1x_hw_show_mlocks(m, o);
-	show_syncpts(m, o);
+	show_syncpts(m, o, true);
 	host1x_debug_output(o, "---- channels ----\n");
 
-	host1x_for_each_channel(m, ch)
-		show_channels(ch, o, true);
-}
-
-#ifdef CONFIG_DEBUG_FS
-static void show_all_no_fifo(struct host1x *host1x, struct output *o)
-{
-	struct host1x_channel *ch;
-
-	host1x_hw_show_mlocks(host1x, o);
-	show_syncpts(host1x, o);
-	host1x_debug_output(o, "---- channels ----\n");
+	for (i = 0; i < m->info->nb_channels; ++i) {
+		struct host1x_channel *ch = host1x_channel_get_index(m, i);
 
-	host1x_for_each_channel(host1x, ch)
-		show_channels(ch, o, false);
+		if (ch) {
+			show_channel(ch, o, show_fifo);
+			host1x_channel_put(ch);
+		}
+	}
 }
 
-static int host1x_debug_show_all(struct seq_file *s, void *unused)
+static int host1x_debug_all_show(struct seq_file *s, void *unused)
 {
 	struct output o = {
 		.fn = write_to_seqfile,
 		.ctx = s
 	};
-	show_all(s->private, &o);
+
+	show_all(s->private, &o, true);
+
 	return 0;
 }
+DEFINE_SHOW_ATTRIBUTE(host1x_debug_all);
 
 static int host1x_debug_show(struct seq_file *s, void *unused)
 {
@@ -125,41 +160,17 @@ static int host1x_debug_show(struct seq_file *s, void *unused)
 		.fn = write_to_seqfile,
 		.ctx = s
 	};
-	show_all_no_fifo(s->private, &o);
-	return 0;
-}
-
-static int host1x_debug_open_all(struct inode *inode, struct file *file)
-{
-	return single_open(file, host1x_debug_show_all, inode->i_private);
-}
 
-static const struct file_operations host1x_debug_all_fops = {
-	.open		= host1x_debug_open_all,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
+	show_all(s->private, &o, false);
 
-static int host1x_debug_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, host1x_debug_show, inode->i_private);
+	return 0;
 }
+DEFINE_SHOW_ATTRIBUTE(host1x_debug);
 
-static const struct file_operations host1x_debug_fops = {
-	.open		= host1x_debug_open,
-	.read		= seq_read,
-	.llseek		= seq_lseek,
-	.release	= single_release,
-};
-
-void host1x_debug_init(struct host1x *host1x)
+static void host1x_debugfs_init(struct host1x *host1x)
 {
 	struct dentry *de = debugfs_create_dir("tegra-host1x", NULL);
 
-	if (!de)
-		return;
-
 	/* Store the created entry */
 	host1x->debugfs = de;
 
@@ -180,31 +191,28 @@ void host1x_debug_init(struct host1x *host1x)
 			   &host1x_debug_force_timeout_channel);
 }
 
-void host1x_debug_deinit(struct host1x *host1x)
+static void host1x_debugfs_exit(struct host1x *host1x)
 {
 	debugfs_remove_recursive(host1x->debugfs);
 }
-#else
+
 void host1x_debug_init(struct host1x *host1x)
 {
+	if (IS_ENABLED(CONFIG_DEBUG_FS))
+		host1x_debugfs_init(host1x);
 }
+
 void host1x_debug_deinit(struct host1x *host1x)
 {
+	if (IS_ENABLED(CONFIG_DEBUG_FS))
+		host1x_debugfs_exit(host1x);
 }
-#endif
 
 void host1x_debug_dump(struct host1x *host1x)
 {
 	struct output o = {
 		.fn = write_to_printk
 	};
-	show_all(host1x, &o);
-}
 
-void host1x_debug_dump_syncpts(struct host1x *host1x)
-{
-	struct output o = {
-		.fn = write_to_printk
-	};
-	show_syncpts(host1x, &o);
+	show_all(host1x, &o, true);
 }
diff --git a/drivers/gpu/host1x/debug.h b/drivers/gpu/host1x/debug.h
index 4595b2e0799f..c43c61d876a9 100644
--- a/drivers/gpu/host1x/debug.h
+++ b/drivers/gpu/host1x/debug.h
@@ -1,19 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Tegra host1x Debug
  *
  * Copyright (c) 2011-2013 NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #ifndef __HOST1X_DEBUG_H
 #define __HOST1X_DEBUG_H
@@ -24,28 +13,33 @@
 struct host1x;
 
 struct output {
-	void (*fn)(void *ctx, const char *str, size_t len);
+	void (*fn)(void *ctx, const char *str, size_t len, bool cont);
 	void *ctx;
 	char buf[256];
 };
 
-static inline void write_to_seqfile(void *ctx, const char *str, size_t len)
+static inline void write_to_seqfile(void *ctx, const char *str, size_t len,
+				    bool cont)
 {
 	seq_write((struct seq_file *)ctx, str, len);
 }
 
-static inline void write_to_printk(void *ctx, const char *str, size_t len)
+static inline void write_to_printk(void *ctx, const char *str, size_t len,
+				   bool cont)
 {
-	pr_info("%s", str);
+	if (cont)
+		pr_cont("%s", str);
+	else
+		pr_info("%s", str);
 }
 
 void __printf(2, 3) host1x_debug_output(struct output *o, const char *fmt, ...);
+void __printf(2, 3) host1x_debug_cont(struct output *o, const char *fmt, ...);
 
 extern unsigned int host1x_debug_trace_cmdbuf;
 
 void host1x_debug_init(struct host1x *host1x);
 void host1x_debug_deinit(struct host1x *host1x);
 void host1x_debug_dump(struct host1x *host1x);
-void host1x_debug_dump_syncpts(struct host1x *host1x);
 
 #endif
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 28e28a23d444..3f475f0e6545 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -1,49 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Tegra host1x driver
  *
  * Copyright (c) 2010-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <linux/module.h>
-#include <linux/list.h>
-#include <linux/slab.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
 #include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
 #include <linux/io.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/slab.h>
+
+#include <soc/tegra/common.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/host1x.h>
+#undef CREATE_TRACE_POINTS
 
-#include "dev.h"
-#include "intr.h"
+#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+#include <asm/dma-iommu.h>
+#endif
+
+#include "bus.h"
 #include "channel.h"
+#include "context.h"
 #include "debug.h"
+#include "dev.h"
+#include "intr.h"
+
 #include "hw/host1x01.h"
-#include "host1x_client.h"
+#include "hw/host1x02.h"
+#include "hw/host1x04.h"
+#include "hw/host1x05.h"
+#include "hw/host1x06.h"
+#include "hw/host1x07.h"
+#include "hw/host1x08.h"
+
+void host1x_common_writel(struct host1x *host1x, u32 v, u32 r)
+{
+	writel(v, host1x->common_regs + r);
+}
 
-void host1x_set_drm_data(struct device *dev, void *data)
+void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r)
 {
-	struct host1x *host1x = dev_get_drvdata(dev);
-	host1x->drm_data = data;
+	writel(v, host1x->hv_regs + r);
 }
 
-void *host1x_get_drm_data(struct device *dev)
+u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r)
 {
-	struct host1x *host1x = dev_get_drvdata(dev);
-	return host1x->drm_data;
+	return readl(host1x->hv_regs + r);
 }
 
 void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r)
@@ -60,6 +71,15 @@ u32 host1x_sync_readl(struct host1x *host1x, u32 r)
 	return readl(sync_regs + r);
 }
 
+#ifdef CONFIG_64BIT
+u64 host1x_sync_readq(struct host1x *host1x, u32 r)
+{
+	void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;
+
+	return readq(sync_regs + r);
+}
+#endif
+
 void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r)
 {
 	writel(v, ch->regs + r);
@@ -71,58 +91,501 @@ u32 host1x_ch_readl(struct host1x_channel *ch, u32 r)
 }
 
 static const struct host1x_info host1x01_info = {
-	.nb_channels	= 8,
-	.nb_pts		= 32,
-	.nb_mlocks	= 16,
-	.nb_bases	= 8,
-	.init		= host1x01_init,
-	.sync_offset	= 0x3000,
+	.nb_channels = 8,
+	.nb_pts = 32,
+	.nb_mlocks = 16,
+	.nb_bases = 8,
+	.init = host1x01_init,
+	.sync_offset = 0x3000,
+	.dma_mask = DMA_BIT_MASK(32),
+	.has_wide_gather = false,
+	.has_hypervisor = false,
+	.num_sid_entries = 0,
+	.sid_table = NULL,
+	.reserve_vblank_syncpts = true,
+};
+
+static const struct host1x_info host1x02_info = {
+	.nb_channels = 9,
+	.nb_pts = 32,
+	.nb_mlocks = 16,
+	.nb_bases = 12,
+	.init = host1x02_init,
+	.sync_offset = 0x3000,
+	.dma_mask = DMA_BIT_MASK(32),
+	.has_wide_gather = false,
+	.has_hypervisor = false,
+	.num_sid_entries = 0,
+	.sid_table = NULL,
+	.reserve_vblank_syncpts = true,
+};
+
+static const struct host1x_info host1x04_info = {
+	.nb_channels = 12,
+	.nb_pts = 192,
+	.nb_mlocks = 16,
+	.nb_bases = 64,
+	.init = host1x04_init,
+	.sync_offset = 0x2100,
+	.dma_mask = DMA_BIT_MASK(34),
+	.has_wide_gather = false,
+	.has_hypervisor = false,
+	.num_sid_entries = 0,
+	.sid_table = NULL,
+	.reserve_vblank_syncpts = false,
+};
+
+static const struct host1x_info host1x05_info = {
+	.nb_channels = 14,
+	.nb_pts = 192,
+	.nb_mlocks = 16,
+	.nb_bases = 64,
+	.init = host1x05_init,
+	.sync_offset = 0x2100,
+	.dma_mask = DMA_BIT_MASK(34),
+	.has_wide_gather = false,
+	.has_hypervisor = false,
+	.num_sid_entries = 0,
+	.sid_table = NULL,
+	.reserve_vblank_syncpts = false,
+};
+
+static const struct host1x_sid_entry tegra186_sid_table[] = {
+	{ /* SE1      */  .base = 0x1ac8, .offset = 0x90,    .limit = 0x90    },
+	{ /* SE2      */  .base = 0x1ad0, .offset = 0x90,    .limit = 0x90    },
+	{ /* SE3      */  .base = 0x1ad8, .offset = 0x90,    .limit = 0x90    },
+	{ /* SE4      */  .base = 0x1ae0, .offset = 0x90,    .limit = 0x90    },
+	{ /* ISP      */  .base = 0x1ae8, .offset = 0x50,    .limit = 0x50    },
+	{ /* VIC      */  .base = 0x1af0, .offset = 0x30,    .limit = 0x34    },
+	{ /* NVENC    */  .base = 0x1af8, .offset = 0x30,    .limit = 0x34    },
+	{ /* NVDEC    */  .base = 0x1b00, .offset = 0x30,    .limit = 0x34    },
+	{ /* NVJPG    */  .base = 0x1b08, .offset = 0x30,    .limit = 0x34    },
+	{ /* TSEC     */  .base = 0x1b10, .offset = 0x30,    .limit = 0x34    },
+	{ /* TSECB    */  .base = 0x1b18, .offset = 0x30,    .limit = 0x34    },
+	{ /* VI 0     */  .base = 0x1b80, .offset = 0x10000, .limit = 0x10000 },
+	{ /* VI 1     */  .base = 0x1b88, .offset = 0x20000, .limit = 0x20000 },
+	{ /* VI 2     */  .base = 0x1b90, .offset = 0x30000, .limit = 0x30000 },
+	{ /* VI 3     */  .base = 0x1b98, .offset = 0x40000, .limit = 0x40000 },
+	{ /* VI 4     */  .base = 0x1ba0, .offset = 0x50000, .limit = 0x50000 },
+	{ /* VI 5     */  .base = 0x1ba8, .offset = 0x60000, .limit = 0x60000 },
+	{ /* VI 6     */  .base = 0x1bb0, .offset = 0x70000, .limit = 0x70000 },
+	{ /* VI 7     */  .base = 0x1bb8, .offset = 0x80000, .limit = 0x80000 },
+	{ /* VI 8     */  .base = 0x1bc0, .offset = 0x90000, .limit = 0x90000 },
+	{ /* VI 9     */  .base = 0x1bc8, .offset = 0xa0000, .limit = 0xa0000 },
+	{ /* VI 10    */  .base = 0x1bd0, .offset = 0xb0000, .limit = 0xb0000 },
+	{ /* VI 11    */  .base = 0x1bd8, .offset = 0xc0000, .limit = 0xc0000 },
+};
+
+static const struct host1x_info host1x06_info = {
+	.nb_channels = 63,
+	.nb_pts = 576,
+	.nb_mlocks = 24,
+	.nb_bases = 16,
+	.init = host1x06_init,
+	.sync_offset = 0x0,
+	.dma_mask = DMA_BIT_MASK(40),
+	.has_wide_gather = true,
+	.has_hypervisor = true,
+	.num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
+	.sid_table = tegra186_sid_table,
+	.reserve_vblank_syncpts = false,
+	.skip_reset_assert = true,
 };
 
-static struct of_device_id host1x_of_match[] = {
+static const struct host1x_sid_entry tegra194_sid_table[] = {
+	{ /* SE1          */  .base = 0x1ac8, .offset = 0x90,  .limit = 0x90  },
+	{ /* SE2          */  .base = 0x1ad0, .offset = 0x90,  .limit = 0x90  },
+	{ /* SE3          */  .base = 0x1ad8, .offset = 0x90,  .limit = 0x90  },
+	{ /* SE4          */  .base = 0x1ae0, .offset = 0x90,  .limit = 0x90  },
+	{ /* ISP          */  .base = 0x1ae8, .offset = 0x800, .limit = 0x800 },
+	{ /* VIC          */  .base = 0x1af0, .offset = 0x30,  .limit = 0x34  },
+	{ /* NVENC        */  .base = 0x1af8, .offset = 0x30,  .limit = 0x34  },
+	{ /* NVDEC        */  .base = 0x1b00, .offset = 0x30,  .limit = 0x34  },
+	{ /* NVJPG        */  .base = 0x1b08, .offset = 0x30,  .limit = 0x34  },
+	{ /* TSEC         */  .base = 0x1b10, .offset = 0x30,  .limit = 0x34  },
+	{ /* TSECB        */  .base = 0x1b18, .offset = 0x30,  .limit = 0x34  },
+	{ /* VI           */  .base = 0x1b80, .offset = 0x800, .limit = 0x800 },
+	{ /* VI_THI       */  .base = 0x1b88, .offset = 0x30,  .limit = 0x34  },
+	{ /* ISP_THI      */  .base = 0x1b90, .offset = 0x30,  .limit = 0x34  },
+	{ /* PVA0_CLUSTER */  .base = 0x1b98, .offset = 0x0,   .limit = 0x0   },
+	{ /* PVA0_CLUSTER */  .base = 0x1ba0, .offset = 0x0,   .limit = 0x0   },
+	{ /* NVDLA0       */  .base = 0x1ba8, .offset = 0x30,  .limit = 0x34  },
+	{ /* NVDLA1       */  .base = 0x1bb0, .offset = 0x30,  .limit = 0x34  },
+	{ /* NVENC1       */  .base = 0x1bb8, .offset = 0x30,  .limit = 0x34  },
+	{ /* NVDEC1       */  .base = 0x1bc0, .offset = 0x30,  .limit = 0x34  },
+};
+
+static const struct host1x_info host1x07_info = {
+	.nb_channels = 63,
+	.nb_pts = 704,
+	.nb_mlocks = 32,
+	.nb_bases = 0,
+	.init = host1x07_init,
+	.sync_offset = 0x0,
+	.dma_mask = DMA_BIT_MASK(40),
+	.has_wide_gather = true,
+	.has_hypervisor = true,
+	.num_sid_entries = ARRAY_SIZE(tegra194_sid_table),
+	.sid_table = tegra194_sid_table,
+	.reserve_vblank_syncpts = false,
+};
+
+/*
+ * Tegra234 has two stream ID protection tables, one for setting stream IDs
+ * through the channel path via SETSTREAMID, and one for setting them via
+ * MMIO. We program each engine's data stream ID in the channel path table
+ * and firmware stream ID in the MMIO path table.
+ */
+static const struct host1x_sid_entry tegra234_sid_table[] = {
+	{ /* SE1 MMIO     */  .base = 0x1650, .offset = 0x90,  .limit = 0x90  },
+	{ /* SE1 ch       */  .base = 0x1730, .offset = 0x90,  .limit = 0x90  },
+	{ /* SE2 MMIO     */  .base = 0x1658, .offset = 0x90,  .limit = 0x90  },
+	{ /* SE2 ch       */  .base = 0x1738, .offset = 0x90,  .limit = 0x90  },
+	{ /* SE4 MMIO     */  .base = 0x1660, .offset = 0x90,  .limit = 0x90  },
+	{ /* SE4 ch       */  .base = 0x1740, .offset = 0x90,  .limit = 0x90  },
+	{ /* ISP MMIO     */  .base = 0x1680, .offset = 0x800, .limit = 0x800 },
+	{ /* VIC MMIO     */  .base = 0x1688, .offset = 0x34,  .limit = 0x34  },
+	{ /* VIC ch       */  .base = 0x17b8, .offset = 0x30,  .limit = 0x30  },
+	{ /* NVENC MMIO   */  .base = 0x1690, .offset = 0x34,  .limit = 0x34  },
+	{ /* NVENC ch     */  .base = 0x17c0, .offset = 0x30,  .limit = 0x30  },
+	{ /* NVDEC MMIO   */  .base = 0x1698, .offset = 0x34,  .limit = 0x34  },
+	{ /* NVDEC ch     */  .base = 0x17c8, .offset = 0x30,  .limit = 0x30  },
+	{ /* NVJPG MMIO   */  .base = 0x16a0, .offset = 0x34,  .limit = 0x34  },
+	{ /* NVJPG ch     */  .base = 0x17d0, .offset = 0x30,  .limit = 0x30  },
+	{ /* TSEC MMIO    */  .base = 0x16a8, .offset = 0x30,  .limit = 0x34  },
+	{ /* NVJPG1 MMIO  */  .base = 0x16b0, .offset = 0x34,  .limit = 0x34  },
+	{ /* NVJPG1 ch    */  .base = 0x17a8, .offset = 0x30,  .limit = 0x30  },
+	{ /* VI MMIO      */  .base = 0x16b8, .offset = 0x800, .limit = 0x800 },
+	{ /* VI_THI MMIO  */  .base = 0x16c0, .offset = 0x30,  .limit = 0x34  },
+	{ /* ISP_THI MMIO */  .base = 0x16c8, .offset = 0x30,  .limit = 0x34  },
+	{ /* NVDLA MMIO   */  .base = 0x16d8, .offset = 0x30,  .limit = 0x34  },
+	{ /* NVDLA ch     */  .base = 0x17e0, .offset = 0x30,  .limit = 0x34  },
+	{ /* NVDLA1 MMIO  */  .base = 0x16e0, .offset = 0x30,  .limit = 0x34  },
+	{ /* NVDLA1 ch    */  .base = 0x17e8, .offset = 0x30,  .limit = 0x34  },
+	{ /* OFA MMIO     */  .base = 0x16e8, .offset = 0x34,  .limit = 0x34  },
+	{ /* OFA ch       */  .base = 0x1768, .offset = 0x30,  .limit = 0x30  },
+	{ /* VI2 MMIO     */  .base = 0x16f0, .offset = 0x800, .limit = 0x800 },
+	{ /* VI2_THI MMIO */  .base = 0x16f8, .offset = 0x30,  .limit = 0x34  },
+};
+
+static const struct host1x_info host1x08_info = {
+	.nb_channels = 63,
+	.nb_pts = 1024,
+	.nb_mlocks = 24,
+	.nb_bases = 0,
+	.init = host1x08_init,
+	.sync_offset = 0x0,
+	.dma_mask = DMA_BIT_MASK(40),
+	.has_wide_gather = true,
+	.has_hypervisor = true,
+	.has_common = true,
+	.num_sid_entries = ARRAY_SIZE(tegra234_sid_table),
+	.sid_table = tegra234_sid_table,
+	.streamid_vm_table = { 0x1004, 128 },
+	.classid_vm_table = { 0x1404, 25 },
+	.mmio_vm_table = { 0x1504, 25 },
+	.reserve_vblank_syncpts = false,
+};
+
+static const struct of_device_id host1x_of_match[] = {
+	{ .compatible = "nvidia,tegra234-host1x", .data = &host1x08_info, },
+	{ .compatible = "nvidia,tegra194-host1x", .data = &host1x07_info, },
+	{ .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, },
+	{ .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, },
+	{ .compatible = "nvidia,tegra124-host1x", .data = &host1x04_info, },
+	{ .compatible = "nvidia,tegra114-host1x", .data = &host1x02_info, },
 	{ .compatible = "nvidia,tegra30-host1x", .data = &host1x01_info, },
 	{ .compatible = "nvidia,tegra20-host1x", .data = &host1x01_info, },
 	{ },
 };
 MODULE_DEVICE_TABLE(of, host1x_of_match);
 
-static int host1x_probe(struct platform_device *pdev)
+static void host1x_setup_virtualization_tables(struct host1x *host)
 {
-	const struct of_device_id *id;
-	struct host1x *host;
-	struct resource *regs;
-	int syncpt_irq;
+	const struct host1x_info *info = host->info;
+	unsigned int i;
+
+	if (!info->has_hypervisor)
+		return;
+
+	for (i = 0; i < info->num_sid_entries; i++) {
+		const struct host1x_sid_entry *entry = &info->sid_table[i];
+
+		host1x_hypervisor_writel(host, entry->offset, entry->base);
+		host1x_hypervisor_writel(host, entry->limit, entry->base + 4);
+	}
+
+	for (i = 0; i < info->streamid_vm_table.count; i++) {
+		/* Allow access to all stream IDs to all VMs. */
+		host1x_hypervisor_writel(host, 0xff, info->streamid_vm_table.base + 4 * i);
+	}
+
+	for (i = 0; i < info->classid_vm_table.count; i++) {
+		/* Allow access to all classes to all VMs. */
+		host1x_hypervisor_writel(host, 0xff, info->classid_vm_table.base + 4 * i);
+	}
+
+	for (i = 0; i < info->mmio_vm_table.count; i++) {
+		/* Use VM1 (that's us) as originator VMID for engine MMIO accesses. */
+		host1x_hypervisor_writel(host, 0x1, info->mmio_vm_table.base + 4 * i);
+	}
+}
+
+static bool host1x_wants_iommu(struct host1x *host1x)
+{
+	/* Our IOMMU usage policy doesn't currently play well with GART */
+	if (of_machine_is_compatible("nvidia,tegra20"))
+		return false;
+
+	/*
+	 * If we support addressing a maximum of 32 bits of physical memory
+	 * and if the host1x firewall is enabled, there's no need to enable
+	 * IOMMU support. This can happen for example on Tegra20, Tegra30
+	 * and Tegra114.
+	 *
+	 * Tegra124 and later can address up to 34 bits of physical memory and
+	 * many platforms come equipped with more than 2 GiB of system memory,
+	 * which requires crossing the 4 GiB boundary. But there's a catch: on
+	 * SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can
+	 * only address up to 32 bits of memory in GATHER opcodes, which means
+	 * that command buffers need to either be in the first 2 GiB of system
+	 * memory (which could quickly lead to memory exhaustion), or command
+	 * buffers need to be treated differently from other buffers (which is
+	 * not possible with the current ABI).
+	 *
+	 * A third option is to use the IOMMU in these cases to make sure all
+	 * buffers will be mapped into a 32-bit IOVA space that host1x can
+	 * address. This allows all of the system memory to be used and works
+	 * within the limitations of the host1x on these SoCs.
+	 *
+	 * In summary, default to enable IOMMU on Tegra124 and later. For any
+	 * of the earlier SoCs, only use the IOMMU for additional safety when
+	 * the host1x firewall is disabled.
+	 */
+	if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) {
+		if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Returns ERR_PTR on failure, NULL if the translation is IDENTITY, otherwise a
+ * valid paging domain.
+ */
+static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
+{
+	struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
 	int err;
 
-	id = of_match_device(host1x_of_match, &pdev->dev);
-	if (!id)
-		return -EINVAL;
+#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+	if (host->dev->archdata.mapping) {
+		struct dma_iommu_mapping *mapping =
+				to_dma_iommu_mapping(host->dev);
+		arm_iommu_detach_device(host->dev);
+		arm_iommu_release_mapping(mapping);
+
+		domain = iommu_get_domain_for_dev(host->dev);
+	}
+#endif
 
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!regs) {
-		dev_err(&pdev->dev, "failed to get registers\n");
-		return -ENXIO;
+	/*
+	 * We may not always want to enable IOMMU support (for example if the
+	 * host1x firewall is already enabled and we don't support addressing
+	 * more than 32 bits of physical memory), so check for that first.
+	 *
+	 * Similarly, if host1x is already attached to an IOMMU (via the DMA
+	 * API), don't try to attach again.
+	 */
+	if (domain && domain->type == IOMMU_DOMAIN_IDENTITY)
+		domain = NULL;
+	if (!host1x_wants_iommu(host) || domain)
+		return domain;
+
+	host->group = iommu_group_get(host->dev);
+	if (host->group) {
+		struct iommu_domain_geometry *geometry;
+		dma_addr_t start, end;
+		unsigned long order;
+
+		err = iova_cache_get();
+		if (err < 0)
+			goto put_group;
+
+		host->domain = iommu_paging_domain_alloc(host->dev);
+		if (IS_ERR(host->domain)) {
+			err = PTR_ERR(host->domain);
+			host->domain = NULL;
+			goto put_cache;
+		}
+
+		err = iommu_attach_group(host->domain, host->group);
+		if (err) {
+			if (err == -ENODEV)
+				err = 0;
+
+			goto free_domain;
+		}
+
+		geometry = &host->domain->geometry;
+		start = geometry->aperture_start & host->info->dma_mask;
+		end = geometry->aperture_end & host->info->dma_mask;
+
+		order = __ffs(host->domain->pgsize_bitmap);
+		init_iova_domain(&host->iova, 1UL << order, start >> order);
+		host->iova_end = end;
+
+		domain = host->domain;
 	}
 
-	syncpt_irq = platform_get_irq(pdev, 0);
-	if (syncpt_irq < 0) {
-		dev_err(&pdev->dev, "failed to get IRQ\n");
-		return -ENXIO;
+	return domain;
+
+free_domain:
+	iommu_domain_free(host->domain);
+	host->domain = NULL;
+put_cache:
+	iova_cache_put();
+put_group:
+	iommu_group_put(host->group);
+	host->group = NULL;
+
+	return ERR_PTR(err);
+}
+
+static int host1x_iommu_init(struct host1x *host)
+{
+	u64 mask = host->info->dma_mask;
+	struct iommu_domain *domain;
+	int err;
+
+	domain = host1x_iommu_attach(host);
+	if (IS_ERR(domain)) {
+		err = PTR_ERR(domain);
+		dev_err(host->dev, "failed to attach to IOMMU: %d\n", err);
+		return err;
+	}
+
+	/*
+	 * If we're not behind an IOMMU make sure we don't get push buffers
+	 * that are allocated outside of the range addressable by the GATHER
+	 * opcode.
+	 *
+	 * Newer generations of Tegra (Tegra186 and later) support a wide
+	 * variant of the GATHER opcode that allows addressing more bits.
+	 */
+	if (!domain && !host->info->has_wide_gather)
+		mask = DMA_BIT_MASK(32);
+
+	err = dma_coerce_mask_and_coherent(host->dev, mask);
+	if (err < 0) {
+		dev_err(host->dev, "failed to set DMA mask: %d\n", err);
+		return err;
+	}
+
+	return 0;
+}
+
+static void host1x_iommu_exit(struct host1x *host)
+{
+	if (host->domain) {
+		put_iova_domain(&host->iova);
+		iommu_detach_group(host->domain, host->group);
+
+		iommu_domain_free(host->domain);
+		host->domain = NULL;
+
+		iova_cache_put();
+
+		iommu_group_put(host->group);
+		host->group = NULL;
+	}
+}
+
+static int host1x_get_resets(struct host1x *host)
+{
+	int err;
+
+	host->resets[0].id = "mc";
+	host->resets[1].id = "host1x";
+	host->nresets = ARRAY_SIZE(host->resets);
+
+	err = devm_reset_control_bulk_get_optional_exclusive_released(
+				host->dev, host->nresets, host->resets);
+	if (err) {
+		dev_err(host->dev, "failed to get reset: %d\n", err);
+		return err;
 	}
 
+	return 0;
+}
+
+static int host1x_probe(struct platform_device *pdev)
+{
+	struct host1x *host;
+	int err, i;
+
 	host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
 	if (!host)
 		return -ENOMEM;
 
+	host->info = of_device_get_match_data(&pdev->dev);
+
+	if (host->info->has_hypervisor) {
+		host->regs = devm_platform_ioremap_resource_byname(pdev, "vm");
+		if (IS_ERR(host->regs))
+			return PTR_ERR(host->regs);
+
+		host->hv_regs = devm_platform_ioremap_resource_byname(pdev, "hypervisor");
+		if (IS_ERR(host->hv_regs))
+			return PTR_ERR(host->hv_regs);
+
+		if (host->info->has_common) {
+			host->common_regs = devm_platform_ioremap_resource_byname(pdev, "common");
+			if (IS_ERR(host->common_regs))
+				return PTR_ERR(host->common_regs);
+		}
+	} else {
+		host->regs = devm_platform_ioremap_resource(pdev, 0);
+		if (IS_ERR(host->regs))
+			return PTR_ERR(host->regs);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(host->syncpt_irqs); i++) {
+		char irq_name[] = "syncptX";
+
+		sprintf(irq_name, "syncpt%d", i);
+
+		err = platform_get_irq_byname_optional(pdev, irq_name);
+		if (err == -ENXIO)
+			break;
+		if (err < 0)
+			return err;
+
+		host->syncpt_irqs[i] = err;
+	}
+
+	host->num_syncpt_irqs = i;
+
+	/* Device tree without irq names */
+	if (i == 0) {
+		host->syncpt_irqs[0] = platform_get_irq(pdev, 0);
+		if (host->syncpt_irqs[0] < 0)
+			return host->syncpt_irqs[0];
+
+		host->num_syncpt_irqs = 1;
+	}
+
+	mutex_init(&host->devices_lock);
+	INIT_LIST_HEAD(&host->devices);
+	INIT_LIST_HEAD(&host->list);
 	host->dev = &pdev->dev;
-	host->info = id->data;
 
 	/* set common host1x device data */
 	platform_set_drvdata(pdev, host);
 
-	host->regs = devm_ioremap_resource(&pdev->dev, regs);
-	if (IS_ERR(host->regs))
-		return PTR_ERR(host->regs);
+	host->dev->dma_parms = &host->dma_parms;
+	dma_set_max_seg_size(host->dev, UINT_MAX);
 
 	if (host->info->init) {
 		err = host->info->init(host);
@@ -131,115 +594,237 @@ static int host1x_probe(struct platform_device *pdev)
 	}
 
 	host->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(host->clk)) {
-		dev_err(&pdev->dev, "failed to get clock\n");
-		err = PTR_ERR(host->clk);
+	if (IS_ERR(host->clk))
+		return dev_err_probe(&pdev->dev, PTR_ERR(host->clk), "failed to get clock\n");
+
+	err = host1x_get_resets(host);
+	if (err)
 		return err;
+
+	host1x_bo_cache_init(&host->cache);
+
+	err = host1x_iommu_init(host);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err);
+		goto destroy_cache;
 	}
 
-	err = host1x_channel_list_init(host);
+	err = host1x_channel_list_init(&host->channel_list,
+				       host->info->nb_channels);
 	if (err) {
 		dev_err(&pdev->dev, "failed to initialize channel list\n");
-		return err;
+		goto iommu_exit;
 	}
 
-	err = clk_prepare_enable(host->clk);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to enable clock\n");
-		return err;
+	err = host1x_memory_context_list_init(host);
+	if (err) {
+		dev_err(&pdev->dev, "failed to initialize context list\n");
+		goto free_channels;
 	}
 
 	err = host1x_syncpt_init(host);
 	if (err) {
 		dev_err(&pdev->dev, "failed to initialize syncpts\n");
-		return err;
+		goto free_contexts;
 	}
 
-	err = host1x_intr_init(host, syncpt_irq);
+	mutex_init(&host->intr_mutex);
+
+	pm_runtime_enable(&pdev->dev);
+
+	err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev);
+	if (err)
+		goto pm_disable;
+
+	/* the driver's code isn't ready yet for the dynamic RPM */
+	err = pm_runtime_resume_and_get(&pdev->dev);
+	if (err)
+		goto pm_disable;
+
+	err = host1x_intr_init(host);
 	if (err) {
 		dev_err(&pdev->dev, "failed to initialize interrupts\n");
-		goto fail_deinit_syncpt;
+		goto pm_put;
 	}
 
 	host1x_debug_init(host);
 
-	host1x_drm_alloc(pdev);
+	err = host1x_register(host);
+	if (err < 0)
+		goto deinit_debugfs;
+
+	err = devm_of_platform_populate(&pdev->dev);
+	if (err < 0)
+		goto unregister;
 
 	return 0;
 
-fail_deinit_syncpt:
+unregister:
+	host1x_unregister(host);
+deinit_debugfs:
+	host1x_debug_deinit(host);
+	host1x_intr_deinit(host);
+pm_put:
+	pm_runtime_put_sync_suspend(&pdev->dev);
+pm_disable:
+	pm_runtime_disable(&pdev->dev);
 	host1x_syncpt_deinit(host);
+free_contexts:
+	host1x_memory_context_list_free(&host->context_list);
+free_channels:
+	host1x_channel_list_free(&host->channel_list);
+iommu_exit:
+	host1x_iommu_exit(host);
+destroy_cache:
+	host1x_bo_cache_destroy(&host->cache);
+
 	return err;
 }
 
-static int __exit host1x_remove(struct platform_device *pdev)
+static void host1x_remove(struct platform_device *pdev)
 {
 	struct host1x *host = platform_get_drvdata(pdev);
 
+	host1x_unregister(host);
+	host1x_debug_deinit(host);
+
+	pm_runtime_force_suspend(&pdev->dev);
+
 	host1x_intr_deinit(host);
 	host1x_syncpt_deinit(host);
+	host1x_memory_context_list_free(&host->context_list);
+	host1x_channel_list_free(&host->channel_list);
+	host1x_iommu_exit(host);
+	host1x_bo_cache_destroy(&host->cache);
+}
+
+static int __maybe_unused host1x_runtime_suspend(struct device *dev)
+{
+	struct host1x *host = dev_get_drvdata(dev);
+	int err;
+
+	host1x_channel_stop_all(host);
+	host1x_intr_stop(host);
+	host1x_syncpt_save(host);
+
+	if (!host->info->skip_reset_assert) {
+		err = reset_control_bulk_assert(host->nresets, host->resets);
+		if (err) {
+			dev_err(dev, "failed to assert reset: %d\n", err);
+			goto resume_host1x;
+		}
+
+		usleep_range(1000, 2000);
+	}
+
 	clk_disable_unprepare(host->clk);
+	reset_control_bulk_release(host->nresets, host->resets);
 
 	return 0;
+
+resume_host1x:
+	host1x_setup_virtualization_tables(host);
+	host1x_syncpt_restore(host);
+	host1x_intr_start(host);
+
+	return err;
 }
 
+static int __maybe_unused host1x_runtime_resume(struct device *dev)
+{
+	struct host1x *host = dev_get_drvdata(dev);
+	int err;
+
+	err = reset_control_bulk_acquire(host->nresets, host->resets);
+	if (err) {
+		dev_err(dev, "failed to acquire reset: %d\n", err);
+		return err;
+	}
+
+	err = clk_prepare_enable(host->clk);
+	if (err) {
+		dev_err(dev, "failed to enable clock: %d\n", err);
+		goto release_reset;
+	}
+
+	err = reset_control_bulk_deassert(host->nresets, host->resets);
+	if (err < 0) {
+		dev_err(dev, "failed to deassert reset: %d\n", err);
+		goto disable_clk;
+	}
+
+	host1x_setup_virtualization_tables(host);
+	host1x_syncpt_restore(host);
+	host1x_intr_start(host);
+
+	return 0;
+
+disable_clk:
+	clk_disable_unprepare(host->clk);
+release_reset:
+	reset_control_bulk_release(host->nresets, host->resets);
+
+	return err;
+}
+
+static const struct dev_pm_ops host1x_pm_ops = {
+	SET_RUNTIME_PM_OPS(host1x_runtime_suspend, host1x_runtime_resume,
+			   NULL)
+	SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
+};
+
 static struct platform_driver tegra_host1x_driver = {
-	.probe = host1x_probe,
-	.remove = __exit_p(host1x_remove),
 	.driver = {
-		.owner = THIS_MODULE,
 		.name = "tegra-host1x",
 		.of_match_table = host1x_of_match,
+		.pm = &host1x_pm_ops,
 	},
+	.probe = host1x_probe,
+	.remove = host1x_remove,
+};
+
+static struct platform_driver * const drivers[] = {
+	&tegra_host1x_driver,
+	&tegra_mipi_driver,
 };
 
 static int __init tegra_host1x_init(void)
 {
 	int err;
 
-	err = platform_driver_register(&tegra_host1x_driver);
+	err = bus_register(&host1x_bus_type);
 	if (err < 0)
 		return err;
 
-#ifdef CONFIG_DRM_TEGRA
-	err = platform_driver_register(&tegra_dc_driver);
-	if (err < 0)
-		goto unregister_host1x;
-
-	err = platform_driver_register(&tegra_hdmi_driver);
+	err = platform_register_drivers(drivers, ARRAY_SIZE(drivers));
 	if (err < 0)
-		goto unregister_dc;
+		bus_unregister(&host1x_bus_type);
 
-	err = platform_driver_register(&tegra_gr2d_driver);
-	if (err < 0)
-		goto unregister_hdmi;
-#endif
-
-	return 0;
-
-#ifdef CONFIG_DRM_TEGRA
-unregister_hdmi:
-	platform_driver_unregister(&tegra_hdmi_driver);
-unregister_dc:
-	platform_driver_unregister(&tegra_dc_driver);
-unregister_host1x:
-	platform_driver_unregister(&tegra_host1x_driver);
 	return err;
-#endif
 }
 module_init(tegra_host1x_init);
 
 static void __exit tegra_host1x_exit(void)
 {
-#ifdef CONFIG_DRM_TEGRA
-	platform_driver_unregister(&tegra_gr2d_driver);
-	platform_driver_unregister(&tegra_hdmi_driver);
-	platform_driver_unregister(&tegra_dc_driver);
-#endif
-	platform_driver_unregister(&tegra_host1x_driver);
+	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
+	bus_unregister(&host1x_bus_type);
 }
 module_exit(tegra_host1x_exit);
 
+/**
+ * host1x_get_dma_mask() - query the supported DMA mask for host1x
+ * @host1x: host1x instance
+ *
+ * Note that this returns the supported DMA mask for host1x, which can be
+ * different from the applicable DMA mask under certain circumstances.
+ */
+u64 host1x_get_dma_mask(struct host1x *host1x)
+{
+	return host1x->info->dma_mask;
+}
+EXPORT_SYMBOL(host1x_get_dma_mask);
+
+MODULE_SOFTDEP("post: tegra-drm");
 MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
 MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>");
 MODULE_DESCRIPTION("Host1x driver for Tegra products");
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index 790ddf114e58..ef44618ed88a 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -1,32 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2012-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (c) 2012-2015, NVIDIA Corporation.
  */
 
 #ifndef HOST1X_DEV_H
 #define HOST1X_DEV_H
 
-#include <linux/platform_device.h>
 #include <linux/device.h>
+#include <linux/iommu.h>
+#include <linux/iova.h>
+#include <linux/irqreturn.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
 
+#include "cdma.h"
 #include "channel.h"
-#include "syncpt.h"
+#include "context.h"
 #include "intr.h"
-#include "cdma.h"
 #include "job.h"
+#include "syncpt.h"
 
 struct host1x_syncpt;
+struct host1x_syncpt_base;
 struct host1x_channel;
 struct host1x_cdma;
 struct host1x_job;
@@ -44,7 +39,7 @@ struct host1x_cdma_ops {
 	void (*start)(struct host1x_cdma *cdma);
 	void (*stop)(struct host1x_cdma *cdma);
 	void (*flush)(struct  host1x_cdma *cdma);
-	int (*timeout_init)(struct host1x_cdma *cdma, u32 syncpt_id);
+	int (*timeout_init)(struct host1x_cdma *cdma);
 	void (*timeout_destroy)(struct host1x_cdma *cdma);
 	void (*freeze)(struct host1x_cdma *cdma);
 	void (*resume)(struct host1x_cdma *cdma, u32 getptr);
@@ -74,40 +69,84 @@ struct host1x_syncpt_ops {
 	void (*load_wait_base)(struct host1x_syncpt *syncpt);
 	u32 (*load)(struct host1x_syncpt *syncpt);
 	int (*cpu_incr)(struct host1x_syncpt *syncpt);
-	int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr);
+	void (*assign_to_channel)(struct host1x_syncpt *syncpt,
+	                          struct host1x_channel *channel);
+	void (*enable_protection)(struct host1x *host);
 };
 
 struct host1x_intr_ops {
-	int (*init_host_sync)(struct host1x *host, u32 cpm,
-		void (*syncpt_thresh_work)(struct work_struct *work));
+	int (*init_host_sync)(struct host1x *host, u32 cpm);
 	void (*set_syncpt_threshold)(
-		struct host1x *host, u32 id, u32 thresh);
-	void (*enable_syncpt_intr)(struct host1x *host, u32 id);
-	void (*disable_syncpt_intr)(struct host1x *host, u32 id);
+		struct host1x *host, unsigned int id, u32 thresh);
+	void (*enable_syncpt_intr)(struct host1x *host, unsigned int id);
+	void (*disable_syncpt_intr)(struct host1x *host, unsigned int id);
 	void (*disable_all_syncpt_intrs)(struct host1x *host);
 	int (*free_syncpt_irq)(struct host1x *host);
+	irqreturn_t (*isr)(int irq, void *dev_id);
+};
+
+struct host1x_sid_entry {
+	unsigned int base;
+	unsigned int offset;
+	unsigned int limit;
+};
+
+struct host1x_table_desc {
+	unsigned int base;
+	unsigned int count;
 };
 
 struct host1x_info {
-	int	nb_channels;		/* host1x: num channels supported */
-	int	nb_pts;			/* host1x: num syncpoints supported */
-	int	nb_bases;		/* host1x: num syncpoints supported */
-	int	nb_mlocks;		/* host1x: number of mlocks */
-	int	(*init)(struct host1x *); /* initialize per SoC ops */
-	int	sync_offset;
+	unsigned int nb_channels; /* host1x: number of channels supported */
+	unsigned int nb_pts; /* host1x: number of syncpoints supported */
+	unsigned int nb_bases; /* host1x: number of syncpoint bases supported */
+	unsigned int nb_mlocks; /* host1x: number of mlocks supported */
+	int (*init)(struct host1x *host1x); /* initialize per SoC ops */
+	unsigned int sync_offset; /* offset of syncpoint registers */
+	u64 dma_mask; /* mask of addressable memory */
+	bool has_wide_gather; /* supports GATHER_W opcode */
+	bool has_hypervisor; /* has hypervisor registers */
+	bool has_common; /* has common registers separate from hypervisor */
+	unsigned int num_sid_entries;
+	const struct host1x_sid_entry *sid_table;
+	struct host1x_table_desc streamid_vm_table;
+	struct host1x_table_desc classid_vm_table;
+	struct host1x_table_desc mmio_vm_table;
+	/*
+	 * On T20-T148, the boot chain may setup DC to increment syncpoints
+	 * 26/27 on VBLANK. As such we cannot use these syncpoints until
+	 * the display driver disables VBLANK increments.
+	 */
+	bool reserve_vblank_syncpts;
+	/*
+	 * On Tegra186, secure world applications may require access to
+	 * host1x during suspend/resume. To allow this, we need to leave
+	 * host1x not in reset.
+	 */
+	bool skip_reset_assert;
 };
 
 struct host1x {
 	const struct host1x_info *info;
 
 	void __iomem *regs;
+	void __iomem *hv_regs; /* hypervisor region */
+	void __iomem *common_regs;
+	int syncpt_irqs[8];
+	int num_syncpt_irqs;
 	struct host1x_syncpt *syncpt;
+	struct host1x_syncpt_base *bases;
 	struct device *dev;
 	struct clk *clk;
+	struct reset_control_bulk_data resets[2];
+	unsigned int nresets;
+
+	struct iommu_group *group;
+	struct iommu_domain *domain;
+	struct iova_domain iova;
+	dma_addr_t iova_end;
 
 	struct mutex intr_mutex;
-	struct workqueue_struct *intr_wq;
-	int intr_syncpt_irq;
 
 	const struct host1x_syncpt_ops *syncpt_op;
 	const struct host1x_intr_ops *intr_op;
@@ -118,19 +157,32 @@ struct host1x {
 
 	struct host1x_syncpt *nop_sp;
 
-	struct mutex chlist_mutex;
-	struct host1x_channel chlist;
-	unsigned long allocated_channels;
-	unsigned int num_allocated_channels;
+	struct mutex syncpt_mutex;
+
+	struct host1x_channel_list channel_list;
+	struct host1x_memory_context_list context_list;
 
 	struct dentry *debugfs;
 
-	void *drm_data;
+	struct mutex devices_lock;
+	struct list_head devices;
+
+	struct list_head list;
+
+	struct device_dma_parameters dma_parms;
+
+	struct host1x_bo_cache cache;
 };
 
-void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v);
+void host1x_common_writel(struct host1x *host1x, u32 v, u32 r);
+void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r);
+u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r);
+void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r);
 u32 host1x_sync_readl(struct host1x *host1x, u32 r);
-void host1x_ch_writel(struct host1x_channel *ch, u32 r, u32 v);
+#ifdef CONFIG_64BIT
+u64 host1x_sync_readq(struct host1x *host1x, u32 r);
+#endif
+void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r);
 u32 host1x_ch_readl(struct host1x_channel *ch, u32 r);
 
 static inline void host1x_hw_syncpt_restore(struct host1x *host,
@@ -163,33 +215,38 @@ static inline int host1x_hw_syncpt_cpu_incr(struct host1x *host,
 	return host->syncpt_op->cpu_incr(sp);
 }
 
-static inline int host1x_hw_syncpt_patch_wait(struct host1x *host,
-					      struct host1x_syncpt *sp,
-					      void *patch_addr)
+static inline void host1x_hw_syncpt_assign_to_channel(
+	struct host1x *host, struct host1x_syncpt *sp,
+	struct host1x_channel *ch)
+{
+	return host->syncpt_op->assign_to_channel(sp, ch);
+}
+
+static inline void host1x_hw_syncpt_enable_protection(struct host1x *host)
 {
-	return host->syncpt_op->patch_wait(sp, patch_addr);
+	return host->syncpt_op->enable_protection(host);
 }
 
-static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
-			void (*syncpt_thresh_work)(struct work_struct *))
+static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm)
 {
-	return host->intr_op->init_host_sync(host, cpm, syncpt_thresh_work);
+	return host->intr_op->init_host_sync(host, cpm);
 }
 
 static inline void host1x_hw_intr_set_syncpt_threshold(struct host1x *host,
-						       u32 id, u32 thresh)
+						       unsigned int id,
+						       u32 thresh)
 {
 	host->intr_op->set_syncpt_threshold(host, id, thresh);
 }
 
 static inline void host1x_hw_intr_enable_syncpt_intr(struct host1x *host,
-						     u32 id)
+						     unsigned int id)
 {
 	host->intr_op->enable_syncpt_intr(host, id);
 }
 
 static inline void host1x_hw_intr_disable_syncpt_intr(struct host1x *host,
-						      u32 id)
+						      unsigned int id)
 {
 	host->intr_op->disable_syncpt_intr(host, id);
 }
@@ -206,9 +263,9 @@ static inline int host1x_hw_intr_free_syncpt_irq(struct host1x *host)
 
 static inline int host1x_hw_channel_init(struct host1x *host,
 					 struct host1x_channel *channel,
-					 int chid)
+					 unsigned int id)
 {
-	return host->channel_op->init(channel, host, chid);
+	return host->channel_op->init(channel, host, id);
 }
 
 static inline int host1x_hw_channel_submit(struct host1x *host,
@@ -236,10 +293,9 @@ static inline void host1x_hw_cdma_flush(struct host1x *host,
 }
 
 static inline int host1x_hw_cdma_timeout_init(struct host1x *host,
-					      struct host1x_cdma *cdma,
-					      u32 syncpt_id)
+					      struct host1x_cdma *cdma)
 {
-	return host->cdma_op->timeout_init(cdma, syncpt_id);
+	return host->cdma_op->timeout_init(cdma);
 }
 
 static inline void host1x_hw_cdma_timeout_destroy(struct host1x *host,
@@ -301,8 +357,6 @@ static inline void host1x_hw_show_mlocks(struct host1x *host, struct output *o)
 	host->debug_op->show_mlocks(host, o);
 }
 
-extern struct platform_driver tegra_hdmi_driver;
-extern struct platform_driver tegra_dc_driver;
-extern struct platform_driver tegra_gr2d_driver;
+extern struct platform_driver tegra_mipi_driver;
 
 #endif
diff --git a/drivers/gpu/host1x/drm/Kconfig b/drivers/gpu/host1x/drm/Kconfig
deleted file mode 100644
index 69853a4de40a..000000000000
--- a/drivers/gpu/host1x/drm/Kconfig
+++ /dev/null
@@ -1,29 +0,0 @@
-config DRM_TEGRA
-	bool "NVIDIA Tegra DRM"
-	depends on DRM
-	select DRM_KMS_HELPER
-	select FB_SYS_FILLRECT
-	select FB_SYS_COPYAREA
-	select FB_SYS_IMAGEBLIT
-	help
-	  Choose this option if you have an NVIDIA Tegra SoC.
-
-	  To compile this driver as a module, choose M here: the module
-	  will be called tegra-drm.
-
-if DRM_TEGRA
-
-config DRM_TEGRA_STAGING
-	bool "Enable HOST1X interface"
-	depends on STAGING
-	help
-	  Say yes if HOST1X should be available for userspace DRM users.
-
-	  If unsure, choose N.
-
-config DRM_TEGRA_DEBUG
-	bool "NVIDIA Tegra DRM debug support"
-	help
-	  Say yes here to enable debugging support.
-
-endif
diff --git a/drivers/gpu/host1x/drm/dc.c b/drivers/gpu/host1x/drm/dc.c
deleted file mode 100644
index 5360e5a57ecc..000000000000
--- a/drivers/gpu/host1x/drm/dc.c
+++ /dev/null
@@ -1,1200 +0,0 @@
-/*
- * Copyright (C) 2012 Avionic Design GmbH
- * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/clk.h>
-#include <linux/debugfs.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-#include <linux/clk/tegra.h>
-
-#include "host1x_client.h"
-#include "dc.h"
-#include "drm.h"
-#include "gem.h"
-
-struct tegra_plane {
-	struct drm_plane base;
-	unsigned int index;
-};
-
-static inline struct tegra_plane *to_tegra_plane(struct drm_plane *plane)
-{
-	return container_of(plane, struct tegra_plane, base);
-}
-
-static int tegra_plane_update(struct drm_plane *plane, struct drm_crtc *crtc,
-			      struct drm_framebuffer *fb, int crtc_x,
-			      int crtc_y, unsigned int crtc_w,
-			      unsigned int crtc_h, uint32_t src_x,
-			      uint32_t src_y, uint32_t src_w, uint32_t src_h)
-{
-	struct tegra_plane *p = to_tegra_plane(plane);
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	struct tegra_dc_window window;
-	unsigned int i;
-
-	memset(&window, 0, sizeof(window));
-	window.src.x = src_x >> 16;
-	window.src.y = src_y >> 16;
-	window.src.w = src_w >> 16;
-	window.src.h = src_h >> 16;
-	window.dst.x = crtc_x;
-	window.dst.y = crtc_y;
-	window.dst.w = crtc_w;
-	window.dst.h = crtc_h;
-	window.format = tegra_dc_format(fb->pixel_format);
-	window.bits_per_pixel = fb->bits_per_pixel;
-
-	for (i = 0; i < drm_format_num_planes(fb->pixel_format); i++) {
-		struct tegra_bo *bo = tegra_fb_get_plane(fb, i);
-
-		window.base[i] = bo->paddr + fb->offsets[i];
-
-		/*
-		 * Tegra doesn't support different strides for U and V planes
-		 * so we display a warning if the user tries to display a
-		 * framebuffer with such a configuration.
-		 */
-		if (i >= 2) {
-			if (fb->pitches[i] != window.stride[1])
-				DRM_ERROR("unsupported UV-plane configuration\n");
-		} else {
-			window.stride[i] = fb->pitches[i];
-		}
-	}
-
-	return tegra_dc_setup_window(dc, p->index, &window);
-}
-
-static int tegra_plane_disable(struct drm_plane *plane)
-{
-	struct tegra_dc *dc = to_tegra_dc(plane->crtc);
-	struct tegra_plane *p = to_tegra_plane(plane);
-	unsigned long value;
-
-	if (!plane->crtc)
-		return 0;
-
-	value = WINDOW_A_SELECT << p->index;
-	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
-
-	value = tegra_dc_readl(dc, DC_WIN_WIN_OPTIONS);
-	value &= ~WIN_ENABLE;
-	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
-
-	tegra_dc_writel(dc, WIN_A_UPDATE << p->index, DC_CMD_STATE_CONTROL);
-	tegra_dc_writel(dc, WIN_A_ACT_REQ << p->index, DC_CMD_STATE_CONTROL);
-
-	return 0;
-}
-
-static void tegra_plane_destroy(struct drm_plane *plane)
-{
-	tegra_plane_disable(plane);
-	drm_plane_cleanup(plane);
-}
-
-static const struct drm_plane_funcs tegra_plane_funcs = {
-	.update_plane = tegra_plane_update,
-	.disable_plane = tegra_plane_disable,
-	.destroy = tegra_plane_destroy,
-};
-
-static const uint32_t plane_formats[] = {
-	DRM_FORMAT_XBGR8888,
-	DRM_FORMAT_XRGB8888,
-	DRM_FORMAT_RGB565,
-	DRM_FORMAT_UYVY,
-	DRM_FORMAT_YUV420,
-	DRM_FORMAT_YUV422,
-};
-
-static int tegra_dc_add_planes(struct drm_device *drm, struct tegra_dc *dc)
-{
-	unsigned int i;
-	int err = 0;
-
-	for (i = 0; i < 2; i++) {
-		struct tegra_plane *plane;
-
-		plane = devm_kzalloc(drm->dev, sizeof(*plane), GFP_KERNEL);
-		if (!plane)
-			return -ENOMEM;
-
-		plane->index = 1 + i;
-
-		err = drm_plane_init(drm, &plane->base, 1 << dc->pipe,
-				     &tegra_plane_funcs, plane_formats,
-				     ARRAY_SIZE(plane_formats), false);
-		if (err < 0)
-			return err;
-	}
-
-	return 0;
-}
-
-static int tegra_dc_set_base(struct tegra_dc *dc, int x, int y,
-			     struct drm_framebuffer *fb)
-{
-	unsigned int format = tegra_dc_format(fb->pixel_format);
-	struct tegra_bo *bo = tegra_fb_get_plane(fb, 0);
-	unsigned long value;
-
-	tegra_dc_writel(dc, WINDOW_A_SELECT, DC_CMD_DISPLAY_WINDOW_HEADER);
-
-	value = fb->offsets[0] + y * fb->pitches[0] +
-		x * fb->bits_per_pixel / 8;
-
-	tegra_dc_writel(dc, bo->paddr + value, DC_WINBUF_START_ADDR);
-	tegra_dc_writel(dc, fb->pitches[0], DC_WIN_LINE_STRIDE);
-	tegra_dc_writel(dc, format, DC_WIN_COLOR_DEPTH);
-
-	value = GENERAL_UPDATE | WIN_A_UPDATE;
-	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
-
-	value = GENERAL_ACT_REQ | WIN_A_ACT_REQ;
-	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
-
-	return 0;
-}
-
-void tegra_dc_enable_vblank(struct tegra_dc *dc)
-{
-	unsigned long value, flags;
-
-	spin_lock_irqsave(&dc->lock, flags);
-
-	value = tegra_dc_readl(dc, DC_CMD_INT_MASK);
-	value |= VBLANK_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
-
-	spin_unlock_irqrestore(&dc->lock, flags);
-}
-
-void tegra_dc_disable_vblank(struct tegra_dc *dc)
-{
-	unsigned long value, flags;
-
-	spin_lock_irqsave(&dc->lock, flags);
-
-	value = tegra_dc_readl(dc, DC_CMD_INT_MASK);
-	value &= ~VBLANK_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
-
-	spin_unlock_irqrestore(&dc->lock, flags);
-}
-
-static void tegra_dc_finish_page_flip(struct tegra_dc *dc)
-{
-	struct drm_device *drm = dc->base.dev;
-	struct drm_crtc *crtc = &dc->base;
-	unsigned long flags, base;
-	struct tegra_bo *bo;
-
-	if (!dc->event)
-		return;
-
-	bo = tegra_fb_get_plane(crtc->fb, 0);
-
-	/* check if new start address has been latched */
-	tegra_dc_writel(dc, READ_MUX, DC_CMD_STATE_ACCESS);
-	base = tegra_dc_readl(dc, DC_WINBUF_START_ADDR);
-	tegra_dc_writel(dc, 0, DC_CMD_STATE_ACCESS);
-
-	if (base == bo->paddr + crtc->fb->offsets[0]) {
-		spin_lock_irqsave(&drm->event_lock, flags);
-		drm_send_vblank_event(drm, dc->pipe, dc->event);
-		drm_vblank_put(drm, dc->pipe);
-		dc->event = NULL;
-		spin_unlock_irqrestore(&drm->event_lock, flags);
-	}
-}
-
-void tegra_dc_cancel_page_flip(struct drm_crtc *crtc, struct drm_file *file)
-{
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	struct drm_device *drm = crtc->dev;
-	unsigned long flags;
-
-	spin_lock_irqsave(&drm->event_lock, flags);
-
-	if (dc->event && dc->event->base.file_priv == file) {
-		dc->event->base.destroy(&dc->event->base);
-		drm_vblank_put(drm, dc->pipe);
-		dc->event = NULL;
-	}
-
-	spin_unlock_irqrestore(&drm->event_lock, flags);
-}
-
-static int tegra_dc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
-			      struct drm_pending_vblank_event *event)
-{
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	struct drm_device *drm = crtc->dev;
-
-	if (dc->event)
-		return -EBUSY;
-
-	if (event) {
-		event->pipe = dc->pipe;
-		dc->event = event;
-		drm_vblank_get(drm, dc->pipe);
-	}
-
-	tegra_dc_set_base(dc, 0, 0, fb);
-	crtc->fb = fb;
-
-	return 0;
-}
-
-static const struct drm_crtc_funcs tegra_crtc_funcs = {
-	.page_flip = tegra_dc_page_flip,
-	.set_config = drm_crtc_helper_set_config,
-	.destroy = drm_crtc_cleanup,
-};
-
-static void tegra_crtc_disable(struct drm_crtc *crtc)
-{
-	struct drm_device *drm = crtc->dev;
-	struct drm_plane *plane;
-
-	list_for_each_entry(plane, &drm->mode_config.plane_list, head) {
-		if (plane->crtc == crtc) {
-			tegra_plane_disable(plane);
-			plane->crtc = NULL;
-
-			if (plane->fb) {
-				drm_framebuffer_unreference(plane->fb);
-				plane->fb = NULL;
-			}
-		}
-	}
-}
-
-static bool tegra_crtc_mode_fixup(struct drm_crtc *crtc,
-				  const struct drm_display_mode *mode,
-				  struct drm_display_mode *adjusted)
-{
-	return true;
-}
-
-static inline u32 compute_dda_inc(unsigned int in, unsigned int out, bool v,
-				  unsigned int bpp)
-{
-	fixed20_12 outf = dfixed_init(out);
-	fixed20_12 inf = dfixed_init(in);
-	u32 dda_inc;
-	int max;
-
-	if (v)
-		max = 15;
-	else {
-		switch (bpp) {
-		case 2:
-			max = 8;
-			break;
-
-		default:
-			WARN_ON_ONCE(1);
-			/* fallthrough */
-		case 4:
-			max = 4;
-			break;
-		}
-	}
-
-	outf.full = max_t(u32, outf.full - dfixed_const(1), dfixed_const(1));
-	inf.full -= dfixed_const(1);
-
-	dda_inc = dfixed_div(inf, outf);
-	dda_inc = min_t(u32, dda_inc, dfixed_const(max));
-
-	return dda_inc;
-}
-
-static inline u32 compute_initial_dda(unsigned int in)
-{
-	fixed20_12 inf = dfixed_init(in);
-	return dfixed_frac(inf);
-}
-
-static int tegra_dc_set_timings(struct tegra_dc *dc,
-				struct drm_display_mode *mode)
-{
-	/* TODO: For HDMI compliance, h & v ref_to_sync should be set to 1 */
-	unsigned int h_ref_to_sync = 0;
-	unsigned int v_ref_to_sync = 0;
-	unsigned long value;
-
-	tegra_dc_writel(dc, 0x0, DC_DISP_DISP_TIMING_OPTIONS);
-
-	value = (v_ref_to_sync << 16) | h_ref_to_sync;
-	tegra_dc_writel(dc, value, DC_DISP_REF_TO_SYNC);
-
-	value = ((mode->vsync_end - mode->vsync_start) << 16) |
-		((mode->hsync_end - mode->hsync_start) <<  0);
-	tegra_dc_writel(dc, value, DC_DISP_SYNC_WIDTH);
-
-	value = ((mode->vtotal - mode->vsync_end) << 16) |
-		((mode->htotal - mode->hsync_end) <<  0);
-	tegra_dc_writel(dc, value, DC_DISP_BACK_PORCH);
-
-	value = ((mode->vsync_start - mode->vdisplay) << 16) |
-		((mode->hsync_start - mode->hdisplay) <<  0);
-	tegra_dc_writel(dc, value, DC_DISP_FRONT_PORCH);
-
-	value = (mode->vdisplay << 16) | mode->hdisplay;
-	tegra_dc_writel(dc, value, DC_DISP_ACTIVE);
-
-	return 0;
-}
-
-static int tegra_crtc_setup_clk(struct drm_crtc *crtc,
-				struct drm_display_mode *mode,
-				unsigned long *div)
-{
-	unsigned long pclk = mode->clock * 1000, rate;
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	struct tegra_output *output = NULL;
-	struct drm_encoder *encoder;
-	long err;
-
-	list_for_each_entry(encoder, &crtc->dev->mode_config.encoder_list, head)
-		if (encoder->crtc == crtc) {
-			output = encoder_to_output(encoder);
-			break;
-		}
-
-	if (!output)
-		return -ENODEV;
-
-	/*
-	 * This assumes that the display controller will divide its parent
-	 * clock by 2 to generate the pixel clock.
-	 */
-	err = tegra_output_setup_clock(output, dc->clk, pclk * 2);
-	if (err < 0) {
-		dev_err(dc->dev, "failed to setup clock: %ld\n", err);
-		return err;
-	}
-
-	rate = clk_get_rate(dc->clk);
-	*div = (rate * 2 / pclk) - 2;
-
-	DRM_DEBUG_KMS("rate: %lu, div: %lu\n", rate, *div);
-
-	return 0;
-}
-
-static bool tegra_dc_format_is_yuv(unsigned int format, bool *planar)
-{
-	switch (format) {
-	case WIN_COLOR_DEPTH_YCbCr422:
-	case WIN_COLOR_DEPTH_YUV422:
-		if (planar)
-			*planar = false;
-
-		return true;
-
-	case WIN_COLOR_DEPTH_YCbCr420P:
-	case WIN_COLOR_DEPTH_YUV420P:
-	case WIN_COLOR_DEPTH_YCbCr422P:
-	case WIN_COLOR_DEPTH_YUV422P:
-	case WIN_COLOR_DEPTH_YCbCr422R:
-	case WIN_COLOR_DEPTH_YUV422R:
-	case WIN_COLOR_DEPTH_YCbCr422RA:
-	case WIN_COLOR_DEPTH_YUV422RA:
-		if (planar)
-			*planar = true;
-
-		return true;
-	}
-
-	return false;
-}
-
-int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
-			  const struct tegra_dc_window *window)
-{
-	unsigned h_offset, v_offset, h_size, v_size, h_dda, v_dda, bpp;
-	unsigned long value;
-	bool yuv, planar;
-
-	/*
-	 * For YUV planar modes, the number of bytes per pixel takes into
-	 * account only the luma component and therefore is 1.
-	 */
-	yuv = tegra_dc_format_is_yuv(window->format, &planar);
-	if (!yuv)
-		bpp = window->bits_per_pixel / 8;
-	else
-		bpp = planar ? 1 : 2;
-
-	value = WINDOW_A_SELECT << index;
-	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_WINDOW_HEADER);
-
-	tegra_dc_writel(dc, window->format, DC_WIN_COLOR_DEPTH);
-	tegra_dc_writel(dc, 0, DC_WIN_BYTE_SWAP);
-
-	value = V_POSITION(window->dst.y) | H_POSITION(window->dst.x);
-	tegra_dc_writel(dc, value, DC_WIN_POSITION);
-
-	value = V_SIZE(window->dst.h) | H_SIZE(window->dst.w);
-	tegra_dc_writel(dc, value, DC_WIN_SIZE);
-
-	h_offset = window->src.x * bpp;
-	v_offset = window->src.y;
-	h_size = window->src.w * bpp;
-	v_size = window->src.h;
-
-	value = V_PRESCALED_SIZE(v_size) | H_PRESCALED_SIZE(h_size);
-	tegra_dc_writel(dc, value, DC_WIN_PRESCALED_SIZE);
-
-	/*
-	 * For DDA computations the number of bytes per pixel for YUV planar
-	 * modes needs to take into account all Y, U and V components.
-	 */
-	if (yuv && planar)
-		bpp = 2;
-
-	h_dda = compute_dda_inc(window->src.w, window->dst.w, false, bpp);
-	v_dda = compute_dda_inc(window->src.h, window->dst.h, true, bpp);
-
-	value = V_DDA_INC(v_dda) | H_DDA_INC(h_dda);
-	tegra_dc_writel(dc, value, DC_WIN_DDA_INC);
-
-	h_dda = compute_initial_dda(window->src.x);
-	v_dda = compute_initial_dda(window->src.y);
-
-	tegra_dc_writel(dc, h_dda, DC_WIN_H_INITIAL_DDA);
-	tegra_dc_writel(dc, v_dda, DC_WIN_V_INITIAL_DDA);
-
-	tegra_dc_writel(dc, 0, DC_WIN_UV_BUF_STRIDE);
-	tegra_dc_writel(dc, 0, DC_WIN_BUF_STRIDE);
-
-	tegra_dc_writel(dc, window->base[0], DC_WINBUF_START_ADDR);
-
-	if (yuv && planar) {
-		tegra_dc_writel(dc, window->base[1], DC_WINBUF_START_ADDR_U);
-		tegra_dc_writel(dc, window->base[2], DC_WINBUF_START_ADDR_V);
-		value = window->stride[1] << 16 | window->stride[0];
-		tegra_dc_writel(dc, value, DC_WIN_LINE_STRIDE);
-	} else {
-		tegra_dc_writel(dc, window->stride[0], DC_WIN_LINE_STRIDE);
-	}
-
-	tegra_dc_writel(dc, h_offset, DC_WINBUF_ADDR_H_OFFSET);
-	tegra_dc_writel(dc, v_offset, DC_WINBUF_ADDR_V_OFFSET);
-
-	value = WIN_ENABLE;
-
-	if (yuv) {
-		/* setup default colorspace conversion coefficients */
-		tegra_dc_writel(dc, 0x00f0, DC_WIN_CSC_YOF);
-		tegra_dc_writel(dc, 0x012a, DC_WIN_CSC_KYRGB);
-		tegra_dc_writel(dc, 0x0000, DC_WIN_CSC_KUR);
-		tegra_dc_writel(dc, 0x0198, DC_WIN_CSC_KVR);
-		tegra_dc_writel(dc, 0x039b, DC_WIN_CSC_KUG);
-		tegra_dc_writel(dc, 0x032f, DC_WIN_CSC_KVG);
-		tegra_dc_writel(dc, 0x0204, DC_WIN_CSC_KUB);
-		tegra_dc_writel(dc, 0x0000, DC_WIN_CSC_KVB);
-
-		value |= CSC_ENABLE;
-	} else if (window->bits_per_pixel < 24) {
-		value |= COLOR_EXPAND;
-	}
-
-	tegra_dc_writel(dc, value, DC_WIN_WIN_OPTIONS);
-
-	/*
-	 * Disable blending and assume Window A is the bottom-most window,
-	 * Window C is the top-most window and Window B is in the middle.
-	 */
-	tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_NOKEY);
-	tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_1WIN);
-
-	switch (index) {
-	case 0:
-		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_2WIN_X);
-		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_2WIN_Y);
-		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_3WIN_XY);
-		break;
-
-	case 1:
-		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_2WIN_X);
-		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_2WIN_Y);
-		tegra_dc_writel(dc, 0x000000, DC_WIN_BLEND_3WIN_XY);
-		break;
-
-	case 2:
-		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_2WIN_X);
-		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_2WIN_Y);
-		tegra_dc_writel(dc, 0xffff00, DC_WIN_BLEND_3WIN_XY);
-		break;
-	}
-
-	tegra_dc_writel(dc, WIN_A_UPDATE << index, DC_CMD_STATE_CONTROL);
-	tegra_dc_writel(dc, WIN_A_ACT_REQ << index, DC_CMD_STATE_CONTROL);
-
-	return 0;
-}
-
-unsigned int tegra_dc_format(uint32_t format)
-{
-	switch (format) {
-	case DRM_FORMAT_XBGR8888:
-		return WIN_COLOR_DEPTH_R8G8B8A8;
-
-	case DRM_FORMAT_XRGB8888:
-		return WIN_COLOR_DEPTH_B8G8R8A8;
-
-	case DRM_FORMAT_RGB565:
-		return WIN_COLOR_DEPTH_B5G6R5;
-
-	case DRM_FORMAT_UYVY:
-		return WIN_COLOR_DEPTH_YCbCr422;
-
-	case DRM_FORMAT_YUV420:
-		return WIN_COLOR_DEPTH_YCbCr420P;
-
-	case DRM_FORMAT_YUV422:
-		return WIN_COLOR_DEPTH_YCbCr422P;
-
-	default:
-		break;
-	}
-
-	WARN(1, "unsupported pixel format %u, using default\n", format);
-	return WIN_COLOR_DEPTH_B8G8R8A8;
-}
-
-static int tegra_crtc_mode_set(struct drm_crtc *crtc,
-			       struct drm_display_mode *mode,
-			       struct drm_display_mode *adjusted,
-			       int x, int y, struct drm_framebuffer *old_fb)
-{
-	struct tegra_bo *bo = tegra_fb_get_plane(crtc->fb, 0);
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	struct tegra_dc_window window;
-	unsigned long div, value;
-	int err;
-
-	drm_vblank_pre_modeset(crtc->dev, dc->pipe);
-
-	err = tegra_crtc_setup_clk(crtc, mode, &div);
-	if (err) {
-		dev_err(dc->dev, "failed to setup clock for CRTC: %d\n", err);
-		return err;
-	}
-
-	/* program display mode */
-	tegra_dc_set_timings(dc, mode);
-
-	value = DE_SELECT_ACTIVE | DE_CONTROL_NORMAL;
-	tegra_dc_writel(dc, value, DC_DISP_DATA_ENABLE_OPTIONS);
-
-	value = tegra_dc_readl(dc, DC_COM_PIN_OUTPUT_POLARITY(1));
-	value &= ~LVS_OUTPUT_POLARITY_LOW;
-	value &= ~LHS_OUTPUT_POLARITY_LOW;
-	tegra_dc_writel(dc, value, DC_COM_PIN_OUTPUT_POLARITY(1));
-
-	value = DISP_DATA_FORMAT_DF1P1C | DISP_ALIGNMENT_MSB |
-		DISP_ORDER_RED_BLUE;
-	tegra_dc_writel(dc, value, DC_DISP_DISP_INTERFACE_CONTROL);
-
-	tegra_dc_writel(dc, 0x00010001, DC_DISP_SHIFT_CLOCK_OPTIONS);
-
-	value = SHIFT_CLK_DIVIDER(div) | PIXEL_CLK_DIVIDER_PCD1;
-	tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL);
-
-	/* setup window parameters */
-	memset(&window, 0, sizeof(window));
-	window.src.x = 0;
-	window.src.y = 0;
-	window.src.w = mode->hdisplay;
-	window.src.h = mode->vdisplay;
-	window.dst.x = 0;
-	window.dst.y = 0;
-	window.dst.w = mode->hdisplay;
-	window.dst.h = mode->vdisplay;
-	window.format = tegra_dc_format(crtc->fb->pixel_format);
-	window.bits_per_pixel = crtc->fb->bits_per_pixel;
-	window.stride[0] = crtc->fb->pitches[0];
-	window.base[0] = bo->paddr;
-
-	err = tegra_dc_setup_window(dc, 0, &window);
-	if (err < 0)
-		dev_err(dc->dev, "failed to enable root plane\n");
-
-	return 0;
-}
-
-static int tegra_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
-				    struct drm_framebuffer *old_fb)
-{
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-
-	return tegra_dc_set_base(dc, x, y, crtc->fb);
-}
-
-static void tegra_crtc_prepare(struct drm_crtc *crtc)
-{
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	unsigned int syncpt;
-	unsigned long value;
-
-	/* hardware initialization */
-	tegra_periph_reset_deassert(dc->clk);
-	usleep_range(10000, 20000);
-
-	if (dc->pipe)
-		syncpt = SYNCPT_VBLANK1;
-	else
-		syncpt = SYNCPT_VBLANK0;
-
-	/* initialize display controller */
-	tegra_dc_writel(dc, 0x00000100, DC_CMD_GENERAL_INCR_SYNCPT_CNTRL);
-	tegra_dc_writel(dc, 0x100 | syncpt, DC_CMD_CONT_SYNCPT_VSYNC);
-
-	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT | WIN_A_OF_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_TYPE);
-
-	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT |
-		WIN_A_OF_INT | WIN_B_OF_INT | WIN_C_OF_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_POLARITY);
-
-	value = PW0_ENABLE | PW1_ENABLE | PW2_ENABLE | PW3_ENABLE |
-		PW4_ENABLE | PM0_ENABLE | PM1_ENABLE;
-	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_POWER_CONTROL);
-
-	value = tegra_dc_readl(dc, DC_CMD_DISPLAY_COMMAND);
-	value |= DISP_CTRL_MODE_C_DISPLAY;
-	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_COMMAND);
-
-	/* initialize timer */
-	value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(0x20) |
-		WINDOW_B_THRESHOLD(0x20) | WINDOW_C_THRESHOLD(0x20);
-	tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY);
-
-	value = CURSOR_THRESHOLD(0) | WINDOW_A_THRESHOLD(1) |
-		WINDOW_B_THRESHOLD(1) | WINDOW_C_THRESHOLD(1);
-	tegra_dc_writel(dc, value, DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER);
-
-	value = VBLANK_INT | WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_ENABLE);
-
-	value = WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT;
-	tegra_dc_writel(dc, value, DC_CMD_INT_MASK);
-}
-
-static void tegra_crtc_commit(struct drm_crtc *crtc)
-{
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-	unsigned long value;
-
-	value = GENERAL_UPDATE | WIN_A_UPDATE;
-	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
-
-	value = GENERAL_ACT_REQ | WIN_A_ACT_REQ;
-	tegra_dc_writel(dc, value, DC_CMD_STATE_CONTROL);
-
-	drm_vblank_post_modeset(crtc->dev, dc->pipe);
-}
-
-static void tegra_crtc_load_lut(struct drm_crtc *crtc)
-{
-}
-
-static const struct drm_crtc_helper_funcs tegra_crtc_helper_funcs = {
-	.disable = tegra_crtc_disable,
-	.mode_fixup = tegra_crtc_mode_fixup,
-	.mode_set = tegra_crtc_mode_set,
-	.mode_set_base = tegra_crtc_mode_set_base,
-	.prepare = tegra_crtc_prepare,
-	.commit = tegra_crtc_commit,
-	.load_lut = tegra_crtc_load_lut,
-};
-
-static irqreturn_t tegra_dc_irq(int irq, void *data)
-{
-	struct tegra_dc *dc = data;
-	unsigned long status;
-
-	status = tegra_dc_readl(dc, DC_CMD_INT_STATUS);
-	tegra_dc_writel(dc, status, DC_CMD_INT_STATUS);
-
-	if (status & FRAME_END_INT) {
-		/*
-		dev_dbg(dc->dev, "%s(): frame end\n", __func__);
-		*/
-	}
-
-	if (status & VBLANK_INT) {
-		/*
-		dev_dbg(dc->dev, "%s(): vertical blank\n", __func__);
-		*/
-		drm_handle_vblank(dc->base.dev, dc->pipe);
-		tegra_dc_finish_page_flip(dc);
-	}
-
-	if (status & (WIN_A_UF_INT | WIN_B_UF_INT | WIN_C_UF_INT)) {
-		/*
-		dev_dbg(dc->dev, "%s(): underflow\n", __func__);
-		*/
-	}
-
-	return IRQ_HANDLED;
-}
-
-static int tegra_dc_show_regs(struct seq_file *s, void *data)
-{
-	struct drm_info_node *node = s->private;
-	struct tegra_dc *dc = node->info_ent->data;
-
-#define DUMP_REG(name)						\
-	seq_printf(s, "%-40s %#05x %08lx\n", #name, name,	\
-		   tegra_dc_readl(dc, name))
-
-	DUMP_REG(DC_CMD_GENERAL_INCR_SYNCPT);
-	DUMP_REG(DC_CMD_GENERAL_INCR_SYNCPT_CNTRL);
-	DUMP_REG(DC_CMD_GENERAL_INCR_SYNCPT_ERROR);
-	DUMP_REG(DC_CMD_WIN_A_INCR_SYNCPT);
-	DUMP_REG(DC_CMD_WIN_A_INCR_SYNCPT_CNTRL);
-	DUMP_REG(DC_CMD_WIN_A_INCR_SYNCPT_ERROR);
-	DUMP_REG(DC_CMD_WIN_B_INCR_SYNCPT);
-	DUMP_REG(DC_CMD_WIN_B_INCR_SYNCPT_CNTRL);
-	DUMP_REG(DC_CMD_WIN_B_INCR_SYNCPT_ERROR);
-	DUMP_REG(DC_CMD_WIN_C_INCR_SYNCPT);
-	DUMP_REG(DC_CMD_WIN_C_INCR_SYNCPT_CNTRL);
-	DUMP_REG(DC_CMD_WIN_C_INCR_SYNCPT_ERROR);
-	DUMP_REG(DC_CMD_CONT_SYNCPT_VSYNC);
-	DUMP_REG(DC_CMD_DISPLAY_COMMAND_OPTION0);
-	DUMP_REG(DC_CMD_DISPLAY_COMMAND);
-	DUMP_REG(DC_CMD_SIGNAL_RAISE);
-	DUMP_REG(DC_CMD_DISPLAY_POWER_CONTROL);
-	DUMP_REG(DC_CMD_INT_STATUS);
-	DUMP_REG(DC_CMD_INT_MASK);
-	DUMP_REG(DC_CMD_INT_ENABLE);
-	DUMP_REG(DC_CMD_INT_TYPE);
-	DUMP_REG(DC_CMD_INT_POLARITY);
-	DUMP_REG(DC_CMD_SIGNAL_RAISE1);
-	DUMP_REG(DC_CMD_SIGNAL_RAISE2);
-	DUMP_REG(DC_CMD_SIGNAL_RAISE3);
-	DUMP_REG(DC_CMD_STATE_ACCESS);
-	DUMP_REG(DC_CMD_STATE_CONTROL);
-	DUMP_REG(DC_CMD_DISPLAY_WINDOW_HEADER);
-	DUMP_REG(DC_CMD_REG_ACT_CONTROL);
-	DUMP_REG(DC_COM_CRC_CONTROL);
-	DUMP_REG(DC_COM_CRC_CHECKSUM);
-	DUMP_REG(DC_COM_PIN_OUTPUT_ENABLE(0));
-	DUMP_REG(DC_COM_PIN_OUTPUT_ENABLE(1));
-	DUMP_REG(DC_COM_PIN_OUTPUT_ENABLE(2));
-	DUMP_REG(DC_COM_PIN_OUTPUT_ENABLE(3));
-	DUMP_REG(DC_COM_PIN_OUTPUT_POLARITY(0));
-	DUMP_REG(DC_COM_PIN_OUTPUT_POLARITY(1));
-	DUMP_REG(DC_COM_PIN_OUTPUT_POLARITY(2));
-	DUMP_REG(DC_COM_PIN_OUTPUT_POLARITY(3));
-	DUMP_REG(DC_COM_PIN_OUTPUT_DATA(0));
-	DUMP_REG(DC_COM_PIN_OUTPUT_DATA(1));
-	DUMP_REG(DC_COM_PIN_OUTPUT_DATA(2));
-	DUMP_REG(DC_COM_PIN_OUTPUT_DATA(3));
-	DUMP_REG(DC_COM_PIN_INPUT_ENABLE(0));
-	DUMP_REG(DC_COM_PIN_INPUT_ENABLE(1));
-	DUMP_REG(DC_COM_PIN_INPUT_ENABLE(2));
-	DUMP_REG(DC_COM_PIN_INPUT_ENABLE(3));
-	DUMP_REG(DC_COM_PIN_INPUT_DATA(0));
-	DUMP_REG(DC_COM_PIN_INPUT_DATA(1));
-	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(0));
-	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(1));
-	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(2));
-	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(3));
-	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(4));
-	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(5));
-	DUMP_REG(DC_COM_PIN_OUTPUT_SELECT(6));
-	DUMP_REG(DC_COM_PIN_MISC_CONTROL);
-	DUMP_REG(DC_COM_PIN_PM0_CONTROL);
-	DUMP_REG(DC_COM_PIN_PM0_DUTY_CYCLE);
-	DUMP_REG(DC_COM_PIN_PM1_CONTROL);
-	DUMP_REG(DC_COM_PIN_PM1_DUTY_CYCLE);
-	DUMP_REG(DC_COM_SPI_CONTROL);
-	DUMP_REG(DC_COM_SPI_START_BYTE);
-	DUMP_REG(DC_COM_HSPI_WRITE_DATA_AB);
-	DUMP_REG(DC_COM_HSPI_WRITE_DATA_CD);
-	DUMP_REG(DC_COM_HSPI_CS_DC);
-	DUMP_REG(DC_COM_SCRATCH_REGISTER_A);
-	DUMP_REG(DC_COM_SCRATCH_REGISTER_B);
-	DUMP_REG(DC_COM_GPIO_CTRL);
-	DUMP_REG(DC_COM_GPIO_DEBOUNCE_COUNTER);
-	DUMP_REG(DC_COM_CRC_CHECKSUM_LATCHED);
-	DUMP_REG(DC_DISP_DISP_SIGNAL_OPTIONS0);
-	DUMP_REG(DC_DISP_DISP_SIGNAL_OPTIONS1);
-	DUMP_REG(DC_DISP_DISP_WIN_OPTIONS);
-	DUMP_REG(DC_DISP_DISP_MEM_HIGH_PRIORITY);
-	DUMP_REG(DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER);
-	DUMP_REG(DC_DISP_DISP_TIMING_OPTIONS);
-	DUMP_REG(DC_DISP_REF_TO_SYNC);
-	DUMP_REG(DC_DISP_SYNC_WIDTH);
-	DUMP_REG(DC_DISP_BACK_PORCH);
-	DUMP_REG(DC_DISP_ACTIVE);
-	DUMP_REG(DC_DISP_FRONT_PORCH);
-	DUMP_REG(DC_DISP_H_PULSE0_CONTROL);
-	DUMP_REG(DC_DISP_H_PULSE0_POSITION_A);
-	DUMP_REG(DC_DISP_H_PULSE0_POSITION_B);
-	DUMP_REG(DC_DISP_H_PULSE0_POSITION_C);
-	DUMP_REG(DC_DISP_H_PULSE0_POSITION_D);
-	DUMP_REG(DC_DISP_H_PULSE1_CONTROL);
-	DUMP_REG(DC_DISP_H_PULSE1_POSITION_A);
-	DUMP_REG(DC_DISP_H_PULSE1_POSITION_B);
-	DUMP_REG(DC_DISP_H_PULSE1_POSITION_C);
-	DUMP_REG(DC_DISP_H_PULSE1_POSITION_D);
-	DUMP_REG(DC_DISP_H_PULSE2_CONTROL);
-	DUMP_REG(DC_DISP_H_PULSE2_POSITION_A);
-	DUMP_REG(DC_DISP_H_PULSE2_POSITION_B);
-	DUMP_REG(DC_DISP_H_PULSE2_POSITION_C);
-	DUMP_REG(DC_DISP_H_PULSE2_POSITION_D);
-	DUMP_REG(DC_DISP_V_PULSE0_CONTROL);
-	DUMP_REG(DC_DISP_V_PULSE0_POSITION_A);
-	DUMP_REG(DC_DISP_V_PULSE0_POSITION_B);
-	DUMP_REG(DC_DISP_V_PULSE0_POSITION_C);
-	DUMP_REG(DC_DISP_V_PULSE1_CONTROL);
-	DUMP_REG(DC_DISP_V_PULSE1_POSITION_A);
-	DUMP_REG(DC_DISP_V_PULSE1_POSITION_B);
-	DUMP_REG(DC_DISP_V_PULSE1_POSITION_C);
-	DUMP_REG(DC_DISP_V_PULSE2_CONTROL);
-	DUMP_REG(DC_DISP_V_PULSE2_POSITION_A);
-	DUMP_REG(DC_DISP_V_PULSE3_CONTROL);
-	DUMP_REG(DC_DISP_V_PULSE3_POSITION_A);
-	DUMP_REG(DC_DISP_M0_CONTROL);
-	DUMP_REG(DC_DISP_M1_CONTROL);
-	DUMP_REG(DC_DISP_DI_CONTROL);
-	DUMP_REG(DC_DISP_PP_CONTROL);
-	DUMP_REG(DC_DISP_PP_SELECT_A);
-	DUMP_REG(DC_DISP_PP_SELECT_B);
-	DUMP_REG(DC_DISP_PP_SELECT_C);
-	DUMP_REG(DC_DISP_PP_SELECT_D);
-	DUMP_REG(DC_DISP_DISP_CLOCK_CONTROL);
-	DUMP_REG(DC_DISP_DISP_INTERFACE_CONTROL);
-	DUMP_REG(DC_DISP_DISP_COLOR_CONTROL);
-	DUMP_REG(DC_DISP_SHIFT_CLOCK_OPTIONS);
-	DUMP_REG(DC_DISP_DATA_ENABLE_OPTIONS);
-	DUMP_REG(DC_DISP_SERIAL_INTERFACE_OPTIONS);
-	DUMP_REG(DC_DISP_LCD_SPI_OPTIONS);
-	DUMP_REG(DC_DISP_BORDER_COLOR);
-	DUMP_REG(DC_DISP_COLOR_KEY0_LOWER);
-	DUMP_REG(DC_DISP_COLOR_KEY0_UPPER);
-	DUMP_REG(DC_DISP_COLOR_KEY1_LOWER);
-	DUMP_REG(DC_DISP_COLOR_KEY1_UPPER);
-	DUMP_REG(DC_DISP_CURSOR_FOREGROUND);
-	DUMP_REG(DC_DISP_CURSOR_BACKGROUND);
-	DUMP_REG(DC_DISP_CURSOR_START_ADDR);
-	DUMP_REG(DC_DISP_CURSOR_START_ADDR_NS);
-	DUMP_REG(DC_DISP_CURSOR_POSITION);
-	DUMP_REG(DC_DISP_CURSOR_POSITION_NS);
-	DUMP_REG(DC_DISP_INIT_SEQ_CONTROL);
-	DUMP_REG(DC_DISP_SPI_INIT_SEQ_DATA_A);
-	DUMP_REG(DC_DISP_SPI_INIT_SEQ_DATA_B);
-	DUMP_REG(DC_DISP_SPI_INIT_SEQ_DATA_C);
-	DUMP_REG(DC_DISP_SPI_INIT_SEQ_DATA_D);
-	DUMP_REG(DC_DISP_DC_MCCIF_FIFOCTRL);
-	DUMP_REG(DC_DISP_MCCIF_DISPLAY0A_HYST);
-	DUMP_REG(DC_DISP_MCCIF_DISPLAY0B_HYST);
-	DUMP_REG(DC_DISP_MCCIF_DISPLAY1A_HYST);
-	DUMP_REG(DC_DISP_MCCIF_DISPLAY1B_HYST);
-	DUMP_REG(DC_DISP_DAC_CRT_CTRL);
-	DUMP_REG(DC_DISP_DISP_MISC_CONTROL);
-	DUMP_REG(DC_DISP_SD_CONTROL);
-	DUMP_REG(DC_DISP_SD_CSC_COEFF);
-	DUMP_REG(DC_DISP_SD_LUT(0));
-	DUMP_REG(DC_DISP_SD_LUT(1));
-	DUMP_REG(DC_DISP_SD_LUT(2));
-	DUMP_REG(DC_DISP_SD_LUT(3));
-	DUMP_REG(DC_DISP_SD_LUT(4));
-	DUMP_REG(DC_DISP_SD_LUT(5));
-	DUMP_REG(DC_DISP_SD_LUT(6));
-	DUMP_REG(DC_DISP_SD_LUT(7));
-	DUMP_REG(DC_DISP_SD_LUT(8));
-	DUMP_REG(DC_DISP_SD_FLICKER_CONTROL);
-	DUMP_REG(DC_DISP_DC_PIXEL_COUNT);
-	DUMP_REG(DC_DISP_SD_HISTOGRAM(0));
-	DUMP_REG(DC_DISP_SD_HISTOGRAM(1));
-	DUMP_REG(DC_DISP_SD_HISTOGRAM(2));
-	DUMP_REG(DC_DISP_SD_HISTOGRAM(3));
-	DUMP_REG(DC_DISP_SD_HISTOGRAM(4));
-	DUMP_REG(DC_DISP_SD_HISTOGRAM(5));
-	DUMP_REG(DC_DISP_SD_HISTOGRAM(6));
-	DUMP_REG(DC_DISP_SD_HISTOGRAM(7));
-	DUMP_REG(DC_DISP_SD_BL_TF(0));
-	DUMP_REG(DC_DISP_SD_BL_TF(1));
-	DUMP_REG(DC_DISP_SD_BL_TF(2));
-	DUMP_REG(DC_DISP_SD_BL_TF(3));
-	DUMP_REG(DC_DISP_SD_BL_CONTROL);
-	DUMP_REG(DC_DISP_SD_HW_K_VALUES);
-	DUMP_REG(DC_DISP_SD_MAN_K_VALUES);
-	DUMP_REG(DC_WIN_WIN_OPTIONS);
-	DUMP_REG(DC_WIN_BYTE_SWAP);
-	DUMP_REG(DC_WIN_BUFFER_CONTROL);
-	DUMP_REG(DC_WIN_COLOR_DEPTH);
-	DUMP_REG(DC_WIN_POSITION);
-	DUMP_REG(DC_WIN_SIZE);
-	DUMP_REG(DC_WIN_PRESCALED_SIZE);
-	DUMP_REG(DC_WIN_H_INITIAL_DDA);
-	DUMP_REG(DC_WIN_V_INITIAL_DDA);
-	DUMP_REG(DC_WIN_DDA_INC);
-	DUMP_REG(DC_WIN_LINE_STRIDE);
-	DUMP_REG(DC_WIN_BUF_STRIDE);
-	DUMP_REG(DC_WIN_UV_BUF_STRIDE);
-	DUMP_REG(DC_WIN_BUFFER_ADDR_MODE);
-	DUMP_REG(DC_WIN_DV_CONTROL);
-	DUMP_REG(DC_WIN_BLEND_NOKEY);
-	DUMP_REG(DC_WIN_BLEND_1WIN);
-	DUMP_REG(DC_WIN_BLEND_2WIN_X);
-	DUMP_REG(DC_WIN_BLEND_2WIN_Y);
-	DUMP_REG(DC_WIN_BLEND_3WIN_XY);
-	DUMP_REG(DC_WIN_HP_FETCH_CONTROL);
-	DUMP_REG(DC_WINBUF_START_ADDR);
-	DUMP_REG(DC_WINBUF_START_ADDR_NS);
-	DUMP_REG(DC_WINBUF_START_ADDR_U);
-	DUMP_REG(DC_WINBUF_START_ADDR_U_NS);
-	DUMP_REG(DC_WINBUF_START_ADDR_V);
-	DUMP_REG(DC_WINBUF_START_ADDR_V_NS);
-	DUMP_REG(DC_WINBUF_ADDR_H_OFFSET);
-	DUMP_REG(DC_WINBUF_ADDR_H_OFFSET_NS);
-	DUMP_REG(DC_WINBUF_ADDR_V_OFFSET);
-	DUMP_REG(DC_WINBUF_ADDR_V_OFFSET_NS);
-	DUMP_REG(DC_WINBUF_UFLOW_STATUS);
-	DUMP_REG(DC_WINBUF_AD_UFLOW_STATUS);
-	DUMP_REG(DC_WINBUF_BD_UFLOW_STATUS);
-	DUMP_REG(DC_WINBUF_CD_UFLOW_STATUS);
-
-#undef DUMP_REG
-
-	return 0;
-}
-
-static struct drm_info_list debugfs_files[] = {
-	{ "regs", tegra_dc_show_regs, 0, NULL },
-};
-
-static int tegra_dc_debugfs_init(struct tegra_dc *dc, struct drm_minor *minor)
-{
-	unsigned int i;
-	char *name;
-	int err;
-
-	name = kasprintf(GFP_KERNEL, "dc.%d", dc->pipe);
-	dc->debugfs = debugfs_create_dir(name, minor->debugfs_root);
-	kfree(name);
-
-	if (!dc->debugfs)
-		return -ENOMEM;
-
-	dc->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
-				    GFP_KERNEL);
-	if (!dc->debugfs_files) {
-		err = -ENOMEM;
-		goto remove;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
-		dc->debugfs_files[i].data = dc;
-
-	err = drm_debugfs_create_files(dc->debugfs_files,
-				       ARRAY_SIZE(debugfs_files),
-				       dc->debugfs, minor);
-	if (err < 0)
-		goto free;
-
-	dc->minor = minor;
-
-	return 0;
-
-free:
-	kfree(dc->debugfs_files);
-	dc->debugfs_files = NULL;
-remove:
-	debugfs_remove(dc->debugfs);
-	dc->debugfs = NULL;
-
-	return err;
-}
-
-static int tegra_dc_debugfs_exit(struct tegra_dc *dc)
-{
-	drm_debugfs_remove_files(dc->debugfs_files, ARRAY_SIZE(debugfs_files),
-				 dc->minor);
-	dc->minor = NULL;
-
-	kfree(dc->debugfs_files);
-	dc->debugfs_files = NULL;
-
-	debugfs_remove(dc->debugfs);
-	dc->debugfs = NULL;
-
-	return 0;
-}
-
-static int tegra_dc_drm_init(struct host1x_client *client,
-			     struct drm_device *drm)
-{
-	struct tegra_dc *dc = host1x_client_to_dc(client);
-	int err;
-
-	dc->pipe = drm->mode_config.num_crtc;
-
-	drm_crtc_init(drm, &dc->base, &tegra_crtc_funcs);
-	drm_mode_crtc_set_gamma_size(&dc->base, 256);
-	drm_crtc_helper_add(&dc->base, &tegra_crtc_helper_funcs);
-
-	err = tegra_dc_rgb_init(drm, dc);
-	if (err < 0 && err != -ENODEV) {
-		dev_err(dc->dev, "failed to initialize RGB output: %d\n", err);
-		return err;
-	}
-
-	err = tegra_dc_add_planes(drm, dc);
-	if (err < 0)
-		return err;
-
-	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
-		err = tegra_dc_debugfs_init(dc, drm->primary);
-		if (err < 0)
-			dev_err(dc->dev, "debugfs setup failed: %d\n", err);
-	}
-
-	err = devm_request_irq(dc->dev, dc->irq, tegra_dc_irq, 0,
-			       dev_name(dc->dev), dc);
-	if (err < 0) {
-		dev_err(dc->dev, "failed to request IRQ#%u: %d\n", dc->irq,
-			err);
-		return err;
-	}
-
-	return 0;
-}
-
-static int tegra_dc_drm_exit(struct host1x_client *client)
-{
-	struct tegra_dc *dc = host1x_client_to_dc(client);
-	int err;
-
-	devm_free_irq(dc->dev, dc->irq, dc);
-
-	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
-		err = tegra_dc_debugfs_exit(dc);
-		if (err < 0)
-			dev_err(dc->dev, "debugfs cleanup failed: %d\n", err);
-	}
-
-	err = tegra_dc_rgb_exit(dc);
-	if (err) {
-		dev_err(dc->dev, "failed to shutdown RGB output: %d\n", err);
-		return err;
-	}
-
-	return 0;
-}
-
-static const struct host1x_client_ops dc_client_ops = {
-	.drm_init = tegra_dc_drm_init,
-	.drm_exit = tegra_dc_drm_exit,
-};
-
-static int tegra_dc_probe(struct platform_device *pdev)
-{
-	struct host1x_drm *host1x = host1x_get_drm_data(pdev->dev.parent);
-	struct resource *regs;
-	struct tegra_dc *dc;
-	int err;
-
-	dc = devm_kzalloc(&pdev->dev, sizeof(*dc), GFP_KERNEL);
-	if (!dc)
-		return -ENOMEM;
-
-	spin_lock_init(&dc->lock);
-	INIT_LIST_HEAD(&dc->list);
-	dc->dev = &pdev->dev;
-
-	dc->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(dc->clk)) {
-		dev_err(&pdev->dev, "failed to get clock\n");
-		return PTR_ERR(dc->clk);
-	}
-
-	err = clk_prepare_enable(dc->clk);
-	if (err < 0)
-		return err;
-
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	dc->regs = devm_ioremap_resource(&pdev->dev, regs);
-	if (IS_ERR(dc->regs))
-		return PTR_ERR(dc->regs);
-
-	dc->irq = platform_get_irq(pdev, 0);
-	if (dc->irq < 0) {
-		dev_err(&pdev->dev, "failed to get IRQ\n");
-		return -ENXIO;
-	}
-
-	INIT_LIST_HEAD(&dc->client.list);
-	dc->client.ops = &dc_client_ops;
-	dc->client.dev = &pdev->dev;
-
-	err = tegra_dc_rgb_probe(dc);
-	if (err < 0 && err != -ENODEV) {
-		dev_err(&pdev->dev, "failed to probe RGB output: %d\n", err);
-		return err;
-	}
-
-	err = host1x_register_client(host1x, &dc->client);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
-			err);
-		return err;
-	}
-
-	platform_set_drvdata(pdev, dc);
-
-	return 0;
-}
-
-static int tegra_dc_remove(struct platform_device *pdev)
-{
-	struct host1x_drm *host1x = host1x_get_drm_data(pdev->dev.parent);
-	struct tegra_dc *dc = platform_get_drvdata(pdev);
-	int err;
-
-	err = host1x_unregister_client(host1x, &dc->client);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
-			err);
-		return err;
-	}
-
-	clk_disable_unprepare(dc->clk);
-
-	return 0;
-}
-
-static struct of_device_id tegra_dc_of_match[] = {
-	{ .compatible = "nvidia,tegra30-dc", },
-	{ .compatible = "nvidia,tegra20-dc", },
-	{ },
-};
-
-struct platform_driver tegra_dc_driver = {
-	.driver = {
-		.name = "tegra-dc",
-		.owner = THIS_MODULE,
-		.of_match_table = tegra_dc_of_match,
-	},
-	.probe = tegra_dc_probe,
-	.remove = tegra_dc_remove,
-};
diff --git a/drivers/gpu/host1x/drm/dc.h b/drivers/gpu/host1x/drm/dc.h
deleted file mode 100644
index 79eaec9aac77..000000000000
--- a/drivers/gpu/host1x/drm/dc.h
+++ /dev/null
@@ -1,400 +0,0 @@
-/*
- * Copyright (C) 2012 Avionic Design GmbH
- * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef TEGRA_DC_H
-#define TEGRA_DC_H 1
-
-#define DC_CMD_GENERAL_INCR_SYNCPT		0x000
-#define DC_CMD_GENERAL_INCR_SYNCPT_CNTRL	0x001
-#define DC_CMD_GENERAL_INCR_SYNCPT_ERROR	0x002
-#define DC_CMD_WIN_A_INCR_SYNCPT		0x008
-#define DC_CMD_WIN_A_INCR_SYNCPT_CNTRL		0x009
-#define DC_CMD_WIN_A_INCR_SYNCPT_ERROR		0x00a
-#define DC_CMD_WIN_B_INCR_SYNCPT		0x010
-#define DC_CMD_WIN_B_INCR_SYNCPT_CNTRL		0x011
-#define DC_CMD_WIN_B_INCR_SYNCPT_ERROR		0x012
-#define DC_CMD_WIN_C_INCR_SYNCPT		0x018
-#define DC_CMD_WIN_C_INCR_SYNCPT_CNTRL		0x019
-#define DC_CMD_WIN_C_INCR_SYNCPT_ERROR		0x01a
-#define DC_CMD_CONT_SYNCPT_VSYNC		0x028
-#define DC_CMD_DISPLAY_COMMAND_OPTION0		0x031
-#define DC_CMD_DISPLAY_COMMAND			0x032
-#define DISP_CTRL_MODE_STOP (0 << 5)
-#define DISP_CTRL_MODE_C_DISPLAY (1 << 5)
-#define DISP_CTRL_MODE_NC_DISPLAY (2 << 5)
-#define DC_CMD_SIGNAL_RAISE			0x033
-#define DC_CMD_DISPLAY_POWER_CONTROL		0x036
-#define PW0_ENABLE (1 <<  0)
-#define PW1_ENABLE (1 <<  2)
-#define PW2_ENABLE (1 <<  4)
-#define PW3_ENABLE (1 <<  6)
-#define PW4_ENABLE (1 <<  8)
-#define PM0_ENABLE (1 << 16)
-#define PM1_ENABLE (1 << 18)
-
-#define DC_CMD_INT_STATUS			0x037
-#define DC_CMD_INT_MASK				0x038
-#define DC_CMD_INT_ENABLE			0x039
-#define DC_CMD_INT_TYPE				0x03a
-#define DC_CMD_INT_POLARITY			0x03b
-#define CTXSW_INT     (1 << 0)
-#define FRAME_END_INT (1 << 1)
-#define VBLANK_INT    (1 << 2)
-#define WIN_A_UF_INT  (1 << 8)
-#define WIN_B_UF_INT  (1 << 9)
-#define WIN_C_UF_INT  (1 << 10)
-#define WIN_A_OF_INT  (1 << 14)
-#define WIN_B_OF_INT  (1 << 15)
-#define WIN_C_OF_INT  (1 << 16)
-
-#define DC_CMD_SIGNAL_RAISE1			0x03c
-#define DC_CMD_SIGNAL_RAISE2			0x03d
-#define DC_CMD_SIGNAL_RAISE3			0x03e
-
-#define DC_CMD_STATE_ACCESS			0x040
-#define READ_MUX  (1 << 0)
-#define WRITE_MUX (1 << 2)
-
-#define DC_CMD_STATE_CONTROL			0x041
-#define GENERAL_ACT_REQ (1 <<  0)
-#define WIN_A_ACT_REQ   (1 <<  1)
-#define WIN_B_ACT_REQ   (1 <<  2)
-#define WIN_C_ACT_REQ   (1 <<  3)
-#define GENERAL_UPDATE  (1 <<  8)
-#define WIN_A_UPDATE    (1 <<  9)
-#define WIN_B_UPDATE    (1 << 10)
-#define WIN_C_UPDATE    (1 << 11)
-#define NC_HOST_TRIG    (1 << 24)
-
-#define DC_CMD_DISPLAY_WINDOW_HEADER		0x042
-#define WINDOW_A_SELECT (1 << 4)
-#define WINDOW_B_SELECT (1 << 5)
-#define WINDOW_C_SELECT (1 << 6)
-
-#define DC_CMD_REG_ACT_CONTROL			0x043
-
-#define DC_COM_CRC_CONTROL			0x300
-#define DC_COM_CRC_CHECKSUM			0x301
-#define DC_COM_PIN_OUTPUT_ENABLE(x) (0x302 + (x))
-#define DC_COM_PIN_OUTPUT_POLARITY(x) (0x306 + (x))
-#define LVS_OUTPUT_POLARITY_LOW (1 << 28)
-#define LHS_OUTPUT_POLARITY_LOW (1 << 30)
-#define DC_COM_PIN_OUTPUT_DATA(x) (0x30a + (x))
-#define DC_COM_PIN_INPUT_ENABLE(x) (0x30e + (x))
-#define DC_COM_PIN_INPUT_DATA(x) (0x312 + (x))
-#define DC_COM_PIN_OUTPUT_SELECT(x) (0x314 + (x))
-
-#define DC_COM_PIN_MISC_CONTROL			0x31b
-#define DC_COM_PIN_PM0_CONTROL			0x31c
-#define DC_COM_PIN_PM0_DUTY_CYCLE		0x31d
-#define DC_COM_PIN_PM1_CONTROL			0x31e
-#define DC_COM_PIN_PM1_DUTY_CYCLE		0x31f
-
-#define DC_COM_SPI_CONTROL			0x320
-#define DC_COM_SPI_START_BYTE			0x321
-#define DC_COM_HSPI_WRITE_DATA_AB		0x322
-#define DC_COM_HSPI_WRITE_DATA_CD		0x323
-#define DC_COM_HSPI_CS_DC			0x324
-#define DC_COM_SCRATCH_REGISTER_A		0x325
-#define DC_COM_SCRATCH_REGISTER_B		0x326
-#define DC_COM_GPIO_CTRL			0x327
-#define DC_COM_GPIO_DEBOUNCE_COUNTER		0x328
-#define DC_COM_CRC_CHECKSUM_LATCHED		0x329
-
-#define DC_DISP_DISP_SIGNAL_OPTIONS0		0x400
-#define H_PULSE_0_ENABLE (1 <<  8)
-#define H_PULSE_1_ENABLE (1 << 10)
-#define H_PULSE_2_ENABLE (1 << 12)
-
-#define DC_DISP_DISP_SIGNAL_OPTIONS1		0x401
-
-#define DC_DISP_DISP_WIN_OPTIONS		0x402
-#define HDMI_ENABLE (1 << 30)
-
-#define DC_DISP_DISP_MEM_HIGH_PRIORITY		0x403
-#define CURSOR_THRESHOLD(x)   (((x) & 0x03) << 24)
-#define WINDOW_A_THRESHOLD(x) (((x) & 0x7f) << 16)
-#define WINDOW_B_THRESHOLD(x) (((x) & 0x7f) <<  8)
-#define WINDOW_C_THRESHOLD(x) (((x) & 0xff) <<  0)
-
-#define DC_DISP_DISP_MEM_HIGH_PRIORITY_TIMER	0x404
-#define CURSOR_DELAY(x)   (((x) & 0x3f) << 24)
-#define WINDOW_A_DELAY(x) (((x) & 0x3f) << 16)
-#define WINDOW_B_DELAY(x) (((x) & 0x3f) <<  8)
-#define WINDOW_C_DELAY(x) (((x) & 0x3f) <<  0)
-
-#define DC_DISP_DISP_TIMING_OPTIONS		0x405
-#define VSYNC_H_POSITION(x) ((x) & 0xfff)
-
-#define DC_DISP_REF_TO_SYNC			0x406
-#define DC_DISP_SYNC_WIDTH			0x407
-#define DC_DISP_BACK_PORCH			0x408
-#define DC_DISP_ACTIVE				0x409
-#define DC_DISP_FRONT_PORCH			0x40a
-#define DC_DISP_H_PULSE0_CONTROL		0x40b
-#define DC_DISP_H_PULSE0_POSITION_A		0x40c
-#define DC_DISP_H_PULSE0_POSITION_B		0x40d
-#define DC_DISP_H_PULSE0_POSITION_C		0x40e
-#define DC_DISP_H_PULSE0_POSITION_D		0x40f
-#define DC_DISP_H_PULSE1_CONTROL		0x410
-#define DC_DISP_H_PULSE1_POSITION_A		0x411
-#define DC_DISP_H_PULSE1_POSITION_B		0x412
-#define DC_DISP_H_PULSE1_POSITION_C		0x413
-#define DC_DISP_H_PULSE1_POSITION_D		0x414
-#define DC_DISP_H_PULSE2_CONTROL		0x415
-#define DC_DISP_H_PULSE2_POSITION_A		0x416
-#define DC_DISP_H_PULSE2_POSITION_B		0x417
-#define DC_DISP_H_PULSE2_POSITION_C		0x418
-#define DC_DISP_H_PULSE2_POSITION_D		0x419
-#define DC_DISP_V_PULSE0_CONTROL		0x41a
-#define DC_DISP_V_PULSE0_POSITION_A		0x41b
-#define DC_DISP_V_PULSE0_POSITION_B		0x41c
-#define DC_DISP_V_PULSE0_POSITION_C		0x41d
-#define DC_DISP_V_PULSE1_CONTROL		0x41e
-#define DC_DISP_V_PULSE1_POSITION_A		0x41f
-#define DC_DISP_V_PULSE1_POSITION_B		0x420
-#define DC_DISP_V_PULSE1_POSITION_C		0x421
-#define DC_DISP_V_PULSE2_CONTROL		0x422
-#define DC_DISP_V_PULSE2_POSITION_A		0x423
-#define DC_DISP_V_PULSE3_CONTROL		0x424
-#define DC_DISP_V_PULSE3_POSITION_A		0x425
-#define DC_DISP_M0_CONTROL			0x426
-#define DC_DISP_M1_CONTROL			0x427
-#define DC_DISP_DI_CONTROL			0x428
-#define DC_DISP_PP_CONTROL			0x429
-#define DC_DISP_PP_SELECT_A			0x42a
-#define DC_DISP_PP_SELECT_B			0x42b
-#define DC_DISP_PP_SELECT_C			0x42c
-#define DC_DISP_PP_SELECT_D			0x42d
-
-#define PULSE_MODE_NORMAL    (0 << 3)
-#define PULSE_MODE_ONE_CLOCK (1 << 3)
-#define PULSE_POLARITY_HIGH  (0 << 4)
-#define PULSE_POLARITY_LOW   (1 << 4)
-#define PULSE_QUAL_ALWAYS    (0 << 6)
-#define PULSE_QUAL_VACTIVE   (2 << 6)
-#define PULSE_QUAL_VACTIVE1  (3 << 6)
-#define PULSE_LAST_START_A   (0 << 8)
-#define PULSE_LAST_END_A     (1 << 8)
-#define PULSE_LAST_START_B   (2 << 8)
-#define PULSE_LAST_END_B     (3 << 8)
-#define PULSE_LAST_START_C   (4 << 8)
-#define PULSE_LAST_END_C     (5 << 8)
-#define PULSE_LAST_START_D   (6 << 8)
-#define PULSE_LAST_END_D     (7 << 8)
-
-#define PULSE_START(x) (((x) & 0xfff) <<  0)
-#define PULSE_END(x)   (((x) & 0xfff) << 16)
-
-#define DC_DISP_DISP_CLOCK_CONTROL		0x42e
-#define PIXEL_CLK_DIVIDER_PCD1  (0 << 8)
-#define PIXEL_CLK_DIVIDER_PCD1H (1 << 8)
-#define PIXEL_CLK_DIVIDER_PCD2  (2 << 8)
-#define PIXEL_CLK_DIVIDER_PCD3  (3 << 8)
-#define PIXEL_CLK_DIVIDER_PCD4  (4 << 8)
-#define PIXEL_CLK_DIVIDER_PCD6  (5 << 8)
-#define PIXEL_CLK_DIVIDER_PCD8  (6 << 8)
-#define PIXEL_CLK_DIVIDER_PCD9  (7 << 8)
-#define PIXEL_CLK_DIVIDER_PCD12 (8 << 8)
-#define PIXEL_CLK_DIVIDER_PCD16 (9 << 8)
-#define PIXEL_CLK_DIVIDER_PCD18 (10 << 8)
-#define PIXEL_CLK_DIVIDER_PCD24 (11 << 8)
-#define PIXEL_CLK_DIVIDER_PCD13 (12 << 8)
-#define SHIFT_CLK_DIVIDER(x)    ((x) & 0xff)
-
-#define DC_DISP_DISP_INTERFACE_CONTROL		0x42f
-#define DISP_DATA_FORMAT_DF1P1C    (0 << 0)
-#define DISP_DATA_FORMAT_DF1P2C24B (1 << 0)
-#define DISP_DATA_FORMAT_DF1P2C18B (2 << 0)
-#define DISP_DATA_FORMAT_DF1P2C16B (3 << 0)
-#define DISP_DATA_FORMAT_DF2S      (4 << 0)
-#define DISP_DATA_FORMAT_DF3S      (5 << 0)
-#define DISP_DATA_FORMAT_DFSPI     (6 << 0)
-#define DISP_DATA_FORMAT_DF1P3C24B (7 << 0)
-#define DISP_DATA_FORMAT_DF1P3C18B (8 << 0)
-#define DISP_ALIGNMENT_MSB         (0 << 8)
-#define DISP_ALIGNMENT_LSB         (1 << 8)
-#define DISP_ORDER_RED_BLUE        (0 << 9)
-#define DISP_ORDER_BLUE_RED        (1 << 9)
-
-#define DC_DISP_DISP_COLOR_CONTROL		0x430
-#define BASE_COLOR_SIZE666     (0 << 0)
-#define BASE_COLOR_SIZE111     (1 << 0)
-#define BASE_COLOR_SIZE222     (2 << 0)
-#define BASE_COLOR_SIZE333     (3 << 0)
-#define BASE_COLOR_SIZE444     (4 << 0)
-#define BASE_COLOR_SIZE555     (5 << 0)
-#define BASE_COLOR_SIZE565     (6 << 0)
-#define BASE_COLOR_SIZE332     (7 << 0)
-#define BASE_COLOR_SIZE888     (8 << 0)
-#define DITHER_CONTROL_DISABLE (0 << 8)
-#define DITHER_CONTROL_ORDERED (2 << 8)
-#define DITHER_CONTROL_ERRDIFF (3 << 8)
-
-#define DC_DISP_SHIFT_CLOCK_OPTIONS		0x431
-
-#define DC_DISP_DATA_ENABLE_OPTIONS		0x432
-#define DE_SELECT_ACTIVE_BLANK  (0 << 0)
-#define DE_SELECT_ACTIVE        (1 << 0)
-#define DE_SELECT_ACTIVE_IS     (2 << 0)
-#define DE_CONTROL_ONECLK       (0 << 2)
-#define DE_CONTROL_NORMAL       (1 << 2)
-#define DE_CONTROL_EARLY_EXT    (2 << 2)
-#define DE_CONTROL_EARLY        (3 << 2)
-#define DE_CONTROL_ACTIVE_BLANK (4 << 2)
-
-#define DC_DISP_SERIAL_INTERFACE_OPTIONS	0x433
-#define DC_DISP_LCD_SPI_OPTIONS			0x434
-#define DC_DISP_BORDER_COLOR			0x435
-#define DC_DISP_COLOR_KEY0_LOWER		0x436
-#define DC_DISP_COLOR_KEY0_UPPER		0x437
-#define DC_DISP_COLOR_KEY1_LOWER		0x438
-#define DC_DISP_COLOR_KEY1_UPPER		0x439
-
-#define DC_DISP_CURSOR_FOREGROUND		0x43c
-#define DC_DISP_CURSOR_BACKGROUND		0x43d
-
-#define DC_DISP_CURSOR_START_ADDR		0x43e
-#define DC_DISP_CURSOR_START_ADDR_NS		0x43f
-
-#define DC_DISP_CURSOR_POSITION			0x440
-#define DC_DISP_CURSOR_POSITION_NS		0x441
-
-#define DC_DISP_INIT_SEQ_CONTROL		0x442
-#define DC_DISP_SPI_INIT_SEQ_DATA_A		0x443
-#define DC_DISP_SPI_INIT_SEQ_DATA_B		0x444
-#define DC_DISP_SPI_INIT_SEQ_DATA_C		0x445
-#define DC_DISP_SPI_INIT_SEQ_DATA_D		0x446
-
-#define DC_DISP_DC_MCCIF_FIFOCTRL		0x480
-#define DC_DISP_MCCIF_DISPLAY0A_HYST		0x481
-#define DC_DISP_MCCIF_DISPLAY0B_HYST		0x482
-#define DC_DISP_MCCIF_DISPLAY1A_HYST		0x483
-#define DC_DISP_MCCIF_DISPLAY1B_HYST		0x484
-
-#define DC_DISP_DAC_CRT_CTRL			0x4c0
-#define DC_DISP_DISP_MISC_CONTROL		0x4c1
-#define DC_DISP_SD_CONTROL			0x4c2
-#define DC_DISP_SD_CSC_COEFF			0x4c3
-#define DC_DISP_SD_LUT(x)			(0x4c4 + (x))
-#define DC_DISP_SD_FLICKER_CONTROL		0x4cd
-#define DC_DISP_DC_PIXEL_COUNT			0x4ce
-#define DC_DISP_SD_HISTOGRAM(x)			(0x4cf + (x))
-#define DC_DISP_SD_BL_PARAMETERS		0x4d7
-#define DC_DISP_SD_BL_TF(x)			(0x4d8 + (x))
-#define DC_DISP_SD_BL_CONTROL			0x4dc
-#define DC_DISP_SD_HW_K_VALUES			0x4dd
-#define DC_DISP_SD_MAN_K_VALUES			0x4de
-
-#define DC_WIN_CSC_YOF				0x611
-#define DC_WIN_CSC_KYRGB			0x612
-#define DC_WIN_CSC_KUR				0x613
-#define DC_WIN_CSC_KVR				0x614
-#define DC_WIN_CSC_KUG				0x615
-#define DC_WIN_CSC_KVG				0x616
-#define DC_WIN_CSC_KUB				0x617
-#define DC_WIN_CSC_KVB				0x618
-
-#define DC_WIN_WIN_OPTIONS			0x700
-#define COLOR_EXPAND (1 <<  6)
-#define CSC_ENABLE   (1 << 18)
-#define WIN_ENABLE   (1 << 30)
-
-#define DC_WIN_BYTE_SWAP			0x701
-#define BYTE_SWAP_NOSWAP  (0 << 0)
-#define BYTE_SWAP_SWAP2   (1 << 0)
-#define BYTE_SWAP_SWAP4   (2 << 0)
-#define BYTE_SWAP_SWAP4HW (3 << 0)
-
-#define DC_WIN_BUFFER_CONTROL			0x702
-#define BUFFER_CONTROL_HOST  (0 << 0)
-#define BUFFER_CONTROL_VI    (1 << 0)
-#define BUFFER_CONTROL_EPP   (2 << 0)
-#define BUFFER_CONTROL_MPEGE (3 << 0)
-#define BUFFER_CONTROL_SB2D  (4 << 0)
-
-#define DC_WIN_COLOR_DEPTH			0x703
-#define WIN_COLOR_DEPTH_P1              0
-#define WIN_COLOR_DEPTH_P2              1
-#define WIN_COLOR_DEPTH_P4              2
-#define WIN_COLOR_DEPTH_P8              3
-#define WIN_COLOR_DEPTH_B4G4R4A4        4
-#define WIN_COLOR_DEPTH_B5G5R5A         5
-#define WIN_COLOR_DEPTH_B5G6R5          6
-#define WIN_COLOR_DEPTH_AB5G5R5         7
-#define WIN_COLOR_DEPTH_B8G8R8A8       12
-#define WIN_COLOR_DEPTH_R8G8B8A8       13
-#define WIN_COLOR_DEPTH_B6x2G6x2R6x2A8 14
-#define WIN_COLOR_DEPTH_R6x2G6x2B6x2A8 15
-#define WIN_COLOR_DEPTH_YCbCr422       16
-#define WIN_COLOR_DEPTH_YUV422         17
-#define WIN_COLOR_DEPTH_YCbCr420P      18
-#define WIN_COLOR_DEPTH_YUV420P        19
-#define WIN_COLOR_DEPTH_YCbCr422P      20
-#define WIN_COLOR_DEPTH_YUV422P        21
-#define WIN_COLOR_DEPTH_YCbCr422R      22
-#define WIN_COLOR_DEPTH_YUV422R        23
-#define WIN_COLOR_DEPTH_YCbCr422RA     24
-#define WIN_COLOR_DEPTH_YUV422RA       25
-
-#define DC_WIN_POSITION				0x704
-#define H_POSITION(x) (((x) & 0x1fff) <<  0)
-#define V_POSITION(x) (((x) & 0x1fff) << 16)
-
-#define DC_WIN_SIZE				0x705
-#define H_SIZE(x) (((x) & 0x1fff) <<  0)
-#define V_SIZE(x) (((x) & 0x1fff) << 16)
-
-#define DC_WIN_PRESCALED_SIZE			0x706
-#define H_PRESCALED_SIZE(x) (((x) & 0x7fff) <<  0)
-#define V_PRESCALED_SIZE(x) (((x) & 0x1fff) << 16)
-
-#define DC_WIN_H_INITIAL_DDA			0x707
-#define DC_WIN_V_INITIAL_DDA			0x708
-#define DC_WIN_DDA_INC				0x709
-#define H_DDA_INC(x) (((x) & 0xffff) <<  0)
-#define V_DDA_INC(x) (((x) & 0xffff) << 16)
-
-#define DC_WIN_LINE_STRIDE			0x70a
-#define DC_WIN_BUF_STRIDE			0x70b
-#define DC_WIN_UV_BUF_STRIDE			0x70c
-#define DC_WIN_BUFFER_ADDR_MODE			0x70d
-#define DC_WIN_DV_CONTROL			0x70e
-
-#define DC_WIN_BLEND_NOKEY			0x70f
-#define DC_WIN_BLEND_1WIN			0x710
-#define DC_WIN_BLEND_2WIN_X			0x711
-#define DC_WIN_BLEND_2WIN_Y			0x712
-#define DC_WIN_BLEND_3WIN_XY			0x713
-
-#define DC_WIN_HP_FETCH_CONTROL			0x714
-
-#define DC_WINBUF_START_ADDR			0x800
-#define DC_WINBUF_START_ADDR_NS			0x801
-#define DC_WINBUF_START_ADDR_U			0x802
-#define DC_WINBUF_START_ADDR_U_NS		0x803
-#define DC_WINBUF_START_ADDR_V			0x804
-#define DC_WINBUF_START_ADDR_V_NS		0x805
-
-#define DC_WINBUF_ADDR_H_OFFSET			0x806
-#define DC_WINBUF_ADDR_H_OFFSET_NS		0x807
-#define DC_WINBUF_ADDR_V_OFFSET			0x808
-#define DC_WINBUF_ADDR_V_OFFSET_NS		0x809
-
-#define DC_WINBUF_UFLOW_STATUS			0x80a
-
-#define DC_WINBUF_AD_UFLOW_STATUS		0xbca
-#define DC_WINBUF_BD_UFLOW_STATUS		0xdca
-#define DC_WINBUF_CD_UFLOW_STATUS		0xfca
-
-/* synchronization points */
-#define SYNCPT_VBLANK0 26
-#define SYNCPT_VBLANK1 27
-
-#endif /* TEGRA_DC_H */
diff --git a/drivers/gpu/host1x/drm/drm.c b/drivers/gpu/host1x/drm/drm.c
deleted file mode 100644
index e184b00faacd..000000000000
--- a/drivers/gpu/host1x/drm/drm.c
+++ /dev/null
@@ -1,648 +0,0 @@
-/*
- * Copyright (C) 2012 Avionic Design GmbH
- * Copyright (C) 2012-2013 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/of_address.h>
-#include <linux/of_platform.h>
-
-#include <linux/dma-mapping.h>
-#include <asm/dma-iommu.h>
-
-#include <drm/drm.h>
-#include <drm/drmP.h>
-
-#include "host1x_client.h"
-#include "dev.h"
-#include "drm.h"
-#include "gem.h"
-#include "syncpt.h"
-
-#define DRIVER_NAME "tegra"
-#define DRIVER_DESC "NVIDIA Tegra graphics"
-#define DRIVER_DATE "20120330"
-#define DRIVER_MAJOR 0
-#define DRIVER_MINOR 0
-#define DRIVER_PATCHLEVEL 0
-
-struct host1x_drm_client {
-	struct host1x_client *client;
-	struct device_node *np;
-	struct list_head list;
-};
-
-static int host1x_add_drm_client(struct host1x_drm *host1x,
-				 struct device_node *np)
-{
-	struct host1x_drm_client *client;
-
-	client = kzalloc(sizeof(*client), GFP_KERNEL);
-	if (!client)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&client->list);
-	client->np = of_node_get(np);
-
-	list_add_tail(&client->list, &host1x->drm_clients);
-
-	return 0;
-}
-
-static int host1x_activate_drm_client(struct host1x_drm *host1x,
-				      struct host1x_drm_client *drm,
-				      struct host1x_client *client)
-{
-	mutex_lock(&host1x->drm_clients_lock);
-	list_del_init(&drm->list);
-	list_add_tail(&drm->list, &host1x->drm_active);
-	drm->client = client;
-	mutex_unlock(&host1x->drm_clients_lock);
-
-	return 0;
-}
-
-static int host1x_remove_drm_client(struct host1x_drm *host1x,
-				    struct host1x_drm_client *client)
-{
-	mutex_lock(&host1x->drm_clients_lock);
-	list_del_init(&client->list);
-	mutex_unlock(&host1x->drm_clients_lock);
-
-	of_node_put(client->np);
-	kfree(client);
-
-	return 0;
-}
-
-static int host1x_parse_dt(struct host1x_drm *host1x)
-{
-	static const char * const compat[] = {
-		"nvidia,tegra20-dc",
-		"nvidia,tegra20-hdmi",
-		"nvidia,tegra20-gr2d",
-		"nvidia,tegra30-dc",
-		"nvidia,tegra30-hdmi",
-		"nvidia,tegra30-gr2d",
-	};
-	unsigned int i;
-	int err;
-
-	for (i = 0; i < ARRAY_SIZE(compat); i++) {
-		struct device_node *np;
-
-		for_each_child_of_node(host1x->dev->of_node, np) {
-			if (of_device_is_compatible(np, compat[i]) &&
-			    of_device_is_available(np)) {
-				err = host1x_add_drm_client(host1x, np);
-				if (err < 0)
-					return err;
-			}
-		}
-	}
-
-	return 0;
-}
-
-int host1x_drm_alloc(struct platform_device *pdev)
-{
-	struct host1x_drm *host1x;
-	int err;
-
-	host1x = devm_kzalloc(&pdev->dev, sizeof(*host1x), GFP_KERNEL);
-	if (!host1x)
-		return -ENOMEM;
-
-	mutex_init(&host1x->drm_clients_lock);
-	INIT_LIST_HEAD(&host1x->drm_clients);
-	INIT_LIST_HEAD(&host1x->drm_active);
-	mutex_init(&host1x->clients_lock);
-	INIT_LIST_HEAD(&host1x->clients);
-	host1x->dev = &pdev->dev;
-
-	err = host1x_parse_dt(host1x);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to parse DT: %d\n", err);
-		return err;
-	}
-
-	host1x_set_drm_data(&pdev->dev, host1x);
-
-	return 0;
-}
-
-int host1x_drm_init(struct host1x_drm *host1x, struct drm_device *drm)
-{
-	struct host1x_client *client;
-
-	mutex_lock(&host1x->clients_lock);
-
-	list_for_each_entry(client, &host1x->clients, list) {
-		if (client->ops && client->ops->drm_init) {
-			int err = client->ops->drm_init(client, drm);
-			if (err < 0) {
-				dev_err(host1x->dev,
-					"DRM setup failed for %s: %d\n",
-					dev_name(client->dev), err);
-				mutex_unlock(&host1x->clients_lock);
-				return err;
-			}
-		}
-	}
-
-	mutex_unlock(&host1x->clients_lock);
-
-	return 0;
-}
-
-int host1x_drm_exit(struct host1x_drm *host1x)
-{
-	struct platform_device *pdev = to_platform_device(host1x->dev);
-	struct host1x_client *client;
-
-	if (!host1x->drm)
-		return 0;
-
-	mutex_lock(&host1x->clients_lock);
-
-	list_for_each_entry_reverse(client, &host1x->clients, list) {
-		if (client->ops && client->ops->drm_exit) {
-			int err = client->ops->drm_exit(client);
-			if (err < 0) {
-				dev_err(host1x->dev,
-					"DRM cleanup failed for %s: %d\n",
-					dev_name(client->dev), err);
-				mutex_unlock(&host1x->clients_lock);
-				return err;
-			}
-		}
-	}
-
-	mutex_unlock(&host1x->clients_lock);
-
-	drm_platform_exit(&tegra_drm_driver, pdev);
-	host1x->drm = NULL;
-
-	return 0;
-}
-
-int host1x_register_client(struct host1x_drm *host1x,
-			   struct host1x_client *client)
-{
-	struct host1x_drm_client *drm, *tmp;
-	int err;
-
-	mutex_lock(&host1x->clients_lock);
-	list_add_tail(&client->list, &host1x->clients);
-	mutex_unlock(&host1x->clients_lock);
-
-	list_for_each_entry_safe(drm, tmp, &host1x->drm_clients, list)
-		if (drm->np == client->dev->of_node)
-			host1x_activate_drm_client(host1x, drm, client);
-
-	if (list_empty(&host1x->drm_clients)) {
-		struct platform_device *pdev = to_platform_device(host1x->dev);
-
-		err = drm_platform_init(&tegra_drm_driver, pdev);
-		if (err < 0) {
-			dev_err(host1x->dev, "drm_platform_init(): %d\n", err);
-			return err;
-		}
-	}
-
-	return 0;
-}
-
-int host1x_unregister_client(struct host1x_drm *host1x,
-			     struct host1x_client *client)
-{
-	struct host1x_drm_client *drm, *tmp;
-	int err;
-
-	list_for_each_entry_safe(drm, tmp, &host1x->drm_active, list) {
-		if (drm->client == client) {
-			err = host1x_drm_exit(host1x);
-			if (err < 0) {
-				dev_err(host1x->dev, "host1x_drm_exit(): %d\n",
-					err);
-				return err;
-			}
-
-			host1x_remove_drm_client(host1x, drm);
-			break;
-		}
-	}
-
-	mutex_lock(&host1x->clients_lock);
-	list_del_init(&client->list);
-	mutex_unlock(&host1x->clients_lock);
-
-	return 0;
-}
-
-static int tegra_drm_load(struct drm_device *drm, unsigned long flags)
-{
-	struct host1x_drm *host1x;
-	int err;
-
-	host1x = host1x_get_drm_data(drm->dev);
-	drm->dev_private = host1x;
-	host1x->drm = drm;
-
-	drm_mode_config_init(drm);
-
-	err = host1x_drm_init(host1x, drm);
-	if (err < 0)
-		return err;
-
-	/*
-	 * We don't use the drm_irq_install() helpers provided by the DRM
-	 * core, so we need to set this manually in order to allow the
-	 * DRM_IOCTL_WAIT_VBLANK to operate correctly.
-	 */
-	drm->irq_enabled = 1;
-
-	err = drm_vblank_init(drm, drm->mode_config.num_crtc);
-	if (err < 0)
-		return err;
-
-	err = tegra_drm_fb_init(drm);
-	if (err < 0)
-		return err;
-
-	drm_kms_helper_poll_init(drm);
-
-	return 0;
-}
-
-static int tegra_drm_unload(struct drm_device *drm)
-{
-	drm_kms_helper_poll_fini(drm);
-	tegra_drm_fb_exit(drm);
-
-	drm_mode_config_cleanup(drm);
-
-	return 0;
-}
-
-static int tegra_drm_open(struct drm_device *drm, struct drm_file *filp)
-{
-	struct host1x_drm_file *fpriv;
-
-	fpriv = kzalloc(sizeof(*fpriv), GFP_KERNEL);
-	if (!fpriv)
-		return -ENOMEM;
-
-	INIT_LIST_HEAD(&fpriv->contexts);
-	filp->driver_priv = fpriv;
-
-	return 0;
-}
-
-static void host1x_drm_context_free(struct host1x_drm_context *context)
-{
-	context->client->ops->close_channel(context);
-	kfree(context);
-}
-
-static void tegra_drm_lastclose(struct drm_device *drm)
-{
-	struct host1x_drm *host1x = drm->dev_private;
-
-	tegra_fbdev_restore_mode(host1x->fbdev);
-}
-
-#ifdef CONFIG_DRM_TEGRA_STAGING
-static bool host1x_drm_file_owns_context(struct host1x_drm_file *file,
-					 struct host1x_drm_context *context)
-{
-	struct host1x_drm_context *ctx;
-
-	list_for_each_entry(ctx, &file->contexts, list)
-		if (ctx == context)
-			return true;
-
-	return false;
-}
-
-static int tegra_gem_create(struct drm_device *drm, void *data,
-			    struct drm_file *file)
-{
-	struct drm_tegra_gem_create *args = data;
-	struct tegra_bo *bo;
-
-	bo = tegra_bo_create_with_handle(file, drm, args->size,
-					 &args->handle);
-	if (IS_ERR(bo))
-		return PTR_ERR(bo);
-
-	return 0;
-}
-
-static int tegra_gem_mmap(struct drm_device *drm, void *data,
-			  struct drm_file *file)
-{
-	struct drm_tegra_gem_mmap *args = data;
-	struct drm_gem_object *gem;
-	struct tegra_bo *bo;
-
-	gem = drm_gem_object_lookup(drm, file, args->handle);
-	if (!gem)
-		return -EINVAL;
-
-	bo = to_tegra_bo(gem);
-
-	args->offset = tegra_bo_get_mmap_offset(bo);
-
-	drm_gem_object_unreference(gem);
-
-	return 0;
-}
-
-static int tegra_syncpt_read(struct drm_device *drm, void *data,
-			     struct drm_file *file)
-{
-	struct drm_tegra_syncpt_read *args = data;
-	struct host1x *host = dev_get_drvdata(drm->dev);
-	struct host1x_syncpt *sp = host1x_syncpt_get(host, args->id);
-
-	if (!sp)
-		return -EINVAL;
-
-	args->value = host1x_syncpt_read_min(sp);
-	return 0;
-}
-
-static int tegra_syncpt_incr(struct drm_device *drm, void *data,
-			     struct drm_file *file)
-{
-	struct drm_tegra_syncpt_incr *args = data;
-	struct host1x *host = dev_get_drvdata(drm->dev);
-	struct host1x_syncpt *sp = host1x_syncpt_get(host, args->id);
-
-	if (!sp)
-		return -EINVAL;
-
-	return host1x_syncpt_incr(sp);
-}
-
-static int tegra_syncpt_wait(struct drm_device *drm, void *data,
-			     struct drm_file *file)
-{
-	struct drm_tegra_syncpt_wait *args = data;
-	struct host1x *host = dev_get_drvdata(drm->dev);
-	struct host1x_syncpt *sp = host1x_syncpt_get(host, args->id);
-
-	if (!sp)
-		return -EINVAL;
-
-	return host1x_syncpt_wait(sp, args->thresh, args->timeout,
-				  &args->value);
-}
-
-static int tegra_open_channel(struct drm_device *drm, void *data,
-			      struct drm_file *file)
-{
-	struct drm_tegra_open_channel *args = data;
-	struct host1x_client *client;
-	struct host1x_drm_context *context;
-	struct host1x_drm_file *fpriv = file->driver_priv;
-	struct host1x_drm *host1x = drm->dev_private;
-	int err = -ENODEV;
-
-	context = kzalloc(sizeof(*context), GFP_KERNEL);
-	if (!context)
-		return -ENOMEM;
-
-	list_for_each_entry(client, &host1x->clients, list)
-		if (client->class == args->client) {
-			err = client->ops->open_channel(client, context);
-			if (err)
-				break;
-
-			context->client = client;
-			list_add(&context->list, &fpriv->contexts);
-			args->context = (uintptr_t)context;
-			return 0;
-		}
-
-	kfree(context);
-	return err;
-}
-
-static int tegra_close_channel(struct drm_device *drm, void *data,
-			       struct drm_file *file)
-{
-	struct drm_tegra_close_channel *args = data;
-	struct host1x_drm_file *fpriv = file->driver_priv;
-	struct host1x_drm_context *context =
-		(struct host1x_drm_context *)(uintptr_t)args->context;
-
-	if (!host1x_drm_file_owns_context(fpriv, context))
-		return -EINVAL;
-
-	list_del(&context->list);
-	host1x_drm_context_free(context);
-
-	return 0;
-}
-
-static int tegra_get_syncpt(struct drm_device *drm, void *data,
-			    struct drm_file *file)
-{
-	struct drm_tegra_get_syncpt *args = data;
-	struct host1x_drm_file *fpriv = file->driver_priv;
-	struct host1x_drm_context *context =
-		(struct host1x_drm_context *)(uintptr_t)args->context;
-	struct host1x_syncpt *syncpt;
-
-	if (!host1x_drm_file_owns_context(fpriv, context))
-		return -ENODEV;
-
-	if (args->index >= context->client->num_syncpts)
-		return -EINVAL;
-
-	syncpt = context->client->syncpts[args->index];
-	args->id = host1x_syncpt_id(syncpt);
-
-	return 0;
-}
-
-static int tegra_submit(struct drm_device *drm, void *data,
-			struct drm_file *file)
-{
-	struct drm_tegra_submit *args = data;
-	struct host1x_drm_file *fpriv = file->driver_priv;
-	struct host1x_drm_context *context =
-		(struct host1x_drm_context *)(uintptr_t)args->context;
-
-	if (!host1x_drm_file_owns_context(fpriv, context))
-		return -ENODEV;
-
-	return context->client->ops->submit(context, args, drm, file);
-}
-#endif
-
-static struct drm_ioctl_desc tegra_drm_ioctls[] = {
-#ifdef CONFIG_DRM_TEGRA_STAGING
-	DRM_IOCTL_DEF_DRV(TEGRA_GEM_CREATE, tegra_gem_create, DRM_UNLOCKED | DRM_AUTH),
-	DRM_IOCTL_DEF_DRV(TEGRA_GEM_MMAP, tegra_gem_mmap, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_READ, tegra_syncpt_read, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_INCR, tegra_syncpt_incr, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(TEGRA_SYNCPT_WAIT, tegra_syncpt_wait, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(TEGRA_OPEN_CHANNEL, tegra_open_channel, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(TEGRA_CLOSE_CHANNEL, tegra_close_channel, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(TEGRA_GET_SYNCPT, tegra_get_syncpt, DRM_UNLOCKED),
-	DRM_IOCTL_DEF_DRV(TEGRA_SUBMIT, tegra_submit, DRM_UNLOCKED),
-#endif
-};
-
-static const struct file_operations tegra_drm_fops = {
-	.owner = THIS_MODULE,
-	.open = drm_open,
-	.release = drm_release,
-	.unlocked_ioctl = drm_ioctl,
-	.mmap = tegra_drm_mmap,
-	.poll = drm_poll,
-	.fasync = drm_fasync,
-	.read = drm_read,
-#ifdef CONFIG_COMPAT
-	.compat_ioctl = drm_compat_ioctl,
-#endif
-	.llseek = noop_llseek,
-};
-
-static struct drm_crtc *tegra_crtc_from_pipe(struct drm_device *drm, int pipe)
-{
-	struct drm_crtc *crtc;
-
-	list_for_each_entry(crtc, &drm->mode_config.crtc_list, head) {
-		struct tegra_dc *dc = to_tegra_dc(crtc);
-
-		if (dc->pipe == pipe)
-			return crtc;
-	}
-
-	return NULL;
-}
-
-static u32 tegra_drm_get_vblank_counter(struct drm_device *dev, int crtc)
-{
-	/* TODO: implement real hardware counter using syncpoints */
-	return drm_vblank_count(dev, crtc);
-}
-
-static int tegra_drm_enable_vblank(struct drm_device *drm, int pipe)
-{
-	struct drm_crtc *crtc = tegra_crtc_from_pipe(drm, pipe);
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-
-	if (!crtc)
-		return -ENODEV;
-
-	tegra_dc_enable_vblank(dc);
-
-	return 0;
-}
-
-static void tegra_drm_disable_vblank(struct drm_device *drm, int pipe)
-{
-	struct drm_crtc *crtc = tegra_crtc_from_pipe(drm, pipe);
-	struct tegra_dc *dc = to_tegra_dc(crtc);
-
-	if (crtc)
-		tegra_dc_disable_vblank(dc);
-}
-
-static void tegra_drm_preclose(struct drm_device *drm, struct drm_file *file)
-{
-	struct host1x_drm_file *fpriv = file->driver_priv;
-	struct host1x_drm_context *context, *tmp;
-	struct drm_crtc *crtc;
-
-	list_for_each_entry(crtc, &drm->mode_config.crtc_list, head)
-		tegra_dc_cancel_page_flip(crtc, file);
-
-	list_for_each_entry_safe(context, tmp, &fpriv->contexts, list)
-		host1x_drm_context_free(context);
-
-	kfree(fpriv);
-}
-
-#ifdef CONFIG_DEBUG_FS
-static int tegra_debugfs_framebuffers(struct seq_file *s, void *data)
-{
-	struct drm_info_node *node = (struct drm_info_node *)s->private;
-	struct drm_device *drm = node->minor->dev;
-	struct drm_framebuffer *fb;
-
-	mutex_lock(&drm->mode_config.fb_lock);
-
-	list_for_each_entry(fb, &drm->mode_config.fb_list, head) {
-		seq_printf(s, "%3d: user size: %d x %d, depth %d, %d bpp, refcount %d\n",
-			   fb->base.id, fb->width, fb->height, fb->depth,
-			   fb->bits_per_pixel,
-			   atomic_read(&fb->refcount.refcount));
-	}
-
-	mutex_unlock(&drm->mode_config.fb_lock);
-
-	return 0;
-}
-
-static struct drm_info_list tegra_debugfs_list[] = {
-	{ "framebuffers", tegra_debugfs_framebuffers, 0 },
-};
-
-static int tegra_debugfs_init(struct drm_minor *minor)
-{
-	return drm_debugfs_create_files(tegra_debugfs_list,
-					ARRAY_SIZE(tegra_debugfs_list),
-					minor->debugfs_root, minor);
-}
-
-static void tegra_debugfs_cleanup(struct drm_minor *minor)
-{
-	drm_debugfs_remove_files(tegra_debugfs_list,
-				 ARRAY_SIZE(tegra_debugfs_list), minor);
-}
-#endif
-
-struct drm_driver tegra_drm_driver = {
-	.driver_features = DRIVER_MODESET | DRIVER_GEM,
-	.load = tegra_drm_load,
-	.unload = tegra_drm_unload,
-	.open = tegra_drm_open,
-	.preclose = tegra_drm_preclose,
-	.lastclose = tegra_drm_lastclose,
-
-	.get_vblank_counter = tegra_drm_get_vblank_counter,
-	.enable_vblank = tegra_drm_enable_vblank,
-	.disable_vblank = tegra_drm_disable_vblank,
-
-#if defined(CONFIG_DEBUG_FS)
-	.debugfs_init = tegra_debugfs_init,
-	.debugfs_cleanup = tegra_debugfs_cleanup,
-#endif
-
-	.gem_free_object = tegra_bo_free_object,
-	.gem_vm_ops = &tegra_bo_vm_ops,
-	.dumb_create = tegra_bo_dumb_create,
-	.dumb_map_offset = tegra_bo_dumb_map_offset,
-	.dumb_destroy = tegra_bo_dumb_destroy,
-
-	.ioctls = tegra_drm_ioctls,
-	.num_ioctls = ARRAY_SIZE(tegra_drm_ioctls),
-	.fops = &tegra_drm_fops,
-
-	.name = DRIVER_NAME,
-	.desc = DRIVER_DESC,
-	.date = DRIVER_DATE,
-	.major = DRIVER_MAJOR,
-	.minor = DRIVER_MINOR,
-	.patchlevel = DRIVER_PATCHLEVEL,
-};
diff --git a/drivers/gpu/host1x/drm/drm.h b/drivers/gpu/host1x/drm/drm.h
deleted file mode 100644
index 02ce020f2575..000000000000
--- a/drivers/gpu/host1x/drm/drm.h
+++ /dev/null
@@ -1,271 +0,0 @@
-/*
- * Copyright (C) 2012 Avionic Design GmbH
- * Copyright (C) 2012-2013 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef HOST1X_DRM_H
-#define HOST1X_DRM_H 1
-
-#include <drm/drmP.h>
-#include <drm/drm_crtc_helper.h>
-#include <drm/drm_edid.h>
-#include <drm/drm_fb_helper.h>
-#include <drm/drm_fixed.h>
-#include <uapi/drm/tegra_drm.h>
-
-#include "host1x.h"
-
-struct tegra_fb {
-	struct drm_framebuffer base;
-	struct tegra_bo **planes;
-	unsigned int num_planes;
-};
-
-struct tegra_fbdev {
-	struct drm_fb_helper base;
-	struct tegra_fb *fb;
-};
-
-struct host1x_drm {
-	struct drm_device *drm;
-	struct device *dev;
-	void __iomem *regs;
-	struct clk *clk;
-	int syncpt;
-	int irq;
-
-	struct mutex drm_clients_lock;
-	struct list_head drm_clients;
-	struct list_head drm_active;
-
-	struct mutex clients_lock;
-	struct list_head clients;
-
-	struct tegra_fbdev *fbdev;
-};
-
-struct host1x_client;
-
-struct host1x_drm_context {
-	struct host1x_client *client;
-	struct host1x_channel *channel;
-	struct list_head list;
-};
-
-struct host1x_client_ops {
-	int (*drm_init)(struct host1x_client *client, struct drm_device *drm);
-	int (*drm_exit)(struct host1x_client *client);
-	int (*open_channel)(struct host1x_client *client,
-			    struct host1x_drm_context *context);
-	void (*close_channel)(struct host1x_drm_context *context);
-	int (*submit)(struct host1x_drm_context *context,
-		      struct drm_tegra_submit *args, struct drm_device *drm,
-		      struct drm_file *file);
-};
-
-struct host1x_drm_file {
-	struct list_head contexts;
-};
-
-struct host1x_client {
-	struct host1x_drm *host1x;
-	struct device *dev;
-
-	const struct host1x_client_ops *ops;
-
-	enum host1x_class class;
-	struct host1x_channel *channel;
-
-	struct host1x_syncpt **syncpts;
-	unsigned int num_syncpts;
-
-	struct list_head list;
-};
-
-extern int host1x_drm_init(struct host1x_drm *host1x, struct drm_device *drm);
-extern int host1x_drm_exit(struct host1x_drm *host1x);
-
-extern int host1x_register_client(struct host1x_drm *host1x,
-				  struct host1x_client *client);
-extern int host1x_unregister_client(struct host1x_drm *host1x,
-				    struct host1x_client *client);
-
-struct tegra_output;
-
-struct tegra_dc {
-	struct host1x_client client;
-	spinlock_t lock;
-
-	struct host1x_drm *host1x;
-	struct device *dev;
-
-	struct drm_crtc base;
-	int pipe;
-
-	struct clk *clk;
-
-	void __iomem *regs;
-	int irq;
-
-	struct tegra_output *rgb;
-
-	struct list_head list;
-
-	struct drm_info_list *debugfs_files;
-	struct drm_minor *minor;
-	struct dentry *debugfs;
-
-	/* page-flip handling */
-	struct drm_pending_vblank_event *event;
-};
-
-static inline struct tegra_dc *host1x_client_to_dc(struct host1x_client *client)
-{
-	return container_of(client, struct tegra_dc, client);
-}
-
-static inline struct tegra_dc *to_tegra_dc(struct drm_crtc *crtc)
-{
-	return container_of(crtc, struct tegra_dc, base);
-}
-
-static inline void tegra_dc_writel(struct tegra_dc *dc, unsigned long value,
-				   unsigned long reg)
-{
-	writel(value, dc->regs + (reg << 2));
-}
-
-static inline unsigned long tegra_dc_readl(struct tegra_dc *dc,
-					   unsigned long reg)
-{
-	return readl(dc->regs + (reg << 2));
-}
-
-struct tegra_dc_window {
-	struct {
-		unsigned int x;
-		unsigned int y;
-		unsigned int w;
-		unsigned int h;
-	} src;
-	struct {
-		unsigned int x;
-		unsigned int y;
-		unsigned int w;
-		unsigned int h;
-	} dst;
-	unsigned int bits_per_pixel;
-	unsigned int format;
-	unsigned int stride[2];
-	unsigned long base[3];
-};
-
-/* from dc.c */
-extern unsigned int tegra_dc_format(uint32_t format);
-extern int tegra_dc_setup_window(struct tegra_dc *dc, unsigned int index,
-				 const struct tegra_dc_window *window);
-extern void tegra_dc_enable_vblank(struct tegra_dc *dc);
-extern void tegra_dc_disable_vblank(struct tegra_dc *dc);
-extern void tegra_dc_cancel_page_flip(struct drm_crtc *crtc,
-				      struct drm_file *file);
-
-struct tegra_output_ops {
-	int (*enable)(struct tegra_output *output);
-	int (*disable)(struct tegra_output *output);
-	int (*setup_clock)(struct tegra_output *output, struct clk *clk,
-			   unsigned long pclk);
-	int (*check_mode)(struct tegra_output *output,
-			  struct drm_display_mode *mode,
-			  enum drm_mode_status *status);
-};
-
-enum tegra_output_type {
-	TEGRA_OUTPUT_RGB,
-	TEGRA_OUTPUT_HDMI,
-};
-
-struct tegra_output {
-	struct device_node *of_node;
-	struct device *dev;
-
-	const struct tegra_output_ops *ops;
-	enum tegra_output_type type;
-
-	struct i2c_adapter *ddc;
-	const struct edid *edid;
-	unsigned int hpd_irq;
-	int hpd_gpio;
-
-	struct drm_encoder encoder;
-	struct drm_connector connector;
-};
-
-static inline struct tegra_output *encoder_to_output(struct drm_encoder *e)
-{
-	return container_of(e, struct tegra_output, encoder);
-}
-
-static inline struct tegra_output *connector_to_output(struct drm_connector *c)
-{
-	return container_of(c, struct tegra_output, connector);
-}
-
-static inline int tegra_output_enable(struct tegra_output *output)
-{
-	if (output && output->ops && output->ops->enable)
-		return output->ops->enable(output);
-
-	return output ? -ENOSYS : -EINVAL;
-}
-
-static inline int tegra_output_disable(struct tegra_output *output)
-{
-	if (output && output->ops && output->ops->disable)
-		return output->ops->disable(output);
-
-	return output ? -ENOSYS : -EINVAL;
-}
-
-static inline int tegra_output_setup_clock(struct tegra_output *output,
-					   struct clk *clk, unsigned long pclk)
-{
-	if (output && output->ops && output->ops->setup_clock)
-		return output->ops->setup_clock(output, clk, pclk);
-
-	return output ? -ENOSYS : -EINVAL;
-}
-
-static inline int tegra_output_check_mode(struct tegra_output *output,
-					  struct drm_display_mode *mode,
-					  enum drm_mode_status *status)
-{
-	if (output && output->ops && output->ops->check_mode)
-		return output->ops->check_mode(output, mode, status);
-
-	return output ? -ENOSYS : -EINVAL;
-}
-
-/* from rgb.c */
-extern int tegra_dc_rgb_probe(struct tegra_dc *dc);
-extern int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc);
-extern int tegra_dc_rgb_exit(struct tegra_dc *dc);
-
-/* from output.c */
-extern int tegra_output_parse_dt(struct tegra_output *output);
-extern int tegra_output_init(struct drm_device *drm, struct tegra_output *output);
-extern int tegra_output_exit(struct tegra_output *output);
-
-/* from fb.c */
-struct tegra_bo *tegra_fb_get_plane(struct drm_framebuffer *framebuffer,
-				    unsigned int index);
-extern int tegra_drm_fb_init(struct drm_device *drm);
-extern void tegra_drm_fb_exit(struct drm_device *drm);
-extern void tegra_fbdev_restore_mode(struct tegra_fbdev *fbdev);
-
-extern struct drm_driver tegra_drm_driver;
-
-#endif /* HOST1X_DRM_H */
diff --git a/drivers/gpu/host1x/drm/fb.c b/drivers/gpu/host1x/drm/fb.c
deleted file mode 100644
index 979a3e32b78b..000000000000
--- a/drivers/gpu/host1x/drm/fb.c
+++ /dev/null
@@ -1,374 +0,0 @@
-/*
- * Copyright (C) 2012-2013 Avionic Design GmbH
- * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * Based on the KMS/FB CMA helpers
- *   Copyright (C) 2012 Analog Device Inc.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-
-#include "drm.h"
-#include "gem.h"
-
-static inline struct tegra_fb *to_tegra_fb(struct drm_framebuffer *fb)
-{
-	return container_of(fb, struct tegra_fb, base);
-}
-
-static inline struct tegra_fbdev *to_tegra_fbdev(struct drm_fb_helper *helper)
-{
-	return container_of(helper, struct tegra_fbdev, base);
-}
-
-struct tegra_bo *tegra_fb_get_plane(struct drm_framebuffer *framebuffer,
-				    unsigned int index)
-{
-	struct tegra_fb *fb = to_tegra_fb(framebuffer);
-
-	if (index >= drm_format_num_planes(framebuffer->pixel_format))
-		return NULL;
-
-	return fb->planes[index];
-}
-
-static void tegra_fb_destroy(struct drm_framebuffer *framebuffer)
-{
-	struct tegra_fb *fb = to_tegra_fb(framebuffer);
-	unsigned int i;
-
-	for (i = 0; i < fb->num_planes; i++) {
-		struct tegra_bo *bo = fb->planes[i];
-
-		if (bo)
-			drm_gem_object_unreference_unlocked(&bo->gem);
-	}
-
-	drm_framebuffer_cleanup(framebuffer);
-	kfree(fb->planes);
-	kfree(fb);
-}
-
-static int tegra_fb_create_handle(struct drm_framebuffer *framebuffer,
-				  struct drm_file *file, unsigned int *handle)
-{
-	struct tegra_fb *fb = to_tegra_fb(framebuffer);
-
-	return drm_gem_handle_create(file, &fb->planes[0]->gem, handle);
-}
-
-static struct drm_framebuffer_funcs tegra_fb_funcs = {
-	.destroy = tegra_fb_destroy,
-	.create_handle = tegra_fb_create_handle,
-};
-
-static struct tegra_fb *tegra_fb_alloc(struct drm_device *drm,
-				       struct drm_mode_fb_cmd2 *mode_cmd,
-				       struct tegra_bo **planes,
-				       unsigned int num_planes)
-{
-	struct tegra_fb *fb;
-	unsigned int i;
-	int err;
-
-	fb = kzalloc(sizeof(*fb), GFP_KERNEL);
-	if (!fb)
-		return ERR_PTR(-ENOMEM);
-
-	fb->planes = kzalloc(num_planes * sizeof(*planes), GFP_KERNEL);
-	if (!fb->planes)
-		return ERR_PTR(-ENOMEM);
-
-	fb->num_planes = num_planes;
-
-	drm_helper_mode_fill_fb_struct(&fb->base, mode_cmd);
-
-	for (i = 0; i < fb->num_planes; i++)
-		fb->planes[i] = planes[i];
-
-	err = drm_framebuffer_init(drm, &fb->base, &tegra_fb_funcs);
-	if (err < 0) {
-		dev_err(drm->dev, "failed to initialize framebuffer: %d\n",
-			err);
-		kfree(fb->planes);
-		kfree(fb);
-		return ERR_PTR(err);
-	}
-
-	return fb;
-}
-
-static struct drm_framebuffer *tegra_fb_create(struct drm_device *drm,
-					       struct drm_file *file,
-					       struct drm_mode_fb_cmd2 *cmd)
-{
-	unsigned int hsub, vsub, i;
-	struct tegra_bo *planes[4];
-	struct drm_gem_object *gem;
-	struct tegra_fb *fb;
-	int err;
-
-	hsub = drm_format_horz_chroma_subsampling(cmd->pixel_format);
-	vsub = drm_format_vert_chroma_subsampling(cmd->pixel_format);
-
-	for (i = 0; i < drm_format_num_planes(cmd->pixel_format); i++) {
-		unsigned int width = cmd->width / (i ? hsub : 1);
-		unsigned int height = cmd->height / (i ? vsub : 1);
-		unsigned int size, bpp;
-
-		gem = drm_gem_object_lookup(drm, file, cmd->handles[i]);
-		if (!gem) {
-			err = -ENXIO;
-			goto unreference;
-		}
-
-		bpp = drm_format_plane_cpp(cmd->pixel_format, i);
-
-		size = (height - 1) * cmd->pitches[i] +
-		       width * bpp + cmd->offsets[i];
-
-		if (gem->size < size) {
-			err = -EINVAL;
-			goto unreference;
-		}
-
-		planes[i] = to_tegra_bo(gem);
-	}
-
-	fb = tegra_fb_alloc(drm, cmd, planes, i);
-	if (IS_ERR(fb)) {
-		err = PTR_ERR(fb);
-		goto unreference;
-	}
-
-	return &fb->base;
-
-unreference:
-	while (i--)
-		drm_gem_object_unreference_unlocked(&planes[i]->gem);
-
-	return ERR_PTR(err);
-}
-
-static struct fb_ops tegra_fb_ops = {
-	.owner = THIS_MODULE,
-	.fb_fillrect = sys_fillrect,
-	.fb_copyarea = sys_copyarea,
-	.fb_imageblit = sys_imageblit,
-	.fb_check_var = drm_fb_helper_check_var,
-	.fb_set_par = drm_fb_helper_set_par,
-	.fb_blank = drm_fb_helper_blank,
-	.fb_pan_display = drm_fb_helper_pan_display,
-	.fb_setcmap = drm_fb_helper_setcmap,
-};
-
-static int tegra_fbdev_probe(struct drm_fb_helper *helper,
-			     struct drm_fb_helper_surface_size *sizes)
-{
-	struct tegra_fbdev *fbdev = to_tegra_fbdev(helper);
-	struct drm_device *drm = helper->dev;
-	struct drm_mode_fb_cmd2 cmd = { 0 };
-	unsigned int bytes_per_pixel;
-	struct drm_framebuffer *fb;
-	unsigned long offset;
-	struct fb_info *info;
-	struct tegra_bo *bo;
-	size_t size;
-	int err;
-
-	bytes_per_pixel = DIV_ROUND_UP(sizes->surface_bpp, 8);
-
-	cmd.width = sizes->surface_width;
-	cmd.height = sizes->surface_height;
-	cmd.pitches[0] = sizes->surface_width * bytes_per_pixel;
-	cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
-						     sizes->surface_depth);
-
-	size = cmd.pitches[0] * cmd.height;
-
-	bo = tegra_bo_create(drm, size);
-	if (IS_ERR(bo))
-		return PTR_ERR(bo);
-
-	info = framebuffer_alloc(0, drm->dev);
-	if (!info) {
-		dev_err(drm->dev, "failed to allocate framebuffer info\n");
-		tegra_bo_free_object(&bo->gem);
-		return -ENOMEM;
-	}
-
-	fbdev->fb = tegra_fb_alloc(drm, &cmd, &bo, 1);
-	if (IS_ERR(fbdev->fb)) {
-		dev_err(drm->dev, "failed to allocate DRM framebuffer\n");
-		err = PTR_ERR(fbdev->fb);
-		goto release;
-	}
-
-	fb = &fbdev->fb->base;
-	helper->fb = fb;
-	helper->fbdev = info;
-
-	info->par = helper;
-	info->flags = FBINFO_FLAG_DEFAULT;
-	info->fbops = &tegra_fb_ops;
-
-	err = fb_alloc_cmap(&info->cmap, 256, 0);
-	if (err < 0) {
-		dev_err(drm->dev, "failed to allocate color map: %d\n", err);
-		goto destroy;
-	}
-
-	drm_fb_helper_fill_fix(info, fb->pitches[0], fb->depth);
-	drm_fb_helper_fill_var(info, helper, fb->width, fb->height);
-
-	offset = info->var.xoffset * bytes_per_pixel +
-		 info->var.yoffset * fb->pitches[0];
-
-	drm->mode_config.fb_base = (resource_size_t)bo->paddr;
-	info->screen_base = bo->vaddr + offset;
-	info->screen_size = size;
-	info->fix.smem_start = (unsigned long)(bo->paddr + offset);
-	info->fix.smem_len = size;
-
-	return 0;
-
-destroy:
-	drm_framebuffer_unregister_private(fb);
-	tegra_fb_destroy(fb);
-release:
-	framebuffer_release(info);
-	return err;
-}
-
-static struct drm_fb_helper_funcs tegra_fb_helper_funcs = {
-	.fb_probe = tegra_fbdev_probe,
-};
-
-static struct tegra_fbdev *tegra_fbdev_create(struct drm_device *drm,
-					      unsigned int preferred_bpp,
-					      unsigned int num_crtc,
-					      unsigned int max_connectors)
-{
-	struct drm_fb_helper *helper;
-	struct tegra_fbdev *fbdev;
-	int err;
-
-	fbdev = kzalloc(sizeof(*fbdev), GFP_KERNEL);
-	if (!fbdev) {
-		dev_err(drm->dev, "failed to allocate DRM fbdev\n");
-		return ERR_PTR(-ENOMEM);
-	}
-
-	fbdev->base.funcs = &tegra_fb_helper_funcs;
-	helper = &fbdev->base;
-
-	err = drm_fb_helper_init(drm, &fbdev->base, num_crtc, max_connectors);
-	if (err < 0) {
-		dev_err(drm->dev, "failed to initialize DRM FB helper\n");
-		goto free;
-	}
-
-	err = drm_fb_helper_single_add_all_connectors(&fbdev->base);
-	if (err < 0) {
-		dev_err(drm->dev, "failed to add connectors\n");
-		goto fini;
-	}
-
-	drm_helper_disable_unused_functions(drm);
-
-	err = drm_fb_helper_initial_config(&fbdev->base, preferred_bpp);
-	if (err < 0) {
-		dev_err(drm->dev, "failed to set initial configuration\n");
-		goto fini;
-	}
-
-	return fbdev;
-
-fini:
-	drm_fb_helper_fini(&fbdev->base);
-free:
-	kfree(fbdev);
-	return ERR_PTR(err);
-}
-
-static void tegra_fbdev_free(struct tegra_fbdev *fbdev)
-{
-	struct fb_info *info = fbdev->base.fbdev;
-
-	if (info) {
-		int err;
-
-		err = unregister_framebuffer(info);
-		if (err < 0)
-			DRM_DEBUG_KMS("failed to unregister framebuffer\n");
-
-		if (info->cmap.len)
-			fb_dealloc_cmap(&info->cmap);
-
-		framebuffer_release(info);
-	}
-
-	if (fbdev->fb) {
-		drm_framebuffer_unregister_private(&fbdev->fb->base);
-		tegra_fb_destroy(&fbdev->fb->base);
-	}
-
-	drm_fb_helper_fini(&fbdev->base);
-	kfree(fbdev);
-}
-
-static void tegra_fb_output_poll_changed(struct drm_device *drm)
-{
-	struct host1x_drm *host1x = drm->dev_private;
-
-	if (host1x->fbdev)
-		drm_fb_helper_hotplug_event(&host1x->fbdev->base);
-}
-
-static const struct drm_mode_config_funcs tegra_drm_mode_funcs = {
-	.fb_create = tegra_fb_create,
-	.output_poll_changed = tegra_fb_output_poll_changed,
-};
-
-int tegra_drm_fb_init(struct drm_device *drm)
-{
-	struct host1x_drm *host1x = drm->dev_private;
-	struct tegra_fbdev *fbdev;
-
-	drm->mode_config.min_width = 0;
-	drm->mode_config.min_height = 0;
-
-	drm->mode_config.max_width = 4096;
-	drm->mode_config.max_height = 4096;
-
-	drm->mode_config.funcs = &tegra_drm_mode_funcs;
-
-	fbdev = tegra_fbdev_create(drm, 32, drm->mode_config.num_crtc,
-				   drm->mode_config.num_connector);
-	if (IS_ERR(fbdev))
-		return PTR_ERR(fbdev);
-
-	host1x->fbdev = fbdev;
-
-	return 0;
-}
-
-void tegra_drm_fb_exit(struct drm_device *drm)
-{
-	struct host1x_drm *host1x = drm->dev_private;
-
-	tegra_fbdev_free(host1x->fbdev);
-}
-
-void tegra_fbdev_restore_mode(struct tegra_fbdev *fbdev)
-{
-	if (fbdev) {
-		drm_modeset_lock_all(fbdev->base.dev);
-		drm_fb_helper_restore_fbdev_mode(&fbdev->base);
-		drm_modeset_unlock_all(fbdev->base.dev);
-	}
-}
diff --git a/drivers/gpu/host1x/drm/gem.c b/drivers/gpu/host1x/drm/gem.c
deleted file mode 100644
index c5e9a9b494c2..000000000000
--- a/drivers/gpu/host1x/drm/gem.c
+++ /dev/null
@@ -1,270 +0,0 @@
-/*
- * NVIDIA Tegra DRM GEM helper functions
- *
- * Copyright (C) 2012 Sascha Hauer, Pengutronix
- * Copyright (C) 2013 NVIDIA CORPORATION, All rights reserved.
- *
- * Based on the GEM/CMA helpers
- *
- * Copyright (c) 2011 Samsung Electronics Co., Ltd.
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version 2
- * of the License, or (at your option) any later version.
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-
-#include <linux/mm.h>
-#include <linux/slab.h>
-#include <linux/mutex.h>
-#include <linux/export.h>
-#include <linux/dma-mapping.h>
-
-#include <drm/drmP.h>
-#include <drm/drm.h>
-
-#include "gem.h"
-
-static inline struct tegra_bo *host1x_to_drm_bo(struct host1x_bo *bo)
-{
-	return container_of(bo, struct tegra_bo, base);
-}
-
-static void tegra_bo_put(struct host1x_bo *bo)
-{
-	struct tegra_bo *obj = host1x_to_drm_bo(bo);
-	struct drm_device *drm = obj->gem.dev;
-
-	mutex_lock(&drm->struct_mutex);
-	drm_gem_object_unreference(&obj->gem);
-	mutex_unlock(&drm->struct_mutex);
-}
-
-static dma_addr_t tegra_bo_pin(struct host1x_bo *bo, struct sg_table **sgt)
-{
-	struct tegra_bo *obj = host1x_to_drm_bo(bo);
-
-	return obj->paddr;
-}
-
-static void tegra_bo_unpin(struct host1x_bo *bo, struct sg_table *sgt)
-{
-}
-
-static void *tegra_bo_mmap(struct host1x_bo *bo)
-{
-	struct tegra_bo *obj = host1x_to_drm_bo(bo);
-
-	return obj->vaddr;
-}
-
-static void tegra_bo_munmap(struct host1x_bo *bo, void *addr)
-{
-}
-
-static void *tegra_bo_kmap(struct host1x_bo *bo, unsigned int page)
-{
-	struct tegra_bo *obj = host1x_to_drm_bo(bo);
-
-	return obj->vaddr + page * PAGE_SIZE;
-}
-
-static void tegra_bo_kunmap(struct host1x_bo *bo, unsigned int page,
-			    void *addr)
-{
-}
-
-static struct host1x_bo *tegra_bo_get(struct host1x_bo *bo)
-{
-	struct tegra_bo *obj = host1x_to_drm_bo(bo);
-	struct drm_device *drm = obj->gem.dev;
-
-	mutex_lock(&drm->struct_mutex);
-	drm_gem_object_reference(&obj->gem);
-	mutex_unlock(&drm->struct_mutex);
-
-	return bo;
-}
-
-const struct host1x_bo_ops tegra_bo_ops = {
-	.get = tegra_bo_get,
-	.put = tegra_bo_put,
-	.pin = tegra_bo_pin,
-	.unpin = tegra_bo_unpin,
-	.mmap = tegra_bo_mmap,
-	.munmap = tegra_bo_munmap,
-	.kmap = tegra_bo_kmap,
-	.kunmap = tegra_bo_kunmap,
-};
-
-static void tegra_bo_destroy(struct drm_device *drm, struct tegra_bo *bo)
-{
-	dma_free_writecombine(drm->dev, bo->gem.size, bo->vaddr, bo->paddr);
-}
-
-unsigned int tegra_bo_get_mmap_offset(struct tegra_bo *bo)
-{
-	return (unsigned int)bo->gem.map_list.hash.key << PAGE_SHIFT;
-}
-
-struct tegra_bo *tegra_bo_create(struct drm_device *drm, unsigned int size)
-{
-	struct tegra_bo *bo;
-	int err;
-
-	bo = kzalloc(sizeof(*bo), GFP_KERNEL);
-	if (!bo)
-		return ERR_PTR(-ENOMEM);
-
-	host1x_bo_init(&bo->base, &tegra_bo_ops);
-	size = round_up(size, PAGE_SIZE);
-
-	bo->vaddr = dma_alloc_writecombine(drm->dev, size, &bo->paddr,
-					   GFP_KERNEL | __GFP_NOWARN);
-	if (!bo->vaddr) {
-		dev_err(drm->dev, "failed to allocate buffer with size %u\n",
-			size);
-		err = -ENOMEM;
-		goto err_dma;
-	}
-
-	err = drm_gem_object_init(drm, &bo->gem, size);
-	if (err)
-		goto err_init;
-
-	err = drm_gem_create_mmap_offset(&bo->gem);
-	if (err)
-		goto err_mmap;
-
-	return bo;
-
-err_mmap:
-	drm_gem_object_release(&bo->gem);
-err_init:
-	tegra_bo_destroy(drm, bo);
-err_dma:
-	kfree(bo);
-
-	return ERR_PTR(err);
-
-}
-
-struct tegra_bo *tegra_bo_create_with_handle(struct drm_file *file,
-					    struct drm_device *drm,
-					    unsigned int size,
-					    unsigned int *handle)
-{
-	struct tegra_bo *bo;
-	int ret;
-
-	bo = tegra_bo_create(drm, size);
-	if (IS_ERR(bo))
-		return bo;
-
-	ret = drm_gem_handle_create(file, &bo->gem, handle);
-	if (ret)
-		goto err;
-
-	drm_gem_object_unreference_unlocked(&bo->gem);
-
-	return bo;
-
-err:
-	tegra_bo_free_object(&bo->gem);
-	return ERR_PTR(ret);
-}
-
-void tegra_bo_free_object(struct drm_gem_object *gem)
-{
-	struct tegra_bo *bo = to_tegra_bo(gem);
-
-	if (gem->map_list.map)
-		drm_gem_free_mmap_offset(gem);
-
-	drm_gem_object_release(gem);
-	tegra_bo_destroy(gem->dev, bo);
-
-	kfree(bo);
-}
-
-int tegra_bo_dumb_create(struct drm_file *file, struct drm_device *drm,
-			 struct drm_mode_create_dumb *args)
-{
-	int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8);
-	struct tegra_bo *bo;
-
-	if (args->pitch < min_pitch)
-		args->pitch = min_pitch;
-
-	if (args->size < args->pitch * args->height)
-		args->size = args->pitch * args->height;
-
-	bo = tegra_bo_create_with_handle(file, drm, args->size,
-					    &args->handle);
-	if (IS_ERR(bo))
-		return PTR_ERR(bo);
-
-	return 0;
-}
-
-int tegra_bo_dumb_map_offset(struct drm_file *file, struct drm_device *drm,
-			     uint32_t handle, uint64_t *offset)
-{
-	struct drm_gem_object *gem;
-	struct tegra_bo *bo;
-
-	mutex_lock(&drm->struct_mutex);
-
-	gem = drm_gem_object_lookup(drm, file, handle);
-	if (!gem) {
-		dev_err(drm->dev, "failed to lookup GEM object\n");
-		mutex_unlock(&drm->struct_mutex);
-		return -EINVAL;
-	}
-
-	bo = to_tegra_bo(gem);
-
-	*offset = tegra_bo_get_mmap_offset(bo);
-
-	drm_gem_object_unreference(gem);
-
-	mutex_unlock(&drm->struct_mutex);
-
-	return 0;
-}
-
-const struct vm_operations_struct tegra_bo_vm_ops = {
-	.open = drm_gem_vm_open,
-	.close = drm_gem_vm_close,
-};
-
-int tegra_drm_mmap(struct file *file, struct vm_area_struct *vma)
-{
-	struct drm_gem_object *gem;
-	struct tegra_bo *bo;
-	int ret;
-
-	ret = drm_gem_mmap(file, vma);
-	if (ret)
-		return ret;
-
-	gem = vma->vm_private_data;
-	bo = to_tegra_bo(gem);
-
-	ret = remap_pfn_range(vma, vma->vm_start, bo->paddr >> PAGE_SHIFT,
-			      vma->vm_end - vma->vm_start, vma->vm_page_prot);
-	if (ret)
-		drm_gem_vm_close(vma);
-
-	return ret;
-}
-
-int tegra_bo_dumb_destroy(struct drm_file *file, struct drm_device *drm,
-			  unsigned int handle)
-{
-	return drm_gem_handle_delete(file, handle);
-}
diff --git a/drivers/gpu/host1x/drm/gem.h b/drivers/gpu/host1x/drm/gem.h
deleted file mode 100644
index 34de2b486eb7..000000000000
--- a/drivers/gpu/host1x/drm/gem.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Tegra host1x GEM implementation
- *
- * Copyright (c) 2012-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef __HOST1X_GEM_H
-#define __HOST1X_GEM_H
-
-#include <drm/drm.h>
-#include <drm/drmP.h>
-
-#include "host1x_bo.h"
-
-struct tegra_bo {
-	struct drm_gem_object gem;
-	struct host1x_bo base;
-	dma_addr_t paddr;
-	void *vaddr;
-};
-
-static inline struct tegra_bo *to_tegra_bo(struct drm_gem_object *gem)
-{
-	return container_of(gem, struct tegra_bo, gem);
-}
-
-extern const struct host1x_bo_ops tegra_bo_ops;
-
-struct tegra_bo *tegra_bo_create(struct drm_device *drm, unsigned int size);
-struct tegra_bo *tegra_bo_create_with_handle(struct drm_file *file,
-					    struct drm_device *drm,
-					    unsigned int size,
-					    unsigned int *handle);
-void tegra_bo_free_object(struct drm_gem_object *gem);
-unsigned int tegra_bo_get_mmap_offset(struct tegra_bo *bo);
-int tegra_bo_dumb_create(struct drm_file *file, struct drm_device *drm,
-			 struct drm_mode_create_dumb *args);
-int tegra_bo_dumb_map_offset(struct drm_file *file, struct drm_device *drm,
-			     uint32_t handle, uint64_t *offset);
-int tegra_bo_dumb_destroy(struct drm_file *file, struct drm_device *drm,
-			  unsigned int handle);
-
-int tegra_drm_mmap(struct file *file, struct vm_area_struct *vma);
-
-extern const struct vm_operations_struct tegra_bo_vm_ops;
-
-#endif
diff --git a/drivers/gpu/host1x/drm/gr2d.c b/drivers/gpu/host1x/drm/gr2d.c
deleted file mode 100644
index 27ffcf15a4b4..000000000000
--- a/drivers/gpu/host1x/drm/gr2d.c
+++ /dev/null
@@ -1,343 +0,0 @@
-/*
- * drivers/video/tegra/host/gr2d/gr2d.c
- *
- * Tegra Graphics 2D
- *
- * Copyright (c) 2012-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#include <linux/export.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/clk.h>
-
-#include "channel.h"
-#include "drm.h"
-#include "gem.h"
-#include "job.h"
-#include "host1x.h"
-#include "host1x_bo.h"
-#include "host1x_client.h"
-#include "syncpt.h"
-
-struct gr2d {
-	struct host1x_client client;
-	struct clk *clk;
-	struct host1x_channel *channel;
-	unsigned long *addr_regs;
-};
-
-static inline struct gr2d *to_gr2d(struct host1x_client *client)
-{
-	return container_of(client, struct gr2d, client);
-}
-
-static int gr2d_is_addr_reg(struct device *dev, u32 class, u32 reg);
-
-static int gr2d_client_init(struct host1x_client *client,
-			    struct drm_device *drm)
-{
-	return 0;
-}
-
-static int gr2d_client_exit(struct host1x_client *client)
-{
-	return 0;
-}
-
-static int gr2d_open_channel(struct host1x_client *client,
-			     struct host1x_drm_context *context)
-{
-	struct gr2d *gr2d = to_gr2d(client);
-
-	context->channel = host1x_channel_get(gr2d->channel);
-
-	if (!context->channel)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static void gr2d_close_channel(struct host1x_drm_context *context)
-{
-	host1x_channel_put(context->channel);
-}
-
-static struct host1x_bo *host1x_bo_lookup(struct drm_device *drm,
-					  struct drm_file *file,
-					  u32 handle)
-{
-	struct drm_gem_object *gem;
-	struct tegra_bo *bo;
-
-	gem = drm_gem_object_lookup(drm, file, handle);
-	if (!gem)
-		return NULL;
-
-	mutex_lock(&drm->struct_mutex);
-	drm_gem_object_unreference(gem);
-	mutex_unlock(&drm->struct_mutex);
-
-	bo = to_tegra_bo(gem);
-	return &bo->base;
-}
-
-static int gr2d_submit(struct host1x_drm_context *context,
-		       struct drm_tegra_submit *args, struct drm_device *drm,
-		       struct drm_file *file)
-{
-	struct host1x_job *job;
-	unsigned int num_cmdbufs = args->num_cmdbufs;
-	unsigned int num_relocs = args->num_relocs;
-	unsigned int num_waitchks = args->num_waitchks;
-	struct drm_tegra_cmdbuf __user *cmdbufs =
-		(void * __user)(uintptr_t)args->cmdbufs;
-	struct drm_tegra_reloc __user *relocs =
-		(void * __user)(uintptr_t)args->relocs;
-	struct drm_tegra_waitchk __user *waitchks =
-		(void * __user)(uintptr_t)args->waitchks;
-	struct drm_tegra_syncpt syncpt;
-	int err;
-
-	/* We don't yet support other than one syncpt_incr struct per submit */
-	if (args->num_syncpts != 1)
-		return -EINVAL;
-
-	job = host1x_job_alloc(context->channel, args->num_cmdbufs,
-			       args->num_relocs, args->num_waitchks);
-	if (!job)
-		return -ENOMEM;
-
-	job->num_relocs = args->num_relocs;
-	job->num_waitchk = args->num_waitchks;
-	job->client = (u32)args->context;
-	job->class = context->client->class;
-	job->serialize = true;
-
-	while (num_cmdbufs) {
-		struct drm_tegra_cmdbuf cmdbuf;
-		struct host1x_bo *bo;
-
-		err = copy_from_user(&cmdbuf, cmdbufs, sizeof(cmdbuf));
-		if (err)
-			goto fail;
-
-		bo = host1x_bo_lookup(drm, file, cmdbuf.handle);
-		if (!bo) {
-			err = -ENOENT;
-			goto fail;
-		}
-
-		host1x_job_add_gather(job, bo, cmdbuf.words, cmdbuf.offset);
-		num_cmdbufs--;
-		cmdbufs++;
-	}
-
-	err = copy_from_user(job->relocarray, relocs,
-			     sizeof(*relocs) * num_relocs);
-	if (err)
-		goto fail;
-
-	while (num_relocs--) {
-		struct host1x_reloc *reloc = &job->relocarray[num_relocs];
-		struct host1x_bo *cmdbuf, *target;
-
-		cmdbuf = host1x_bo_lookup(drm, file, (u32)reloc->cmdbuf);
-		target = host1x_bo_lookup(drm, file, (u32)reloc->target);
-
-		reloc->cmdbuf = cmdbuf;
-		reloc->target = target;
-
-		if (!reloc->target || !reloc->cmdbuf) {
-			err = -ENOENT;
-			goto fail;
-		}
-	}
-
-	err = copy_from_user(job->waitchk, waitchks,
-			     sizeof(*waitchks) * num_waitchks);
-	if (err)
-		goto fail;
-
-	err = copy_from_user(&syncpt, (void * __user)(uintptr_t)args->syncpts,
-			     sizeof(syncpt));
-	if (err)
-		goto fail;
-
-	job->syncpt_id = syncpt.id;
-	job->syncpt_incrs = syncpt.incrs;
-	job->timeout = 10000;
-	job->is_addr_reg = gr2d_is_addr_reg;
-
-	if (args->timeout && args->timeout < 10000)
-		job->timeout = args->timeout;
-
-	err = host1x_job_pin(job, context->client->dev);
-	if (err)
-		goto fail;
-
-	err = host1x_job_submit(job);
-	if (err)
-		goto fail_submit;
-
-	args->fence = job->syncpt_end;
-
-	host1x_job_put(job);
-	return 0;
-
-fail_submit:
-	host1x_job_unpin(job);
-fail:
-	host1x_job_put(job);
-	return err;
-}
-
-static struct host1x_client_ops gr2d_client_ops = {
-	.drm_init = gr2d_client_init,
-	.drm_exit = gr2d_client_exit,
-	.open_channel = gr2d_open_channel,
-	.close_channel = gr2d_close_channel,
-	.submit = gr2d_submit,
-};
-
-static void gr2d_init_addr_reg_map(struct device *dev, struct gr2d *gr2d)
-{
-	const u32 gr2d_addr_regs[] = {0x1a, 0x1b, 0x26, 0x2b, 0x2c, 0x2d, 0x31,
-				      0x32, 0x48, 0x49, 0x4a, 0x4b, 0x4c};
-	unsigned long *bitmap;
-	int i;
-
-	bitmap = devm_kzalloc(dev, DIV_ROUND_UP(256, BITS_PER_BYTE),
-			      GFP_KERNEL);
-
-	for (i = 0; i < ARRAY_SIZE(gr2d_addr_regs); ++i) {
-		u32 reg = gr2d_addr_regs[i];
-		bitmap[BIT_WORD(reg)] |= BIT_MASK(reg);
-	}
-
-	gr2d->addr_regs = bitmap;
-}
-
-static int gr2d_is_addr_reg(struct device *dev, u32 class, u32 reg)
-{
-	struct gr2d *gr2d = dev_get_drvdata(dev);
-
-	switch (class) {
-	case HOST1X_CLASS_HOST1X:
-		return reg == 0x2b;
-	case HOST1X_CLASS_GR2D:
-	case HOST1X_CLASS_GR2D_SB:
-		reg &= 0xff;
-		if (gr2d->addr_regs[BIT_WORD(reg)] & BIT_MASK(reg))
-			return 1;
-	default:
-		return 0;
-	}
-}
-
-static const struct of_device_id gr2d_match[] = {
-	{ .compatible = "nvidia,tegra30-gr2d" },
-	{ .compatible = "nvidia,tegra20-gr2d" },
-	{ },
-};
-
-static int gr2d_probe(struct platform_device *pdev)
-{
-	struct device *dev = &pdev->dev;
-	struct host1x_drm *host1x = host1x_get_drm_data(dev->parent);
-	int err;
-	struct gr2d *gr2d = NULL;
-	struct host1x_syncpt **syncpts;
-
-	gr2d = devm_kzalloc(dev, sizeof(*gr2d), GFP_KERNEL);
-	if (!gr2d)
-		return -ENOMEM;
-
-	syncpts = devm_kzalloc(dev, sizeof(*syncpts), GFP_KERNEL);
-	if (!syncpts)
-		return -ENOMEM;
-
-	gr2d->clk = devm_clk_get(dev, NULL);
-	if (IS_ERR(gr2d->clk)) {
-		dev_err(dev, "cannot get clock\n");
-		return PTR_ERR(gr2d->clk);
-	}
-
-	err = clk_prepare_enable(gr2d->clk);
-	if (err) {
-		dev_err(dev, "cannot turn on clock\n");
-		return err;
-	}
-
-	gr2d->channel = host1x_channel_request(dev);
-	if (!gr2d->channel)
-		return -ENOMEM;
-
-	*syncpts = host1x_syncpt_request(dev, false);
-	if (!(*syncpts)) {
-		host1x_channel_free(gr2d->channel);
-		return -ENOMEM;
-	}
-
-	gr2d->client.ops = &gr2d_client_ops;
-	gr2d->client.dev = dev;
-	gr2d->client.class = HOST1X_CLASS_GR2D;
-	gr2d->client.syncpts = syncpts;
-	gr2d->client.num_syncpts = 1;
-
-	err = host1x_register_client(host1x, &gr2d->client);
-	if (err < 0) {
-		dev_err(dev, "failed to register host1x client: %d\n", err);
-		return err;
-	}
-
-	gr2d_init_addr_reg_map(dev, gr2d);
-
-	platform_set_drvdata(pdev, gr2d);
-
-	return 0;
-}
-
-static int __exit gr2d_remove(struct platform_device *pdev)
-{
-	struct host1x_drm *host1x = host1x_get_drm_data(pdev->dev.parent);
-	struct gr2d *gr2d = platform_get_drvdata(pdev);
-	unsigned int i;
-	int err;
-
-	err = host1x_unregister_client(host1x, &gr2d->client);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to unregister client: %d\n", err);
-		return err;
-	}
-
-	for (i = 0; i < gr2d->client.num_syncpts; i++)
-		host1x_syncpt_free(gr2d->client.syncpts[i]);
-
-	host1x_channel_free(gr2d->channel);
-	clk_disable_unprepare(gr2d->clk);
-
-	return 0;
-}
-
-struct platform_driver tegra_gr2d_driver = {
-	.probe = gr2d_probe,
-	.remove = __exit_p(gr2d_remove),
-	.driver = {
-		.owner = THIS_MODULE,
-		.name = "gr2d",
-		.of_match_table = gr2d_match,
-	}
-};
diff --git a/drivers/gpu/host1x/drm/hdmi.c b/drivers/gpu/host1x/drm/hdmi.c
deleted file mode 100644
index 01097da09f7f..000000000000
--- a/drivers/gpu/host1x/drm/hdmi.c
+++ /dev/null
@@ -1,1313 +0,0 @@
-/*
- * Copyright (C) 2012 Avionic Design GmbH
- * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/clk.h>
-#include <linux/debugfs.h>
-#include <linux/gpio.h>
-#include <linux/hdmi.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-#include <linux/regulator/consumer.h>
-#include <linux/clk/tegra.h>
-
-#include <drm/drm_edid.h>
-
-#include "hdmi.h"
-#include "drm.h"
-#include "dc.h"
-#include "host1x_client.h"
-
-struct tegra_hdmi {
-	struct host1x_client client;
-	struct tegra_output output;
-	struct device *dev;
-
-	struct regulator *vdd;
-	struct regulator *pll;
-
-	void __iomem *regs;
-	unsigned int irq;
-
-	struct clk *clk_parent;
-	struct clk *clk;
-
-	unsigned int audio_source;
-	unsigned int audio_freq;
-	bool stereo;
-	bool dvi;
-
-	struct drm_info_list *debugfs_files;
-	struct drm_minor *minor;
-	struct dentry *debugfs;
-};
-
-static inline struct tegra_hdmi *
-host1x_client_to_hdmi(struct host1x_client *client)
-{
-	return container_of(client, struct tegra_hdmi, client);
-}
-
-static inline struct tegra_hdmi *to_hdmi(struct tegra_output *output)
-{
-	return container_of(output, struct tegra_hdmi, output);
-}
-
-#define HDMI_AUDIOCLK_FREQ 216000000
-#define HDMI_REKEY_DEFAULT 56
-
-enum {
-	AUTO = 0,
-	SPDIF,
-	HDA,
-};
-
-static inline unsigned long tegra_hdmi_readl(struct tegra_hdmi *hdmi,
-					     unsigned long reg)
-{
-	return readl(hdmi->regs + (reg << 2));
-}
-
-static inline void tegra_hdmi_writel(struct tegra_hdmi *hdmi, unsigned long val,
-				     unsigned long reg)
-{
-	writel(val, hdmi->regs + (reg << 2));
-}
-
-struct tegra_hdmi_audio_config {
-	unsigned int pclk;
-	unsigned int n;
-	unsigned int cts;
-	unsigned int aval;
-};
-
-static const struct tegra_hdmi_audio_config tegra_hdmi_audio_32k[] = {
-	{  25200000, 4096,  25200, 24000 },
-	{  27000000, 4096,  27000, 24000 },
-	{  74250000, 4096,  74250, 24000 },
-	{ 148500000, 4096, 148500, 24000 },
-	{         0,    0,      0,     0 },
-};
-
-static const struct tegra_hdmi_audio_config tegra_hdmi_audio_44_1k[] = {
-	{  25200000, 5880,  26250, 25000 },
-	{  27000000, 5880,  28125, 25000 },
-	{  74250000, 4704,  61875, 20000 },
-	{ 148500000, 4704, 123750, 20000 },
-	{         0,    0,      0,     0 },
-};
-
-static const struct tegra_hdmi_audio_config tegra_hdmi_audio_48k[] = {
-	{  25200000, 6144,  25200, 24000 },
-	{  27000000, 6144,  27000, 24000 },
-	{  74250000, 6144,  74250, 24000 },
-	{ 148500000, 6144, 148500, 24000 },
-	{         0,    0,      0,     0 },
-};
-
-static const struct tegra_hdmi_audio_config tegra_hdmi_audio_88_2k[] = {
-	{  25200000, 11760,  26250, 25000 },
-	{  27000000, 11760,  28125, 25000 },
-	{  74250000,  9408,  61875, 20000 },
-	{ 148500000,  9408, 123750, 20000 },
-	{         0,     0,      0,     0 },
-};
-
-static const struct tegra_hdmi_audio_config tegra_hdmi_audio_96k[] = {
-	{  25200000, 12288,  25200, 24000 },
-	{  27000000, 12288,  27000, 24000 },
-	{  74250000, 12288,  74250, 24000 },
-	{ 148500000, 12288, 148500, 24000 },
-	{         0,     0,      0,     0 },
-};
-
-static const struct tegra_hdmi_audio_config tegra_hdmi_audio_176_4k[] = {
-	{  25200000, 23520,  26250, 25000 },
-	{  27000000, 23520,  28125, 25000 },
-	{  74250000, 18816,  61875, 20000 },
-	{ 148500000, 18816, 123750, 20000 },
-	{         0,     0,      0,     0 },
-};
-
-static const struct tegra_hdmi_audio_config tegra_hdmi_audio_192k[] = {
-	{  25200000, 24576,  25200, 24000 },
-	{  27000000, 24576,  27000, 24000 },
-	{  74250000, 24576,  74250, 24000 },
-	{ 148500000, 24576, 148500, 24000 },
-	{         0,     0,      0,     0 },
-};
-
-struct tmds_config {
-	unsigned int pclk;
-	u32 pll0;
-	u32 pll1;
-	u32 pe_current;
-	u32 drive_current;
-};
-
-static const struct tmds_config tegra2_tmds_config[] = {
-	{ /* slow pixel clock modes */
-		.pclk = 27000000,
-		.pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) |
-			SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(0) |
-			SOR_PLL_TX_REG_LOAD(3),
-		.pll1 = SOR_PLL_TMDS_TERM_ENABLE,
-		.pe_current = PE_CURRENT0(PE_CURRENT_0_0_mA) |
-			PE_CURRENT1(PE_CURRENT_0_0_mA) |
-			PE_CURRENT2(PE_CURRENT_0_0_mA) |
-			PE_CURRENT3(PE_CURRENT_0_0_mA),
-		.drive_current = DRIVE_CURRENT_LANE0(DRIVE_CURRENT_7_125_mA) |
-			DRIVE_CURRENT_LANE1(DRIVE_CURRENT_7_125_mA) |
-			DRIVE_CURRENT_LANE2(DRIVE_CURRENT_7_125_mA) |
-			DRIVE_CURRENT_LANE3(DRIVE_CURRENT_7_125_mA),
-	},
-	{ /* high pixel clock modes */
-		.pclk = UINT_MAX,
-		.pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) |
-			SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(1) |
-			SOR_PLL_TX_REG_LOAD(3),
-		.pll1 = SOR_PLL_TMDS_TERM_ENABLE | SOR_PLL_PE_EN,
-		.pe_current = PE_CURRENT0(PE_CURRENT_6_0_mA) |
-			PE_CURRENT1(PE_CURRENT_6_0_mA) |
-			PE_CURRENT2(PE_CURRENT_6_0_mA) |
-			PE_CURRENT3(PE_CURRENT_6_0_mA),
-		.drive_current = DRIVE_CURRENT_LANE0(DRIVE_CURRENT_7_125_mA) |
-			DRIVE_CURRENT_LANE1(DRIVE_CURRENT_7_125_mA) |
-			DRIVE_CURRENT_LANE2(DRIVE_CURRENT_7_125_mA) |
-			DRIVE_CURRENT_LANE3(DRIVE_CURRENT_7_125_mA),
-	},
-};
-
-static const struct tmds_config tegra3_tmds_config[] = {
-	{ /* 480p modes */
-		.pclk = 27000000,
-		.pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) |
-			SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(0) |
-			SOR_PLL_TX_REG_LOAD(0),
-		.pll1 = SOR_PLL_TMDS_TERM_ENABLE,
-		.pe_current = PE_CURRENT0(PE_CURRENT_0_0_mA) |
-			PE_CURRENT1(PE_CURRENT_0_0_mA) |
-			PE_CURRENT2(PE_CURRENT_0_0_mA) |
-			PE_CURRENT3(PE_CURRENT_0_0_mA),
-		.drive_current = DRIVE_CURRENT_LANE0(DRIVE_CURRENT_5_250_mA) |
-			DRIVE_CURRENT_LANE1(DRIVE_CURRENT_5_250_mA) |
-			DRIVE_CURRENT_LANE2(DRIVE_CURRENT_5_250_mA) |
-			DRIVE_CURRENT_LANE3(DRIVE_CURRENT_5_250_mA),
-	}, { /* 720p modes */
-		.pclk = 74250000,
-		.pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) |
-			SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(1) |
-			SOR_PLL_TX_REG_LOAD(0),
-		.pll1 = SOR_PLL_TMDS_TERM_ENABLE | SOR_PLL_PE_EN,
-		.pe_current = PE_CURRENT0(PE_CURRENT_5_0_mA) |
-			PE_CURRENT1(PE_CURRENT_5_0_mA) |
-			PE_CURRENT2(PE_CURRENT_5_0_mA) |
-			PE_CURRENT3(PE_CURRENT_5_0_mA),
-		.drive_current = DRIVE_CURRENT_LANE0(DRIVE_CURRENT_5_250_mA) |
-			DRIVE_CURRENT_LANE1(DRIVE_CURRENT_5_250_mA) |
-			DRIVE_CURRENT_LANE2(DRIVE_CURRENT_5_250_mA) |
-			DRIVE_CURRENT_LANE3(DRIVE_CURRENT_5_250_mA),
-	}, { /* 1080p modes */
-		.pclk = UINT_MAX,
-		.pll0 = SOR_PLL_BG_V17_S(3) | SOR_PLL_ICHPMP(1) |
-			SOR_PLL_RESISTORSEL | SOR_PLL_VCOCAP(3) |
-			SOR_PLL_TX_REG_LOAD(0),
-		.pll1 = SOR_PLL_TMDS_TERM_ENABLE | SOR_PLL_PE_EN,
-		.pe_current = PE_CURRENT0(PE_CURRENT_5_0_mA) |
-			PE_CURRENT1(PE_CURRENT_5_0_mA) |
-			PE_CURRENT2(PE_CURRENT_5_0_mA) |
-			PE_CURRENT3(PE_CURRENT_5_0_mA),
-		.drive_current = DRIVE_CURRENT_LANE0(DRIVE_CURRENT_5_250_mA) |
-			DRIVE_CURRENT_LANE1(DRIVE_CURRENT_5_250_mA) |
-			DRIVE_CURRENT_LANE2(DRIVE_CURRENT_5_250_mA) |
-			DRIVE_CURRENT_LANE3(DRIVE_CURRENT_5_250_mA),
-	},
-};
-
-static const struct tegra_hdmi_audio_config *
-tegra_hdmi_get_audio_config(unsigned int audio_freq, unsigned int pclk)
-{
-	const struct tegra_hdmi_audio_config *table;
-
-	switch (audio_freq) {
-	case 32000:
-		table = tegra_hdmi_audio_32k;
-		break;
-
-	case 44100:
-		table = tegra_hdmi_audio_44_1k;
-		break;
-
-	case 48000:
-		table = tegra_hdmi_audio_48k;
-		break;
-
-	case 88200:
-		table = tegra_hdmi_audio_88_2k;
-		break;
-
-	case 96000:
-		table = tegra_hdmi_audio_96k;
-		break;
-
-	case 176400:
-		table = tegra_hdmi_audio_176_4k;
-		break;
-
-	case 192000:
-		table = tegra_hdmi_audio_192k;
-		break;
-
-	default:
-		return NULL;
-	}
-
-	while (table->pclk) {
-		if (table->pclk == pclk)
-			return table;
-
-		table++;
-	}
-
-	return NULL;
-}
-
-static void tegra_hdmi_setup_audio_fs_tables(struct tegra_hdmi *hdmi)
-{
-	const unsigned int freqs[] = {
-		32000, 44100, 48000, 88200, 96000, 176400, 192000
-	};
-	unsigned int i;
-
-	for (i = 0; i < ARRAY_SIZE(freqs); i++) {
-		unsigned int f = freqs[i];
-		unsigned int eight_half;
-		unsigned long value;
-		unsigned int delta;
-
-		if (f > 96000)
-			delta = 2;
-		else if (f > 480000)
-			delta = 6;
-		else
-			delta = 9;
-
-		eight_half = (8 * HDMI_AUDIOCLK_FREQ) / (f * 128);
-		value = AUDIO_FS_LOW(eight_half - delta) |
-			AUDIO_FS_HIGH(eight_half + delta);
-		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_FS(i));
-	}
-}
-
-static int tegra_hdmi_setup_audio(struct tegra_hdmi *hdmi, unsigned int pclk)
-{
-	struct device_node *node = hdmi->dev->of_node;
-	const struct tegra_hdmi_audio_config *config;
-	unsigned int offset = 0;
-	unsigned long value;
-
-	switch (hdmi->audio_source) {
-	case HDA:
-		value = AUDIO_CNTRL0_SOURCE_SELECT_HDAL;
-		break;
-
-	case SPDIF:
-		value = AUDIO_CNTRL0_SOURCE_SELECT_SPDIF;
-		break;
-
-	default:
-		value = AUDIO_CNTRL0_SOURCE_SELECT_AUTO;
-		break;
-	}
-
-	if (of_device_is_compatible(node, "nvidia,tegra30-hdmi")) {
-		value |= AUDIO_CNTRL0_ERROR_TOLERANCE(6) |
-			 AUDIO_CNTRL0_FRAMES_PER_BLOCK(0xc0);
-		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_CNTRL0);
-	} else {
-		value |= AUDIO_CNTRL0_INJECT_NULLSMPL;
-		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_AUDIO_CNTRL0);
-
-		value = AUDIO_CNTRL0_ERROR_TOLERANCE(6) |
-			AUDIO_CNTRL0_FRAMES_PER_BLOCK(0xc0);
-		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_CNTRL0);
-	}
-
-	config = tegra_hdmi_get_audio_config(hdmi->audio_freq, pclk);
-	if (!config) {
-		dev_err(hdmi->dev, "cannot set audio to %u at %u pclk\n",
-			hdmi->audio_freq, pclk);
-		return -EINVAL;
-	}
-
-	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_HDMI_ACR_CTRL);
-
-	value = AUDIO_N_RESETF | AUDIO_N_GENERATE_ALTERNATE |
-		AUDIO_N_VALUE(config->n - 1);
-	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_N);
-
-	tegra_hdmi_writel(hdmi, ACR_SUBPACK_N(config->n) | ACR_ENABLE,
-			  HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_HIGH);
-
-	value = ACR_SUBPACK_CTS(config->cts);
-	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_LOW);
-
-	value = SPARE_HW_CTS | SPARE_FORCE_SW_CTS | SPARE_CTS_RESET_VAL(1);
-	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_SPARE);
-
-	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_AUDIO_N);
-	value &= ~AUDIO_N_RESETF;
-	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_AUDIO_N);
-
-	if (of_device_is_compatible(node, "nvidia,tegra30-hdmi")) {
-		switch (hdmi->audio_freq) {
-		case 32000:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0320;
-			break;
-
-		case 44100:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0441;
-			break;
-
-		case 48000:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0480;
-			break;
-
-		case 88200:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0882;
-			break;
-
-		case 96000:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_0960;
-			break;
-
-		case 176400:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_1764;
-			break;
-
-		case 192000:
-			offset = HDMI_NV_PDISP_SOR_AUDIO_AVAL_1920;
-			break;
-		}
-
-		tegra_hdmi_writel(hdmi, config->aval, offset);
-	}
-
-	tegra_hdmi_setup_audio_fs_tables(hdmi);
-
-	return 0;
-}
-
-static inline unsigned long tegra_hdmi_subpack(const u8 *ptr, size_t size)
-{
-	unsigned long value = 0;
-	size_t i;
-
-	for (i = size; i > 0; i--)
-		value = (value << 8) | ptr[i - 1];
-
-	return value;
-}
-
-static void tegra_hdmi_write_infopack(struct tegra_hdmi *hdmi, const void *data,
-				      size_t size)
-{
-	const u8 *ptr = data;
-	unsigned long offset;
-	unsigned long value;
-	size_t i, j;
-
-	switch (ptr[0]) {
-	case HDMI_INFOFRAME_TYPE_AVI:
-		offset = HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_HEADER;
-		break;
-
-	case HDMI_INFOFRAME_TYPE_AUDIO:
-		offset = HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_HEADER;
-		break;
-
-	case HDMI_INFOFRAME_TYPE_VENDOR:
-		offset = HDMI_NV_PDISP_HDMI_GENERIC_HEADER;
-		break;
-
-	default:
-		dev_err(hdmi->dev, "unsupported infoframe type: %02x\n",
-			ptr[0]);
-		return;
-	}
-
-	value = INFOFRAME_HEADER_TYPE(ptr[0]) |
-		INFOFRAME_HEADER_VERSION(ptr[1]) |
-		INFOFRAME_HEADER_LEN(ptr[2]);
-	tegra_hdmi_writel(hdmi, value, offset);
-	offset++;
-
-	/*
-	 * Each subpack contains 7 bytes, divided into:
-	 * - subpack_low: bytes 0 - 3
-	 * - subpack_high: bytes 4 - 6 (with byte 7 padded to 0x00)
-	 */
-	for (i = 3, j = 0; i < size; i += 7, j += 8) {
-		size_t rem = size - i, num = min_t(size_t, rem, 4);
-
-		value = tegra_hdmi_subpack(&ptr[i], num);
-		tegra_hdmi_writel(hdmi, value, offset++);
-
-		num = min_t(size_t, rem - num, 3);
-
-		value = tegra_hdmi_subpack(&ptr[i + 4], num);
-		tegra_hdmi_writel(hdmi, value, offset++);
-	}
-}
-
-static void tegra_hdmi_setup_avi_infoframe(struct tegra_hdmi *hdmi,
-					   struct drm_display_mode *mode)
-{
-	struct hdmi_avi_infoframe frame;
-	u8 buffer[17];
-	ssize_t err;
-
-	if (hdmi->dvi) {
-		tegra_hdmi_writel(hdmi, 0,
-				  HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
-		return;
-	}
-
-	err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode);
-	if (err < 0) {
-		dev_err(hdmi->dev, "failed to setup AVI infoframe: %zd\n", err);
-		return;
-	}
-
-	err = hdmi_avi_infoframe_pack(&frame, buffer, sizeof(buffer));
-	if (err < 0) {
-		dev_err(hdmi->dev, "failed to pack AVI infoframe: %zd\n", err);
-		return;
-	}
-
-	tegra_hdmi_write_infopack(hdmi, buffer, err);
-
-	tegra_hdmi_writel(hdmi, INFOFRAME_CTRL_ENABLE,
-			  HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
-}
-
-static void tegra_hdmi_setup_audio_infoframe(struct tegra_hdmi *hdmi)
-{
-	struct hdmi_audio_infoframe frame;
-	u8 buffer[14];
-	ssize_t err;
-
-	if (hdmi->dvi) {
-		tegra_hdmi_writel(hdmi, 0,
-				  HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
-		return;
-	}
-
-	err = hdmi_audio_infoframe_init(&frame);
-	if (err < 0) {
-		dev_err(hdmi->dev, "failed to initialize audio infoframe: %d\n",
-			err);
-		return;
-	}
-
-	frame.channels = 2;
-
-	err = hdmi_audio_infoframe_pack(&frame, buffer, sizeof(buffer));
-	if (err < 0) {
-		dev_err(hdmi->dev, "failed to pack audio infoframe: %zd\n",
-			err);
-		return;
-	}
-
-	/*
-	 * The audio infoframe has only one set of subpack registers, so the
-	 * infoframe needs to be truncated. One set of subpack registers can
-	 * contain 7 bytes. Including the 3 byte header only the first 10
-	 * bytes can be programmed.
-	 */
-	tegra_hdmi_write_infopack(hdmi, buffer, min(10, err));
-
-	tegra_hdmi_writel(hdmi, INFOFRAME_CTRL_ENABLE,
-			  HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
-}
-
-static void tegra_hdmi_setup_stereo_infoframe(struct tegra_hdmi *hdmi)
-{
-	struct hdmi_vendor_infoframe frame;
-	unsigned long value;
-	u8 buffer[10];
-	ssize_t err;
-
-	if (!hdmi->stereo) {
-		value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
-		value &= ~GENERIC_CTRL_ENABLE;
-		tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
-		return;
-	}
-
-	memset(&frame, 0, sizeof(frame));
-
-	frame.type = HDMI_INFOFRAME_TYPE_VENDOR;
-	frame.version = 0x01;
-	frame.length = 6;
-
-	frame.data[0] = 0x03; /* regid0 */
-	frame.data[1] = 0x0c; /* regid1 */
-	frame.data[2] = 0x00; /* regid2 */
-	frame.data[3] = 0x02 << 5; /* video format */
-
-	/* TODO: 74 MHz limit? */
-	if (1) {
-		frame.data[4] = 0x00 << 4; /* 3D structure */
-	} else {
-		frame.data[4] = 0x08 << 4; /* 3D structure */
-		frame.data[5] = 0x00 << 4; /* 3D ext. data */
-	}
-
-	err = hdmi_vendor_infoframe_pack(&frame, buffer, sizeof(buffer));
-	if (err < 0) {
-		dev_err(hdmi->dev, "failed to pack vendor infoframe: %zd\n",
-			err);
-		return;
-	}
-
-	tegra_hdmi_write_infopack(hdmi, buffer, err);
-
-	value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
-	value |= GENERIC_CTRL_ENABLE;
-	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
-}
-
-static void tegra_hdmi_setup_tmds(struct tegra_hdmi *hdmi,
-				  const struct tmds_config *tmds)
-{
-	unsigned long value;
-
-	tegra_hdmi_writel(hdmi, tmds->pll0, HDMI_NV_PDISP_SOR_PLL0);
-	tegra_hdmi_writel(hdmi, tmds->pll1, HDMI_NV_PDISP_SOR_PLL1);
-	tegra_hdmi_writel(hdmi, tmds->pe_current, HDMI_NV_PDISP_PE_CURRENT);
-
-	value = tmds->drive_current | DRIVE_CURRENT_FUSE_OVERRIDE;
-	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_LANE_DRIVE_CURRENT);
-}
-
-static int tegra_output_hdmi_enable(struct tegra_output *output)
-{
-	unsigned int h_sync_width, h_front_porch, h_back_porch, i, rekey;
-	struct tegra_dc *dc = to_tegra_dc(output->encoder.crtc);
-	struct drm_display_mode *mode = &dc->base.mode;
-	struct tegra_hdmi *hdmi = to_hdmi(output);
-	struct device_node *node = hdmi->dev->of_node;
-	unsigned int pulse_start, div82, pclk;
-	const struct tmds_config *tmds;
-	unsigned int num_tmds;
-	unsigned long value;
-	int retries = 1000;
-	int err;
-
-	pclk = mode->clock * 1000;
-	h_sync_width = mode->hsync_end - mode->hsync_start;
-	h_back_porch = mode->htotal - mode->hsync_end;
-	h_front_porch = mode->hsync_start - mode->hdisplay;
-
-	err = regulator_enable(hdmi->vdd);
-	if (err < 0) {
-		dev_err(hdmi->dev, "failed to enable VDD regulator: %d\n", err);
-		return err;
-	}
-
-	err = regulator_enable(hdmi->pll);
-	if (err < 0) {
-		dev_err(hdmi->dev, "failed to enable PLL regulator: %d\n", err);
-		return err;
-	}
-
-	/*
-	 * This assumes that the display controller will divide its parent
-	 * clock by 2 to generate the pixel clock.
-	 */
-	err = tegra_output_setup_clock(output, hdmi->clk, pclk * 2);
-	if (err < 0) {
-		dev_err(hdmi->dev, "failed to setup clock: %d\n", err);
-		return err;
-	}
-
-	err = clk_set_rate(hdmi->clk, pclk);
-	if (err < 0)
-		return err;
-
-	err = clk_enable(hdmi->clk);
-	if (err < 0) {
-		dev_err(hdmi->dev, "failed to enable clock: %d\n", err);
-		return err;
-	}
-
-	tegra_periph_reset_assert(hdmi->clk);
-	usleep_range(1000, 2000);
-	tegra_periph_reset_deassert(hdmi->clk);
-
-	tegra_dc_writel(dc, VSYNC_H_POSITION(1),
-			DC_DISP_DISP_TIMING_OPTIONS);
-	tegra_dc_writel(dc, DITHER_CONTROL_DISABLE | BASE_COLOR_SIZE888,
-			DC_DISP_DISP_COLOR_CONTROL);
-
-	/* video_preamble uses h_pulse2 */
-	pulse_start = 1 + h_sync_width + h_back_porch - 10;
-
-	tegra_dc_writel(dc, H_PULSE_2_ENABLE, DC_DISP_DISP_SIGNAL_OPTIONS0);
-
-	value = PULSE_MODE_NORMAL | PULSE_POLARITY_HIGH | PULSE_QUAL_VACTIVE |
-		PULSE_LAST_END_A;
-	tegra_dc_writel(dc, value, DC_DISP_H_PULSE2_CONTROL);
-
-	value = PULSE_START(pulse_start) | PULSE_END(pulse_start + 8);
-	tegra_dc_writel(dc, value, DC_DISP_H_PULSE2_POSITION_A);
-
-	value = VSYNC_WINDOW_END(0x210) | VSYNC_WINDOW_START(0x200) |
-		VSYNC_WINDOW_ENABLE;
-	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_VSYNC_WINDOW);
-
-	if (dc->pipe)
-		value = HDMI_SRC_DISPLAYB;
-	else
-		value = HDMI_SRC_DISPLAYA;
-
-	if ((mode->hdisplay == 720) && ((mode->vdisplay == 480) ||
-					(mode->vdisplay == 576)))
-		tegra_hdmi_writel(hdmi,
-				  value | ARM_VIDEO_RANGE_FULL,
-				  HDMI_NV_PDISP_INPUT_CONTROL);
-	else
-		tegra_hdmi_writel(hdmi,
-				  value | ARM_VIDEO_RANGE_LIMITED,
-				  HDMI_NV_PDISP_INPUT_CONTROL);
-
-	div82 = clk_get_rate(hdmi->clk) / 1000000 * 4;
-	value = SOR_REFCLK_DIV_INT(div82 >> 2) | SOR_REFCLK_DIV_FRAC(div82);
-	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_REFCLK);
-
-	if (!hdmi->dvi) {
-		err = tegra_hdmi_setup_audio(hdmi, pclk);
-		if (err < 0)
-			hdmi->dvi = true;
-	}
-
-	if (of_device_is_compatible(node, "nvidia,tegra20-hdmi")) {
-		/*
-		 * TODO: add ELD support
-		 */
-	}
-
-	rekey = HDMI_REKEY_DEFAULT;
-	value = HDMI_CTRL_REKEY(rekey);
-	value |= HDMI_CTRL_MAX_AC_PACKET((h_sync_width + h_back_porch +
-					  h_front_porch - rekey - 18) / 32);
-
-	if (!hdmi->dvi)
-		value |= HDMI_CTRL_ENABLE;
-
-	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_HDMI_CTRL);
-
-	if (hdmi->dvi)
-		tegra_hdmi_writel(hdmi, 0x0,
-				  HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
-	else
-		tegra_hdmi_writel(hdmi, GENERIC_CTRL_AUDIO,
-				  HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
-
-	tegra_hdmi_setup_avi_infoframe(hdmi, mode);
-	tegra_hdmi_setup_audio_infoframe(hdmi);
-	tegra_hdmi_setup_stereo_infoframe(hdmi);
-
-	/* TMDS CONFIG */
-	if (of_device_is_compatible(node, "nvidia,tegra30-hdmi")) {
-		num_tmds = ARRAY_SIZE(tegra3_tmds_config);
-		tmds = tegra3_tmds_config;
-	} else {
-		num_tmds = ARRAY_SIZE(tegra2_tmds_config);
-		tmds = tegra2_tmds_config;
-	}
-
-	for (i = 0; i < num_tmds; i++) {
-		if (pclk <= tmds[i].pclk) {
-			tegra_hdmi_setup_tmds(hdmi, &tmds[i]);
-			break;
-		}
-	}
-
-	tegra_hdmi_writel(hdmi,
-			  SOR_SEQ_CTL_PU_PC(0) |
-			  SOR_SEQ_PU_PC_ALT(0) |
-			  SOR_SEQ_PD_PC(8) |
-			  SOR_SEQ_PD_PC_ALT(8),
-			  HDMI_NV_PDISP_SOR_SEQ_CTL);
-
-	value = SOR_SEQ_INST_WAIT_TIME(1) |
-		SOR_SEQ_INST_WAIT_UNITS_VSYNC |
-		SOR_SEQ_INST_HALT |
-		SOR_SEQ_INST_PIN_A_LOW |
-		SOR_SEQ_INST_PIN_B_LOW |
-		SOR_SEQ_INST_DRIVE_PWM_OUT_LO;
-
-	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_SEQ_INST(0));
-	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_SEQ_INST(8));
-
-	value = 0x1c800;
-	value &= ~SOR_CSTM_ROTCLK(~0);
-	value |= SOR_CSTM_ROTCLK(2);
-	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_CSTM);
-
-	tegra_dc_writel(dc, DISP_CTRL_MODE_STOP, DC_CMD_DISPLAY_COMMAND);
-	tegra_dc_writel(dc, GENERAL_ACT_REQ << 8, DC_CMD_STATE_CONTROL);
-	tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL);
-
-	/* start SOR */
-	tegra_hdmi_writel(hdmi,
-			  SOR_PWR_NORMAL_STATE_PU |
-			  SOR_PWR_NORMAL_START_NORMAL |
-			  SOR_PWR_SAFE_STATE_PD |
-			  SOR_PWR_SETTING_NEW_TRIGGER,
-			  HDMI_NV_PDISP_SOR_PWR);
-	tegra_hdmi_writel(hdmi,
-			  SOR_PWR_NORMAL_STATE_PU |
-			  SOR_PWR_NORMAL_START_NORMAL |
-			  SOR_PWR_SAFE_STATE_PD |
-			  SOR_PWR_SETTING_NEW_DONE,
-			  HDMI_NV_PDISP_SOR_PWR);
-
-	do {
-		BUG_ON(--retries < 0);
-		value = tegra_hdmi_readl(hdmi, HDMI_NV_PDISP_SOR_PWR);
-	} while (value & SOR_PWR_SETTING_NEW_PENDING);
-
-	value = SOR_STATE_ASY_CRCMODE_COMPLETE |
-		SOR_STATE_ASY_OWNER_HEAD0 |
-		SOR_STATE_ASY_SUBOWNER_BOTH |
-		SOR_STATE_ASY_PROTOCOL_SINGLE_TMDS_A |
-		SOR_STATE_ASY_DEPOL_POS;
-
-	/* setup sync polarities */
-	if (mode->flags & DRM_MODE_FLAG_PHSYNC)
-		value |= SOR_STATE_ASY_HSYNCPOL_POS;
-
-	if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-		value |= SOR_STATE_ASY_HSYNCPOL_NEG;
-
-	if (mode->flags & DRM_MODE_FLAG_PVSYNC)
-		value |= SOR_STATE_ASY_VSYNCPOL_POS;
-
-	if (mode->flags & DRM_MODE_FLAG_NVSYNC)
-		value |= SOR_STATE_ASY_VSYNCPOL_NEG;
-
-	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_STATE2);
-
-	value = SOR_STATE_ASY_HEAD_OPMODE_AWAKE | SOR_STATE_ASY_ORMODE_NORMAL;
-	tegra_hdmi_writel(hdmi, value, HDMI_NV_PDISP_SOR_STATE1);
-
-	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_SOR_STATE0);
-	tegra_hdmi_writel(hdmi, SOR_STATE_UPDATE, HDMI_NV_PDISP_SOR_STATE0);
-	tegra_hdmi_writel(hdmi, value | SOR_STATE_ATTACHED,
-			  HDMI_NV_PDISP_SOR_STATE1);
-	tegra_hdmi_writel(hdmi, 0, HDMI_NV_PDISP_SOR_STATE0);
-
-	tegra_dc_writel(dc, HDMI_ENABLE, DC_DISP_DISP_WIN_OPTIONS);
-
-	value = PW0_ENABLE | PW1_ENABLE | PW2_ENABLE | PW3_ENABLE |
-		PW4_ENABLE | PM0_ENABLE | PM1_ENABLE;
-	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_POWER_CONTROL);
-
-	value = DISP_CTRL_MODE_C_DISPLAY;
-	tegra_dc_writel(dc, value, DC_CMD_DISPLAY_COMMAND);
-
-	tegra_dc_writel(dc, GENERAL_ACT_REQ << 8, DC_CMD_STATE_CONTROL);
-	tegra_dc_writel(dc, GENERAL_ACT_REQ, DC_CMD_STATE_CONTROL);
-
-	/* TODO: add HDCP support */
-
-	return 0;
-}
-
-static int tegra_output_hdmi_disable(struct tegra_output *output)
-{
-	struct tegra_hdmi *hdmi = to_hdmi(output);
-
-	tegra_periph_reset_assert(hdmi->clk);
-	clk_disable(hdmi->clk);
-	regulator_disable(hdmi->pll);
-	regulator_disable(hdmi->vdd);
-
-	return 0;
-}
-
-static int tegra_output_hdmi_setup_clock(struct tegra_output *output,
-					 struct clk *clk, unsigned long pclk)
-{
-	struct tegra_hdmi *hdmi = to_hdmi(output);
-	struct clk *base;
-	int err;
-
-	err = clk_set_parent(clk, hdmi->clk_parent);
-	if (err < 0) {
-		dev_err(output->dev, "failed to set parent: %d\n", err);
-		return err;
-	}
-
-	base = clk_get_parent(hdmi->clk_parent);
-
-	/*
-	 * This assumes that the parent clock is pll_d_out0 or pll_d2_out
-	 * respectively, each of which divides the base pll_d by 2.
-	 */
-	err = clk_set_rate(base, pclk * 2);
-	if (err < 0)
-		dev_err(output->dev,
-			"failed to set base clock rate to %lu Hz\n",
-			pclk * 2);
-
-	return 0;
-}
-
-static int tegra_output_hdmi_check_mode(struct tegra_output *output,
-					struct drm_display_mode *mode,
-					enum drm_mode_status *status)
-{
-	struct tegra_hdmi *hdmi = to_hdmi(output);
-	unsigned long pclk = mode->clock * 1000;
-	struct clk *parent;
-	long err;
-
-	parent = clk_get_parent(hdmi->clk_parent);
-
-	err = clk_round_rate(parent, pclk * 4);
-	if (err < 0)
-		*status = MODE_NOCLOCK;
-	else
-		*status = MODE_OK;
-
-	return 0;
-}
-
-static const struct tegra_output_ops hdmi_ops = {
-	.enable = tegra_output_hdmi_enable,
-	.disable = tegra_output_hdmi_disable,
-	.setup_clock = tegra_output_hdmi_setup_clock,
-	.check_mode = tegra_output_hdmi_check_mode,
-};
-
-static int tegra_hdmi_show_regs(struct seq_file *s, void *data)
-{
-	struct drm_info_node *node = s->private;
-	struct tegra_hdmi *hdmi = node->info_ent->data;
-
-#define DUMP_REG(name)						\
-	seq_printf(s, "%-56s %#05x %08lx\n", #name, name,	\
-		tegra_hdmi_readl(hdmi, name))
-
-	DUMP_REG(HDMI_CTXSW);
-	DUMP_REG(HDMI_NV_PDISP_SOR_STATE0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_STATE1);
-	DUMP_REG(HDMI_NV_PDISP_SOR_STATE2);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_AN_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_AN_LSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CN_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CN_LSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_AKSV_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_AKSV_LSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_BKSV_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_BKSV_LSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CKSV_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CKSV_LSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_DKSV_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_DKSV_LSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CMODE);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_MPRIME_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_MPRIME_LSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_SPRIME_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_SPRIME_LSB2);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_SPRIME_LSB1);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_RI);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CS_MSB);
-	DUMP_REG(HDMI_NV_PDISP_RG_HDCP_CS_LSB);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_EMU0);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_EMU_RDATA0);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_EMU1);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_EMU2);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_STATUS);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_HEADER);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_SUBPACK0_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_SUBPACK0_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_STATUS);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_HEADER);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK0_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK0_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK1_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK1_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_STATUS);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_HEADER);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK0_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK0_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK1_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK1_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK2_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK2_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK3_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK3_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0320_SUBPACK_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0320_SUBPACK_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0882_SUBPACK_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0882_SUBPACK_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_1764_SUBPACK_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_1764_SUBPACK_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0480_SUBPACK_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0480_SUBPACK_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0960_SUBPACK_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_0960_SUBPACK_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_1920_SUBPACK_LOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_ACR_1920_SUBPACK_HIGH);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_VSYNC_KEEPOUT);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_VSYNC_WINDOW);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GCP_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GCP_STATUS);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_GCP_SUBPACK);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_CHANNEL_STATUS1);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_CHANNEL_STATUS2);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_EMU0);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_EMU1);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_EMU1_RDATA);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_SPARE);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_SPDIF_CHN_STATUS1);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_SPDIF_CHN_STATUS2);
-	DUMP_REG(HDMI_NV_PDISP_HDMI_HDCPRIF_ROM_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_SOR_CAP);
-	DUMP_REG(HDMI_NV_PDISP_SOR_PWR);
-	DUMP_REG(HDMI_NV_PDISP_SOR_TEST);
-	DUMP_REG(HDMI_NV_PDISP_SOR_PLL0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_PLL1);
-	DUMP_REG(HDMI_NV_PDISP_SOR_PLL2);
-	DUMP_REG(HDMI_NV_PDISP_SOR_CSTM);
-	DUMP_REG(HDMI_NV_PDISP_SOR_LVDS);
-	DUMP_REG(HDMI_NV_PDISP_SOR_CRCA);
-	DUMP_REG(HDMI_NV_PDISP_SOR_CRCB);
-	DUMP_REG(HDMI_NV_PDISP_SOR_BLANK);
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_CTL);
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(0));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(1));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(2));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(3));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(4));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(5));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(6));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(7));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(8));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(9));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(10));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(11));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(12));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(13));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(14));
-	DUMP_REG(HDMI_NV_PDISP_SOR_SEQ_INST(15));
-	DUMP_REG(HDMI_NV_PDISP_SOR_VCRCA0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_VCRCA1);
-	DUMP_REG(HDMI_NV_PDISP_SOR_CCRCA0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_CCRCA1);
-	DUMP_REG(HDMI_NV_PDISP_SOR_EDATAA0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_EDATAA1);
-	DUMP_REG(HDMI_NV_PDISP_SOR_COUNTA0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_COUNTA1);
-	DUMP_REG(HDMI_NV_PDISP_SOR_DEBUGA0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_DEBUGA1);
-	DUMP_REG(HDMI_NV_PDISP_SOR_TRIG);
-	DUMP_REG(HDMI_NV_PDISP_SOR_MSCHECK);
-	DUMP_REG(HDMI_NV_PDISP_SOR_LANE_DRIVE_CURRENT);
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_DEBUG0);
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_DEBUG1);
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_DEBUG2);
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(0));
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(1));
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(2));
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(3));
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(4));
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(5));
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_FS(6));
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_PULSE_WIDTH);
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_THRESHOLD);
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_CNTRL0);
-	DUMP_REG(HDMI_NV_PDISP_AUDIO_N);
-	DUMP_REG(HDMI_NV_PDISP_HDCPRIF_ROM_TIMING);
-	DUMP_REG(HDMI_NV_PDISP_SOR_REFCLK);
-	DUMP_REG(HDMI_NV_PDISP_CRC_CONTROL);
-	DUMP_REG(HDMI_NV_PDISP_INPUT_CONTROL);
-	DUMP_REG(HDMI_NV_PDISP_SCRATCH);
-	DUMP_REG(HDMI_NV_PDISP_PE_CURRENT);
-	DUMP_REG(HDMI_NV_PDISP_KEY_CTRL);
-	DUMP_REG(HDMI_NV_PDISP_KEY_DEBUG0);
-	DUMP_REG(HDMI_NV_PDISP_KEY_DEBUG1);
-	DUMP_REG(HDMI_NV_PDISP_KEY_DEBUG2);
-	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_0);
-	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_1);
-	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_2);
-	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_3);
-	DUMP_REG(HDMI_NV_PDISP_KEY_HDCP_KEY_TRIG);
-	DUMP_REG(HDMI_NV_PDISP_KEY_SKEY_INDEX);
-	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_CNTRL0);
-	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_HDA_ELD_BUFWR);
-	DUMP_REG(HDMI_NV_PDISP_SOR_AUDIO_HDA_PRESENSE);
-
-#undef DUMP_REG
-
-	return 0;
-}
-
-static struct drm_info_list debugfs_files[] = {
-	{ "regs", tegra_hdmi_show_regs, 0, NULL },
-};
-
-static int tegra_hdmi_debugfs_init(struct tegra_hdmi *hdmi,
-				   struct drm_minor *minor)
-{
-	unsigned int i;
-	int err;
-
-	hdmi->debugfs = debugfs_create_dir("hdmi", minor->debugfs_root);
-	if (!hdmi->debugfs)
-		return -ENOMEM;
-
-	hdmi->debugfs_files = kmemdup(debugfs_files, sizeof(debugfs_files),
-				      GFP_KERNEL);
-	if (!hdmi->debugfs_files) {
-		err = -ENOMEM;
-		goto remove;
-	}
-
-	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
-		hdmi->debugfs_files[i].data = hdmi;
-
-	err = drm_debugfs_create_files(hdmi->debugfs_files,
-				       ARRAY_SIZE(debugfs_files),
-				       hdmi->debugfs, minor);
-	if (err < 0)
-		goto free;
-
-	hdmi->minor = minor;
-
-	return 0;
-
-free:
-	kfree(hdmi->debugfs_files);
-	hdmi->debugfs_files = NULL;
-remove:
-	debugfs_remove(hdmi->debugfs);
-	hdmi->debugfs = NULL;
-
-	return err;
-}
-
-static int tegra_hdmi_debugfs_exit(struct tegra_hdmi *hdmi)
-{
-	drm_debugfs_remove_files(hdmi->debugfs_files, ARRAY_SIZE(debugfs_files),
-				 hdmi->minor);
-	hdmi->minor = NULL;
-
-	kfree(hdmi->debugfs_files);
-	hdmi->debugfs_files = NULL;
-
-	debugfs_remove(hdmi->debugfs);
-	hdmi->debugfs = NULL;
-
-	return 0;
-}
-
-static int tegra_hdmi_drm_init(struct host1x_client *client,
-			       struct drm_device *drm)
-{
-	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
-	int err;
-
-	hdmi->output.type = TEGRA_OUTPUT_HDMI;
-	hdmi->output.dev = client->dev;
-	hdmi->output.ops = &hdmi_ops;
-
-	err = tegra_output_init(drm, &hdmi->output);
-	if (err < 0) {
-		dev_err(client->dev, "output setup failed: %d\n", err);
-		return err;
-	}
-
-	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
-		err = tegra_hdmi_debugfs_init(hdmi, drm->primary);
-		if (err < 0)
-			dev_err(client->dev, "debugfs setup failed: %d\n", err);
-	}
-
-	return 0;
-}
-
-static int tegra_hdmi_drm_exit(struct host1x_client *client)
-{
-	struct tegra_hdmi *hdmi = host1x_client_to_hdmi(client);
-	int err;
-
-	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
-		err = tegra_hdmi_debugfs_exit(hdmi);
-		if (err < 0)
-			dev_err(client->dev, "debugfs cleanup failed: %d\n",
-				err);
-	}
-
-	err = tegra_output_disable(&hdmi->output);
-	if (err < 0) {
-		dev_err(client->dev, "output failed to disable: %d\n", err);
-		return err;
-	}
-
-	err = tegra_output_exit(&hdmi->output);
-	if (err < 0) {
-		dev_err(client->dev, "output cleanup failed: %d\n", err);
-		return err;
-	}
-
-	return 0;
-}
-
-static const struct host1x_client_ops hdmi_client_ops = {
-	.drm_init = tegra_hdmi_drm_init,
-	.drm_exit = tegra_hdmi_drm_exit,
-};
-
-static int tegra_hdmi_probe(struct platform_device *pdev)
-{
-	struct host1x_drm *host1x = host1x_get_drm_data(pdev->dev.parent);
-	struct tegra_hdmi *hdmi;
-	struct resource *regs;
-	int err;
-
-	hdmi = devm_kzalloc(&pdev->dev, sizeof(*hdmi), GFP_KERNEL);
-	if (!hdmi)
-		return -ENOMEM;
-
-	hdmi->dev = &pdev->dev;
-	hdmi->audio_source = AUTO;
-	hdmi->audio_freq = 44100;
-	hdmi->stereo = false;
-	hdmi->dvi = false;
-
-	hdmi->clk = devm_clk_get(&pdev->dev, NULL);
-	if (IS_ERR(hdmi->clk)) {
-		dev_err(&pdev->dev, "failed to get clock\n");
-		return PTR_ERR(hdmi->clk);
-	}
-
-	err = clk_prepare(hdmi->clk);
-	if (err < 0)
-		return err;
-
-	hdmi->clk_parent = devm_clk_get(&pdev->dev, "parent");
-	if (IS_ERR(hdmi->clk_parent))
-		return PTR_ERR(hdmi->clk_parent);
-
-	err = clk_prepare(hdmi->clk_parent);
-	if (err < 0)
-		return err;
-
-	err = clk_set_parent(hdmi->clk, hdmi->clk_parent);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to setup clocks: %d\n", err);
-		return err;
-	}
-
-	hdmi->vdd = devm_regulator_get(&pdev->dev, "vdd");
-	if (IS_ERR(hdmi->vdd)) {
-		dev_err(&pdev->dev, "failed to get VDD regulator\n");
-		return PTR_ERR(hdmi->vdd);
-	}
-
-	hdmi->pll = devm_regulator_get(&pdev->dev, "pll");
-	if (IS_ERR(hdmi->pll)) {
-		dev_err(&pdev->dev, "failed to get PLL regulator\n");
-		return PTR_ERR(hdmi->pll);
-	}
-
-	hdmi->output.dev = &pdev->dev;
-
-	err = tegra_output_parse_dt(&hdmi->output);
-	if (err < 0)
-		return err;
-
-	regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!regs)
-		return -ENXIO;
-
-	hdmi->regs = devm_ioremap_resource(&pdev->dev, regs);
-	if (IS_ERR(hdmi->regs))
-		return PTR_ERR(hdmi->regs);
-
-	err = platform_get_irq(pdev, 0);
-	if (err < 0)
-		return err;
-
-	hdmi->irq = err;
-
-	hdmi->client.ops = &hdmi_client_ops;
-	INIT_LIST_HEAD(&hdmi->client.list);
-	hdmi->client.dev = &pdev->dev;
-
-	err = host1x_register_client(host1x, &hdmi->client);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to register host1x client: %d\n",
-			err);
-		return err;
-	}
-
-	platform_set_drvdata(pdev, hdmi);
-
-	return 0;
-}
-
-static int tegra_hdmi_remove(struct platform_device *pdev)
-{
-	struct host1x_drm *host1x = host1x_get_drm_data(pdev->dev.parent);
-	struct tegra_hdmi *hdmi = platform_get_drvdata(pdev);
-	int err;
-
-	err = host1x_unregister_client(host1x, &hdmi->client);
-	if (err < 0) {
-		dev_err(&pdev->dev, "failed to unregister host1x client: %d\n",
-			err);
-		return err;
-	}
-
-	clk_unprepare(hdmi->clk_parent);
-	clk_unprepare(hdmi->clk);
-
-	return 0;
-}
-
-static struct of_device_id tegra_hdmi_of_match[] = {
-	{ .compatible = "nvidia,tegra30-hdmi", },
-	{ .compatible = "nvidia,tegra20-hdmi", },
-	{ },
-};
-
-struct platform_driver tegra_hdmi_driver = {
-	.driver = {
-		.name = "tegra-hdmi",
-		.owner = THIS_MODULE,
-		.of_match_table = tegra_hdmi_of_match,
-	},
-	.probe = tegra_hdmi_probe,
-	.remove = tegra_hdmi_remove,
-};
diff --git a/drivers/gpu/host1x/drm/hdmi.h b/drivers/gpu/host1x/drm/hdmi.h
deleted file mode 100644
index 52ac36e08ccb..000000000000
--- a/drivers/gpu/host1x/drm/hdmi.h
+++ /dev/null
@@ -1,386 +0,0 @@
-/*
- * Copyright (C) 2012 Avionic Design GmbH
- * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#ifndef TEGRA_HDMI_H
-#define TEGRA_HDMI_H 1
-
-/* register definitions */
-#define HDMI_CTXSW						0x00
-
-#define HDMI_NV_PDISP_SOR_STATE0				0x01
-#define SOR_STATE_UPDATE (1 << 0)
-
-#define HDMI_NV_PDISP_SOR_STATE1				0x02
-#define SOR_STATE_ASY_HEAD_OPMODE_AWAKE (2 << 0)
-#define SOR_STATE_ASY_ORMODE_NORMAL     (1 << 2)
-#define SOR_STATE_ATTACHED              (1 << 3)
-
-#define HDMI_NV_PDISP_SOR_STATE2				0x03
-#define SOR_STATE_ASY_OWNER_NONE         (0 <<  0)
-#define SOR_STATE_ASY_OWNER_HEAD0        (1 <<  0)
-#define SOR_STATE_ASY_SUBOWNER_NONE      (0 <<  4)
-#define SOR_STATE_ASY_SUBOWNER_SUBHEAD0  (1 <<  4)
-#define SOR_STATE_ASY_SUBOWNER_SUBHEAD1  (2 <<  4)
-#define SOR_STATE_ASY_SUBOWNER_BOTH      (3 <<  4)
-#define SOR_STATE_ASY_CRCMODE_ACTIVE     (0 <<  6)
-#define SOR_STATE_ASY_CRCMODE_COMPLETE   (1 <<  6)
-#define SOR_STATE_ASY_CRCMODE_NON_ACTIVE (2 <<  6)
-#define SOR_STATE_ASY_PROTOCOL_SINGLE_TMDS_A (1 << 8)
-#define SOR_STATE_ASY_PROTOCOL_CUSTOM        (15 << 8)
-#define SOR_STATE_ASY_HSYNCPOL_POS       (0 << 12)
-#define SOR_STATE_ASY_HSYNCPOL_NEG       (1 << 12)
-#define SOR_STATE_ASY_VSYNCPOL_POS       (0 << 13)
-#define SOR_STATE_ASY_VSYNCPOL_NEG       (1 << 13)
-#define SOR_STATE_ASY_DEPOL_POS          (0 << 14)
-#define SOR_STATE_ASY_DEPOL_NEG          (1 << 14)
-
-#define HDMI_NV_PDISP_RG_HDCP_AN_MSB				0x04
-#define HDMI_NV_PDISP_RG_HDCP_AN_LSB				0x05
-#define HDMI_NV_PDISP_RG_HDCP_CN_MSB				0x06
-#define HDMI_NV_PDISP_RG_HDCP_CN_LSB				0x07
-#define HDMI_NV_PDISP_RG_HDCP_AKSV_MSB				0x08
-#define HDMI_NV_PDISP_RG_HDCP_AKSV_LSB				0x09
-#define HDMI_NV_PDISP_RG_HDCP_BKSV_MSB				0x0a
-#define HDMI_NV_PDISP_RG_HDCP_BKSV_LSB				0x0b
-#define HDMI_NV_PDISP_RG_HDCP_CKSV_MSB				0x0c
-#define HDMI_NV_PDISP_RG_HDCP_CKSV_LSB				0x0d
-#define HDMI_NV_PDISP_RG_HDCP_DKSV_MSB				0x0e
-#define HDMI_NV_PDISP_RG_HDCP_DKSV_LSB				0x0f
-#define HDMI_NV_PDISP_RG_HDCP_CTRL				0x10
-#define HDMI_NV_PDISP_RG_HDCP_CMODE				0x11
-#define HDMI_NV_PDISP_RG_HDCP_MPRIME_MSB			0x12
-#define HDMI_NV_PDISP_RG_HDCP_MPRIME_LSB			0x13
-#define HDMI_NV_PDISP_RG_HDCP_SPRIME_MSB			0x14
-#define HDMI_NV_PDISP_RG_HDCP_SPRIME_LSB2			0x15
-#define HDMI_NV_PDISP_RG_HDCP_SPRIME_LSB1			0x16
-#define HDMI_NV_PDISP_RG_HDCP_RI				0x17
-#define HDMI_NV_PDISP_RG_HDCP_CS_MSB				0x18
-#define HDMI_NV_PDISP_RG_HDCP_CS_LSB				0x19
-#define HDMI_NV_PDISP_HDMI_AUDIO_EMU0				0x1a
-#define HDMI_NV_PDISP_HDMI_AUDIO_EMU_RDATA0			0x1b
-#define HDMI_NV_PDISP_HDMI_AUDIO_EMU1				0x1c
-#define HDMI_NV_PDISP_HDMI_AUDIO_EMU2				0x1d
-
-#define HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_CTRL			0x1e
-#define HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_STATUS		0x1f
-#define HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_HEADER		0x20
-#define HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_SUBPACK0_LOW		0x21
-#define HDMI_NV_PDISP_HDMI_AUDIO_INFOFRAME_SUBPACK0_HIGH	0x22
-#define HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_CTRL			0x23
-#define HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_STATUS			0x24
-#define HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_HEADER			0x25
-#define HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK0_LOW		0x26
-#define HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK0_HIGH		0x27
-#define HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK1_LOW		0x28
-#define HDMI_NV_PDISP_HDMI_AVI_INFOFRAME_SUBPACK1_HIGH		0x29
-
-#define INFOFRAME_CTRL_ENABLE (1 << 0)
-
-#define INFOFRAME_HEADER_TYPE(x)    (((x) & 0xff) <<  0)
-#define INFOFRAME_HEADER_VERSION(x) (((x) & 0xff) <<  8)
-#define INFOFRAME_HEADER_LEN(x)     (((x) & 0x0f) << 16)
-
-#define HDMI_NV_PDISP_HDMI_GENERIC_CTRL				0x2a
-#define GENERIC_CTRL_ENABLE (1 <<  0)
-#define GENERIC_CTRL_OTHER  (1 <<  4)
-#define GENERIC_CTRL_SINGLE (1 <<  8)
-#define GENERIC_CTRL_HBLANK (1 << 12)
-#define GENERIC_CTRL_AUDIO  (1 << 16)
-
-#define HDMI_NV_PDISP_HDMI_GENERIC_STATUS			0x2b
-#define HDMI_NV_PDISP_HDMI_GENERIC_HEADER			0x2c
-#define HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK0_LOW			0x2d
-#define HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK0_HIGH		0x2e
-#define HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK1_LOW			0x2f
-#define HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK1_HIGH		0x30
-#define HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK2_LOW			0x31
-#define HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK2_HIGH		0x32
-#define HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK3_LOW			0x33
-#define HDMI_NV_PDISP_HDMI_GENERIC_SUBPACK3_HIGH		0x34
-
-#define HDMI_NV_PDISP_HDMI_ACR_CTRL				0x35
-#define HDMI_NV_PDISP_HDMI_ACR_0320_SUBPACK_LOW			0x36
-#define HDMI_NV_PDISP_HDMI_ACR_0320_SUBPACK_HIGH		0x37
-#define HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_LOW			0x38
-#define HDMI_NV_PDISP_HDMI_ACR_0441_SUBPACK_HIGH		0x39
-#define HDMI_NV_PDISP_HDMI_ACR_0882_SUBPACK_LOW			0x3a
-#define HDMI_NV_PDISP_HDMI_ACR_0882_SUBPACK_HIGH		0x3b
-#define HDMI_NV_PDISP_HDMI_ACR_1764_SUBPACK_LOW			0x3c
-#define HDMI_NV_PDISP_HDMI_ACR_1764_SUBPACK_HIGH		0x3d
-#define HDMI_NV_PDISP_HDMI_ACR_0480_SUBPACK_LOW			0x3e
-#define HDMI_NV_PDISP_HDMI_ACR_0480_SUBPACK_HIGH		0x3f
-#define HDMI_NV_PDISP_HDMI_ACR_0960_SUBPACK_LOW			0x40
-#define HDMI_NV_PDISP_HDMI_ACR_0960_SUBPACK_HIGH		0x41
-#define HDMI_NV_PDISP_HDMI_ACR_1920_SUBPACK_LOW			0x42
-#define HDMI_NV_PDISP_HDMI_ACR_1920_SUBPACK_HIGH		0x43
-
-#define ACR_SUBPACK_CTS(x) (((x) & 0xffffff) << 8)
-#define ACR_SUBPACK_N(x)   (((x) & 0xffffff) << 0)
-#define ACR_ENABLE         (1 << 31)
-
-#define HDMI_NV_PDISP_HDMI_CTRL					0x44
-#define HDMI_CTRL_REKEY(x)         (((x) & 0x7f) <<  0)
-#define HDMI_CTRL_MAX_AC_PACKET(x) (((x) & 0x1f) << 16)
-#define HDMI_CTRL_ENABLE           (1 << 30)
-
-#define HDMI_NV_PDISP_HDMI_VSYNC_KEEPOUT			0x45
-#define HDMI_NV_PDISP_HDMI_VSYNC_WINDOW				0x46
-#define VSYNC_WINDOW_END(x)   (((x) & 0x3ff) <<  0)
-#define VSYNC_WINDOW_START(x) (((x) & 0x3ff) << 16)
-#define VSYNC_WINDOW_ENABLE   (1 << 31)
-
-#define HDMI_NV_PDISP_HDMI_GCP_CTRL				0x47
-#define HDMI_NV_PDISP_HDMI_GCP_STATUS				0x48
-#define HDMI_NV_PDISP_HDMI_GCP_SUBPACK				0x49
-#define HDMI_NV_PDISP_HDMI_CHANNEL_STATUS1			0x4a
-#define HDMI_NV_PDISP_HDMI_CHANNEL_STATUS2			0x4b
-#define HDMI_NV_PDISP_HDMI_EMU0					0x4c
-#define HDMI_NV_PDISP_HDMI_EMU1					0x4d
-#define HDMI_NV_PDISP_HDMI_EMU1_RDATA				0x4e
-
-#define HDMI_NV_PDISP_HDMI_SPARE				0x4f
-#define SPARE_HW_CTS           (1 << 0)
-#define SPARE_FORCE_SW_CTS     (1 << 1)
-#define SPARE_CTS_RESET_VAL(x) (((x) & 0x7) << 16)
-
-#define HDMI_NV_PDISP_HDMI_SPDIF_CHN_STATUS1			0x50
-#define HDMI_NV_PDISP_HDMI_SPDIF_CHN_STATUS2			0x51
-#define HDMI_NV_PDISP_HDMI_HDCPRIF_ROM_CTRL			0x53
-#define HDMI_NV_PDISP_SOR_CAP					0x54
-#define HDMI_NV_PDISP_SOR_PWR					0x55
-#define SOR_PWR_NORMAL_STATE_PD     (0 <<  0)
-#define SOR_PWR_NORMAL_STATE_PU     (1 <<  0)
-#define SOR_PWR_NORMAL_START_NORMAL (0 <<  1)
-#define SOR_PWR_NORMAL_START_ALT    (1 <<  1)
-#define SOR_PWR_SAFE_STATE_PD       (0 << 16)
-#define SOR_PWR_SAFE_STATE_PU       (1 << 16)
-#define SOR_PWR_SETTING_NEW_DONE    (0 << 31)
-#define SOR_PWR_SETTING_NEW_PENDING (1 << 31)
-#define SOR_PWR_SETTING_NEW_TRIGGER (1 << 31)
-
-#define HDMI_NV_PDISP_SOR_TEST					0x56
-#define HDMI_NV_PDISP_SOR_PLL0					0x57
-#define SOR_PLL_PWR            (1 << 0)
-#define SOR_PLL_PDBG           (1 << 1)
-#define SOR_PLL_VCAPD          (1 << 2)
-#define SOR_PLL_PDPORT         (1 << 3)
-#define SOR_PLL_RESISTORSEL    (1 << 4)
-#define SOR_PLL_PULLDOWN       (1 << 5)
-#define SOR_PLL_VCOCAP(x)      (((x) & 0xf) <<  8)
-#define SOR_PLL_BG_V17_S(x)    (((x) & 0xf) << 12)
-#define SOR_PLL_FILTER(x)      (((x) & 0xf) << 16)
-#define SOR_PLL_ICHPMP(x)      (((x) & 0xf) << 24)
-#define SOR_PLL_TX_REG_LOAD(x) (((x) & 0xf) << 28)
-
-#define HDMI_NV_PDISP_SOR_PLL1					0x58
-#define SOR_PLL_TMDS_TERM_ENABLE (1 << 8)
-#define SOR_PLL_TMDS_TERMADJ(x)  (((x) & 0xf) <<  9)
-#define SOR_PLL_LOADADJ(x)       (((x) & 0xf) << 20)
-#define SOR_PLL_PE_EN            (1 << 28)
-#define SOR_PLL_HALF_FULL_PE     (1 << 29)
-#define SOR_PLL_S_D_PIN_PE       (1 << 30)
-
-#define HDMI_NV_PDISP_SOR_PLL2					0x59
-
-#define HDMI_NV_PDISP_SOR_CSTM					0x5a
-#define SOR_CSTM_ROTCLK(x) (((x) & 0xf) << 24)
-
-#define HDMI_NV_PDISP_SOR_LVDS					0x5b
-#define HDMI_NV_PDISP_SOR_CRCA					0x5c
-#define HDMI_NV_PDISP_SOR_CRCB					0x5d
-#define HDMI_NV_PDISP_SOR_BLANK					0x5e
-#define HDMI_NV_PDISP_SOR_SEQ_CTL				0x5f
-#define SOR_SEQ_CTL_PU_PC(x) (((x) & 0xf) <<  0)
-#define SOR_SEQ_PU_PC_ALT(x) (((x) & 0xf) <<  4)
-#define SOR_SEQ_PD_PC(x)     (((x) & 0xf) <<  8)
-#define SOR_SEQ_PD_PC_ALT(x) (((x) & 0xf) << 12)
-#define SOR_SEQ_PC(x)        (((x) & 0xf) << 16)
-#define SOR_SEQ_STATUS       (1 << 28)
-#define SOR_SEQ_SWITCH       (1 << 30)
-
-#define HDMI_NV_PDISP_SOR_SEQ_INST(x)				(0x60 + (x))
-
-#define SOR_SEQ_INST_WAIT_TIME(x)     (((x) & 0x3ff) << 0)
-#define SOR_SEQ_INST_WAIT_UNITS_VSYNC (2 << 12)
-#define SOR_SEQ_INST_HALT             (1 << 15)
-#define SOR_SEQ_INST_PIN_A_LOW        (0 << 21)
-#define SOR_SEQ_INST_PIN_A_HIGH       (1 << 21)
-#define SOR_SEQ_INST_PIN_B_LOW        (0 << 22)
-#define SOR_SEQ_INST_PIN_B_HIGH       (1 << 22)
-#define SOR_SEQ_INST_DRIVE_PWM_OUT_LO (1 << 23)
-
-#define HDMI_NV_PDISP_SOR_VCRCA0				0x72
-#define HDMI_NV_PDISP_SOR_VCRCA1				0x73
-#define HDMI_NV_PDISP_SOR_CCRCA0				0x74
-#define HDMI_NV_PDISP_SOR_CCRCA1				0x75
-#define HDMI_NV_PDISP_SOR_EDATAA0				0x76
-#define HDMI_NV_PDISP_SOR_EDATAA1				0x77
-#define HDMI_NV_PDISP_SOR_COUNTA0				0x78
-#define HDMI_NV_PDISP_SOR_COUNTA1				0x79
-#define HDMI_NV_PDISP_SOR_DEBUGA0				0x7a
-#define HDMI_NV_PDISP_SOR_DEBUGA1				0x7b
-#define HDMI_NV_PDISP_SOR_TRIG					0x7c
-#define HDMI_NV_PDISP_SOR_MSCHECK				0x7d
-
-#define HDMI_NV_PDISP_SOR_LANE_DRIVE_CURRENT			0x7e
-#define DRIVE_CURRENT_LANE0(x)      (((x) & 0x3f) <<  0)
-#define DRIVE_CURRENT_LANE1(x)      (((x) & 0x3f) <<  8)
-#define DRIVE_CURRENT_LANE2(x)      (((x) & 0x3f) << 16)
-#define DRIVE_CURRENT_LANE3(x)      (((x) & 0x3f) << 24)
-#define DRIVE_CURRENT_FUSE_OVERRIDE (1 << 31)
-
-#define DRIVE_CURRENT_1_500_mA  0x00
-#define DRIVE_CURRENT_1_875_mA  0x01
-#define DRIVE_CURRENT_2_250_mA  0x02
-#define DRIVE_CURRENT_2_625_mA  0x03
-#define DRIVE_CURRENT_3_000_mA  0x04
-#define DRIVE_CURRENT_3_375_mA  0x05
-#define DRIVE_CURRENT_3_750_mA  0x06
-#define DRIVE_CURRENT_4_125_mA  0x07
-#define DRIVE_CURRENT_4_500_mA  0x08
-#define DRIVE_CURRENT_4_875_mA  0x09
-#define DRIVE_CURRENT_5_250_mA  0x0a
-#define DRIVE_CURRENT_5_625_mA  0x0b
-#define DRIVE_CURRENT_6_000_mA  0x0c
-#define DRIVE_CURRENT_6_375_mA  0x0d
-#define DRIVE_CURRENT_6_750_mA  0x0e
-#define DRIVE_CURRENT_7_125_mA  0x0f
-#define DRIVE_CURRENT_7_500_mA  0x10
-#define DRIVE_CURRENT_7_875_mA  0x11
-#define DRIVE_CURRENT_8_250_mA  0x12
-#define DRIVE_CURRENT_8_625_mA  0x13
-#define DRIVE_CURRENT_9_000_mA  0x14
-#define DRIVE_CURRENT_9_375_mA  0x15
-#define DRIVE_CURRENT_9_750_mA  0x16
-#define DRIVE_CURRENT_10_125_mA 0x17
-#define DRIVE_CURRENT_10_500_mA 0x18
-#define DRIVE_CURRENT_10_875_mA 0x19
-#define DRIVE_CURRENT_11_250_mA 0x1a
-#define DRIVE_CURRENT_11_625_mA 0x1b
-#define DRIVE_CURRENT_12_000_mA 0x1c
-#define DRIVE_CURRENT_12_375_mA 0x1d
-#define DRIVE_CURRENT_12_750_mA 0x1e
-#define DRIVE_CURRENT_13_125_mA 0x1f
-#define DRIVE_CURRENT_13_500_mA 0x20
-#define DRIVE_CURRENT_13_875_mA 0x21
-#define DRIVE_CURRENT_14_250_mA 0x22
-#define DRIVE_CURRENT_14_625_mA 0x23
-#define DRIVE_CURRENT_15_000_mA 0x24
-#define DRIVE_CURRENT_15_375_mA 0x25
-#define DRIVE_CURRENT_15_750_mA 0x26
-#define DRIVE_CURRENT_16_125_mA 0x27
-#define DRIVE_CURRENT_16_500_mA 0x28
-#define DRIVE_CURRENT_16_875_mA 0x29
-#define DRIVE_CURRENT_17_250_mA 0x2a
-#define DRIVE_CURRENT_17_625_mA 0x2b
-#define DRIVE_CURRENT_18_000_mA 0x2c
-#define DRIVE_CURRENT_18_375_mA 0x2d
-#define DRIVE_CURRENT_18_750_mA 0x2e
-#define DRIVE_CURRENT_19_125_mA 0x2f
-#define DRIVE_CURRENT_19_500_mA 0x30
-#define DRIVE_CURRENT_19_875_mA 0x31
-#define DRIVE_CURRENT_20_250_mA 0x32
-#define DRIVE_CURRENT_20_625_mA 0x33
-#define DRIVE_CURRENT_21_000_mA 0x34
-#define DRIVE_CURRENT_21_375_mA 0x35
-#define DRIVE_CURRENT_21_750_mA 0x36
-#define DRIVE_CURRENT_22_125_mA 0x37
-#define DRIVE_CURRENT_22_500_mA 0x38
-#define DRIVE_CURRENT_22_875_mA 0x39
-#define DRIVE_CURRENT_23_250_mA 0x3a
-#define DRIVE_CURRENT_23_625_mA 0x3b
-#define DRIVE_CURRENT_24_000_mA 0x3c
-#define DRIVE_CURRENT_24_375_mA 0x3d
-#define DRIVE_CURRENT_24_750_mA 0x3e
-
-#define HDMI_NV_PDISP_AUDIO_DEBUG0				0x7f
-#define HDMI_NV_PDISP_AUDIO_DEBUG1				0x80
-#define HDMI_NV_PDISP_AUDIO_DEBUG2				0x81
-
-#define HDMI_NV_PDISP_AUDIO_FS(x)				(0x82 + (x))
-#define AUDIO_FS_LOW(x)  (((x) & 0xfff) <<  0)
-#define AUDIO_FS_HIGH(x) (((x) & 0xfff) << 16)
-
-#define HDMI_NV_PDISP_AUDIO_PULSE_WIDTH				0x89
-#define HDMI_NV_PDISP_AUDIO_THRESHOLD				0x8a
-#define HDMI_NV_PDISP_AUDIO_CNTRL0				0x8b
-#define AUDIO_CNTRL0_ERROR_TOLERANCE(x)  (((x) & 0xff) << 0)
-#define AUDIO_CNTRL0_SOURCE_SELECT_AUTO  (0 << 20)
-#define AUDIO_CNTRL0_SOURCE_SELECT_SPDIF (1 << 20)
-#define AUDIO_CNTRL0_SOURCE_SELECT_HDAL  (2 << 20)
-#define AUDIO_CNTRL0_FRAMES_PER_BLOCK(x) (((x) & 0xff) << 24)
-
-#define HDMI_NV_PDISP_AUDIO_N					0x8c
-#define AUDIO_N_VALUE(x)           (((x) & 0xfffff) << 0)
-#define AUDIO_N_RESETF             (1 << 20)
-#define AUDIO_N_GENERATE_NORMAL    (0 << 24)
-#define AUDIO_N_GENERATE_ALTERNATE (1 << 24)
-
-#define HDMI_NV_PDISP_HDCPRIF_ROM_TIMING			0x94
-#define HDMI_NV_PDISP_SOR_REFCLK				0x95
-#define SOR_REFCLK_DIV_INT(x)  (((x) & 0xff) << 8)
-#define SOR_REFCLK_DIV_FRAC(x) (((x) & 0x03) << 6)
-
-#define HDMI_NV_PDISP_CRC_CONTROL				0x96
-#define HDMI_NV_PDISP_INPUT_CONTROL				0x97
-#define HDMI_SRC_DISPLAYA       (0 << 0)
-#define HDMI_SRC_DISPLAYB       (1 << 0)
-#define ARM_VIDEO_RANGE_FULL    (0 << 1)
-#define ARM_VIDEO_RANGE_LIMITED (1 << 1)
-
-#define HDMI_NV_PDISP_SCRATCH					0x98
-#define HDMI_NV_PDISP_PE_CURRENT				0x99
-#define PE_CURRENT0(x) (((x) & 0xf) << 0)
-#define PE_CURRENT1(x) (((x) & 0xf) << 8)
-#define PE_CURRENT2(x) (((x) & 0xf) << 16)
-#define PE_CURRENT3(x) (((x) & 0xf) << 24)
-
-#define PE_CURRENT_0_0_mA 0x0
-#define PE_CURRENT_0_5_mA 0x1
-#define PE_CURRENT_1_0_mA 0x2
-#define PE_CURRENT_1_5_mA 0x3
-#define PE_CURRENT_2_0_mA 0x4
-#define PE_CURRENT_2_5_mA 0x5
-#define PE_CURRENT_3_0_mA 0x6
-#define PE_CURRENT_3_5_mA 0x7
-#define PE_CURRENT_4_0_mA 0x8
-#define PE_CURRENT_4_5_mA 0x9
-#define PE_CURRENT_5_0_mA 0xa
-#define PE_CURRENT_5_5_mA 0xb
-#define PE_CURRENT_6_0_mA 0xc
-#define PE_CURRENT_6_5_mA 0xd
-#define PE_CURRENT_7_0_mA 0xe
-#define PE_CURRENT_7_5_mA 0xf
-
-#define HDMI_NV_PDISP_KEY_CTRL					0x9a
-#define HDMI_NV_PDISP_KEY_DEBUG0				0x9b
-#define HDMI_NV_PDISP_KEY_DEBUG1				0x9c
-#define HDMI_NV_PDISP_KEY_DEBUG2				0x9d
-#define HDMI_NV_PDISP_KEY_HDCP_KEY_0				0x9e
-#define HDMI_NV_PDISP_KEY_HDCP_KEY_1				0x9f
-#define HDMI_NV_PDISP_KEY_HDCP_KEY_2				0xa0
-#define HDMI_NV_PDISP_KEY_HDCP_KEY_3				0xa1
-#define HDMI_NV_PDISP_KEY_HDCP_KEY_TRIG				0xa2
-#define HDMI_NV_PDISP_KEY_SKEY_INDEX				0xa3
-
-#define HDMI_NV_PDISP_SOR_AUDIO_CNTRL0				0xac
-#define AUDIO_CNTRL0_INJECT_NULLSMPL (1 << 29)
-#define HDMI_NV_PDISP_SOR_AUDIO_HDA_ELD_BUFWR			0xbc
-#define HDMI_NV_PDISP_SOR_AUDIO_HDA_PRESENSE			0xbd
-
-#define HDMI_NV_PDISP_SOR_AUDIO_AVAL_0320    0xbf
-#define HDMI_NV_PDISP_SOR_AUDIO_AVAL_0441    0xc0
-#define HDMI_NV_PDISP_SOR_AUDIO_AVAL_0882    0xc1
-#define HDMI_NV_PDISP_SOR_AUDIO_AVAL_1764    0xc2
-#define HDMI_NV_PDISP_SOR_AUDIO_AVAL_0480    0xc3
-#define HDMI_NV_PDISP_SOR_AUDIO_AVAL_0960    0xc4
-#define HDMI_NV_PDISP_SOR_AUDIO_AVAL_1920    0xc5
-#define HDMI_NV_PDISP_SOR_AUDIO_AVAL_DEFAULT 0xc5
-
-#endif /* TEGRA_HDMI_H */
diff --git a/drivers/gpu/host1x/drm/output.c b/drivers/gpu/host1x/drm/output.c
deleted file mode 100644
index 8140fc6c34d8..000000000000
--- a/drivers/gpu/host1x/drm/output.c
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- * Copyright (C) 2012 Avionic Design GmbH
- * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/of_gpio.h>
-#include <linux/of_i2c.h>
-
-#include "drm.h"
-
-static int tegra_connector_get_modes(struct drm_connector *connector)
-{
-	struct tegra_output *output = connector_to_output(connector);
-	struct edid *edid = NULL;
-	int err = 0;
-
-	if (output->edid)
-		edid = kmemdup(output->edid, sizeof(*edid), GFP_KERNEL);
-	else if (output->ddc)
-		edid = drm_get_edid(connector, output->ddc);
-
-	drm_mode_connector_update_edid_property(connector, edid);
-
-	if (edid) {
-		err = drm_add_edid_modes(connector, edid);
-		kfree(edid);
-	}
-
-	return err;
-}
-
-static int tegra_connector_mode_valid(struct drm_connector *connector,
-				      struct drm_display_mode *mode)
-{
-	struct tegra_output *output = connector_to_output(connector);
-	enum drm_mode_status status = MODE_OK;
-	int err;
-
-	err = tegra_output_check_mode(output, mode, &status);
-	if (err < 0)
-		return MODE_ERROR;
-
-	return status;
-}
-
-static struct drm_encoder *
-tegra_connector_best_encoder(struct drm_connector *connector)
-{
-	struct tegra_output *output = connector_to_output(connector);
-
-	return &output->encoder;
-}
-
-static const struct drm_connector_helper_funcs connector_helper_funcs = {
-	.get_modes = tegra_connector_get_modes,
-	.mode_valid = tegra_connector_mode_valid,
-	.best_encoder = tegra_connector_best_encoder,
-};
-
-static enum drm_connector_status
-tegra_connector_detect(struct drm_connector *connector, bool force)
-{
-	struct tegra_output *output = connector_to_output(connector);
-	enum drm_connector_status status = connector_status_unknown;
-
-	if (gpio_is_valid(output->hpd_gpio)) {
-		if (gpio_get_value(output->hpd_gpio) == 0)
-			status = connector_status_disconnected;
-		else
-			status = connector_status_connected;
-	} else {
-		if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)
-			status = connector_status_connected;
-	}
-
-	return status;
-}
-
-static void tegra_connector_destroy(struct drm_connector *connector)
-{
-	drm_sysfs_connector_remove(connector);
-	drm_connector_cleanup(connector);
-}
-
-static const struct drm_connector_funcs connector_funcs = {
-	.dpms = drm_helper_connector_dpms,
-	.detect = tegra_connector_detect,
-	.fill_modes = drm_helper_probe_single_connector_modes,
-	.destroy = tegra_connector_destroy,
-};
-
-static void tegra_encoder_destroy(struct drm_encoder *encoder)
-{
-	drm_encoder_cleanup(encoder);
-}
-
-static const struct drm_encoder_funcs encoder_funcs = {
-	.destroy = tegra_encoder_destroy,
-};
-
-static void tegra_encoder_dpms(struct drm_encoder *encoder, int mode)
-{
-}
-
-static bool tegra_encoder_mode_fixup(struct drm_encoder *encoder,
-				     const struct drm_display_mode *mode,
-				     struct drm_display_mode *adjusted)
-{
-	return true;
-}
-
-static void tegra_encoder_prepare(struct drm_encoder *encoder)
-{
-}
-
-static void tegra_encoder_commit(struct drm_encoder *encoder)
-{
-}
-
-static void tegra_encoder_mode_set(struct drm_encoder *encoder,
-				   struct drm_display_mode *mode,
-				   struct drm_display_mode *adjusted)
-{
-	struct tegra_output *output = encoder_to_output(encoder);
-	int err;
-
-	err = tegra_output_enable(output);
-	if (err < 0)
-		dev_err(encoder->dev->dev, "tegra_output_enable(): %d\n", err);
-}
-
-static const struct drm_encoder_helper_funcs encoder_helper_funcs = {
-	.dpms = tegra_encoder_dpms,
-	.mode_fixup = tegra_encoder_mode_fixup,
-	.prepare = tegra_encoder_prepare,
-	.commit = tegra_encoder_commit,
-	.mode_set = tegra_encoder_mode_set,
-};
-
-static irqreturn_t hpd_irq(int irq, void *data)
-{
-	struct tegra_output *output = data;
-
-	drm_helper_hpd_irq_event(output->connector.dev);
-
-	return IRQ_HANDLED;
-}
-
-int tegra_output_parse_dt(struct tegra_output *output)
-{
-	enum of_gpio_flags flags;
-	struct device_node *ddc;
-	size_t size;
-	int err;
-
-	if (!output->of_node)
-		output->of_node = output->dev->of_node;
-
-	output->edid = of_get_property(output->of_node, "nvidia,edid", &size);
-
-	ddc = of_parse_phandle(output->of_node, "nvidia,ddc-i2c-bus", 0);
-	if (ddc) {
-		output->ddc = of_find_i2c_adapter_by_node(ddc);
-		if (!output->ddc) {
-			err = -EPROBE_DEFER;
-			of_node_put(ddc);
-			return err;
-		}
-
-		of_node_put(ddc);
-	}
-
-	if (!output->edid && !output->ddc)
-		return -ENODEV;
-
-	output->hpd_gpio = of_get_named_gpio_flags(output->of_node,
-						   "nvidia,hpd-gpio", 0,
-						   &flags);
-
-	return 0;
-}
-
-int tegra_output_init(struct drm_device *drm, struct tegra_output *output)
-{
-	int connector, encoder, err;
-
-	if (gpio_is_valid(output->hpd_gpio)) {
-		unsigned long flags;
-
-		err = gpio_request_one(output->hpd_gpio, GPIOF_DIR_IN,
-				       "HDMI hotplug detect");
-		if (err < 0) {
-			dev_err(output->dev, "gpio_request_one(): %d\n", err);
-			return err;
-		}
-
-		err = gpio_to_irq(output->hpd_gpio);
-		if (err < 0) {
-			dev_err(output->dev, "gpio_to_irq(): %d\n", err);
-			goto free_hpd;
-		}
-
-		output->hpd_irq = err;
-
-		flags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING |
-			IRQF_ONESHOT;
-
-		err = request_threaded_irq(output->hpd_irq, NULL, hpd_irq,
-					   flags, "hpd", output);
-		if (err < 0) {
-			dev_err(output->dev, "failed to request IRQ#%u: %d\n",
-				output->hpd_irq, err);
-			goto free_hpd;
-		}
-
-		output->connector.polled = DRM_CONNECTOR_POLL_HPD;
-	}
-
-	switch (output->type) {
-	case TEGRA_OUTPUT_RGB:
-		connector = DRM_MODE_CONNECTOR_LVDS;
-		encoder = DRM_MODE_ENCODER_LVDS;
-		break;
-
-	case TEGRA_OUTPUT_HDMI:
-		connector = DRM_MODE_CONNECTOR_HDMIA;
-		encoder = DRM_MODE_ENCODER_TMDS;
-		break;
-
-	default:
-		connector = DRM_MODE_CONNECTOR_Unknown;
-		encoder = DRM_MODE_ENCODER_NONE;
-		break;
-	}
-
-	drm_connector_init(drm, &output->connector, &connector_funcs,
-			   connector);
-	drm_connector_helper_add(&output->connector, &connector_helper_funcs);
-
-	drm_encoder_init(drm, &output->encoder, &encoder_funcs, encoder);
-	drm_encoder_helper_add(&output->encoder, &encoder_helper_funcs);
-
-	drm_mode_connector_attach_encoder(&output->connector, &output->encoder);
-	drm_sysfs_connector_add(&output->connector);
-
-	output->encoder.possible_crtcs = 0x3;
-
-	return 0;
-
-free_hpd:
-	gpio_free(output->hpd_gpio);
-
-	return err;
-}
-
-int tegra_output_exit(struct tegra_output *output)
-{
-	if (gpio_is_valid(output->hpd_gpio)) {
-		free_irq(output->hpd_irq, output);
-		gpio_free(output->hpd_gpio);
-	}
-
-	if (output->ddc)
-		put_device(&output->ddc->dev);
-
-	return 0;
-}
diff --git a/drivers/gpu/host1x/drm/rgb.c b/drivers/gpu/host1x/drm/rgb.c
deleted file mode 100644
index ed4416f20260..000000000000
--- a/drivers/gpu/host1x/drm/rgb.c
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright (C) 2012 Avionic Design GmbH
- * Copyright (C) 2012 NVIDIA CORPORATION.  All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/clk.h>
-#include <linux/module.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-
-#include "drm.h"
-#include "dc.h"
-
-struct tegra_rgb {
-	struct tegra_output output;
-	struct clk *clk_parent;
-	struct clk *clk;
-};
-
-static inline struct tegra_rgb *to_rgb(struct tegra_output *output)
-{
-	return container_of(output, struct tegra_rgb, output);
-}
-
-struct reg_entry {
-	unsigned long offset;
-	unsigned long value;
-};
-
-static const struct reg_entry rgb_enable[] = {
-	{ DC_COM_PIN_OUTPUT_ENABLE(0),   0x00000000 },
-	{ DC_COM_PIN_OUTPUT_ENABLE(1),   0x00000000 },
-	{ DC_COM_PIN_OUTPUT_ENABLE(2),   0x00000000 },
-	{ DC_COM_PIN_OUTPUT_ENABLE(3),   0x00000000 },
-	{ DC_COM_PIN_OUTPUT_POLARITY(0), 0x00000000 },
-	{ DC_COM_PIN_OUTPUT_POLARITY(1), 0x01000000 },
-	{ DC_COM_PIN_OUTPUT_POLARITY(2), 0x00000000 },
-	{ DC_COM_PIN_OUTPUT_POLARITY(3), 0x00000000 },
-	{ DC_COM_PIN_OUTPUT_DATA(0),     0x00000000 },
-	{ DC_COM_PIN_OUTPUT_DATA(1),     0x00000000 },
-	{ DC_COM_PIN_OUTPUT_DATA(2),     0x00000000 },
-	{ DC_COM_PIN_OUTPUT_DATA(3),     0x00000000 },
-	{ DC_COM_PIN_OUTPUT_SELECT(0),   0x00000000 },
-	{ DC_COM_PIN_OUTPUT_SELECT(1),   0x00000000 },
-	{ DC_COM_PIN_OUTPUT_SELECT(2),   0x00000000 },
-	{ DC_COM_PIN_OUTPUT_SELECT(3),   0x00000000 },
-	{ DC_COM_PIN_OUTPUT_SELECT(4),   0x00210222 },
-	{ DC_COM_PIN_OUTPUT_SELECT(5),   0x00002200 },
-	{ DC_COM_PIN_OUTPUT_SELECT(6),   0x00020000 },
-};
-
-static const struct reg_entry rgb_disable[] = {
-	{ DC_COM_PIN_OUTPUT_SELECT(6),   0x00000000 },
-	{ DC_COM_PIN_OUTPUT_SELECT(5),   0x00000000 },
-	{ DC_COM_PIN_OUTPUT_SELECT(4),   0x00000000 },
-	{ DC_COM_PIN_OUTPUT_SELECT(3),   0x00000000 },
-	{ DC_COM_PIN_OUTPUT_SELECT(2),   0x00000000 },
-	{ DC_COM_PIN_OUTPUT_SELECT(1),   0x00000000 },
-	{ DC_COM_PIN_OUTPUT_SELECT(0),   0x00000000 },
-	{ DC_COM_PIN_OUTPUT_DATA(3),     0xaaaaaaaa },
-	{ DC_COM_PIN_OUTPUT_DATA(2),     0xaaaaaaaa },
-	{ DC_COM_PIN_OUTPUT_DATA(1),     0xaaaaaaaa },
-	{ DC_COM_PIN_OUTPUT_DATA(0),     0xaaaaaaaa },
-	{ DC_COM_PIN_OUTPUT_POLARITY(3), 0x00000000 },
-	{ DC_COM_PIN_OUTPUT_POLARITY(2), 0x00000000 },
-	{ DC_COM_PIN_OUTPUT_POLARITY(1), 0x00000000 },
-	{ DC_COM_PIN_OUTPUT_POLARITY(0), 0x00000000 },
-	{ DC_COM_PIN_OUTPUT_ENABLE(3),   0x55555555 },
-	{ DC_COM_PIN_OUTPUT_ENABLE(2),   0x55555555 },
-	{ DC_COM_PIN_OUTPUT_ENABLE(1),   0x55150005 },
-	{ DC_COM_PIN_OUTPUT_ENABLE(0),   0x55555555 },
-};
-
-static void tegra_dc_write_regs(struct tegra_dc *dc,
-				const struct reg_entry *table,
-				unsigned int num)
-{
-	unsigned int i;
-
-	for (i = 0; i < num; i++)
-		tegra_dc_writel(dc, table[i].value, table[i].offset);
-}
-
-static int tegra_output_rgb_enable(struct tegra_output *output)
-{
-	struct tegra_dc *dc = to_tegra_dc(output->encoder.crtc);
-
-	tegra_dc_write_regs(dc, rgb_enable, ARRAY_SIZE(rgb_enable));
-
-	return 0;
-}
-
-static int tegra_output_rgb_disable(struct tegra_output *output)
-{
-	struct tegra_dc *dc = to_tegra_dc(output->encoder.crtc);
-
-	tegra_dc_write_regs(dc, rgb_disable, ARRAY_SIZE(rgb_disable));
-
-	return 0;
-}
-
-static int tegra_output_rgb_setup_clock(struct tegra_output *output,
-					struct clk *clk, unsigned long pclk)
-{
-	struct tegra_rgb *rgb = to_rgb(output);
-
-	return clk_set_parent(clk, rgb->clk_parent);
-}
-
-static int tegra_output_rgb_check_mode(struct tegra_output *output,
-				       struct drm_display_mode *mode,
-				       enum drm_mode_status *status)
-{
-	/*
-	 * FIXME: For now, always assume that the mode is okay. There are
-	 * unresolved issues with clk_round_rate(), which doesn't always
-	 * reliably report whether a frequency can be set or not.
-	 */
-
-	*status = MODE_OK;
-
-	return 0;
-}
-
-static const struct tegra_output_ops rgb_ops = {
-	.enable = tegra_output_rgb_enable,
-	.disable = tegra_output_rgb_disable,
-	.setup_clock = tegra_output_rgb_setup_clock,
-	.check_mode = tegra_output_rgb_check_mode,
-};
-
-int tegra_dc_rgb_probe(struct tegra_dc *dc)
-{
-	struct device_node *np;
-	struct tegra_rgb *rgb;
-	int err;
-
-	np = of_get_child_by_name(dc->dev->of_node, "rgb");
-	if (!np || !of_device_is_available(np))
-		return -ENODEV;
-
-	rgb = devm_kzalloc(dc->dev, sizeof(*rgb), GFP_KERNEL);
-	if (!rgb)
-		return -ENOMEM;
-
-	rgb->clk = devm_clk_get(dc->dev, NULL);
-	if (IS_ERR(rgb->clk)) {
-		dev_err(dc->dev, "failed to get clock\n");
-		return PTR_ERR(rgb->clk);
-	}
-
-	rgb->clk_parent = devm_clk_get(dc->dev, "parent");
-	if (IS_ERR(rgb->clk_parent)) {
-		dev_err(dc->dev, "failed to get parent clock\n");
-		return PTR_ERR(rgb->clk_parent);
-	}
-
-	err = clk_set_parent(rgb->clk, rgb->clk_parent);
-	if (err < 0) {
-		dev_err(dc->dev, "failed to set parent clock: %d\n", err);
-		return err;
-	}
-
-	rgb->output.dev = dc->dev;
-	rgb->output.of_node = np;
-
-	err = tegra_output_parse_dt(&rgb->output);
-	if (err < 0)
-		return err;
-
-	dc->rgb = &rgb->output;
-
-	return 0;
-}
-
-int tegra_dc_rgb_init(struct drm_device *drm, struct tegra_dc *dc)
-{
-	struct tegra_rgb *rgb = to_rgb(dc->rgb);
-	int err;
-
-	if (!dc->rgb)
-		return -ENODEV;
-
-	rgb->output.type = TEGRA_OUTPUT_RGB;
-	rgb->output.ops = &rgb_ops;
-
-	err = tegra_output_init(dc->base.dev, &rgb->output);
-	if (err < 0) {
-		dev_err(dc->dev, "output setup failed: %d\n", err);
-		return err;
-	}
-
-	/*
-	 * By default, outputs can be associated with each display controller.
-	 * RGB outputs are an exception, so we make sure they can be attached
-	 * to only their parent display controller.
-	 */
-	rgb->output.encoder.possible_crtcs = 1 << dc->pipe;
-
-	return 0;
-}
-
-int tegra_dc_rgb_exit(struct tegra_dc *dc)
-{
-	if (dc->rgb) {
-		int err;
-
-		err = tegra_output_disable(dc->rgb);
-		if (err < 0) {
-			dev_err(dc->dev, "output failed to disable: %d\n", err);
-			return err;
-		}
-
-		err = tegra_output_exit(dc->rgb);
-		if (err < 0) {
-			dev_err(dc->dev, "output cleanup failed: %d\n", err);
-			return err;
-		}
-
-		dc->rgb = NULL;
-	}
-
-	return 0;
-}
diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
new file mode 100644
index 000000000000..139ad1afd935
--- /dev/null
+++ b/drivers/gpu/host1x/fence.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Syncpoint dma_fence implementation
+ *
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/sync_file.h>
+
+#include "fence.h"
+#include "intr.h"
+#include "syncpt.h"
+
+static const char *host1x_syncpt_fence_get_driver_name(struct dma_fence *f)
+{
+	return "host1x";
+}
+
+static const char *host1x_syncpt_fence_get_timeline_name(struct dma_fence *f)
+{
+	return "syncpoint";
+}
+
+static struct host1x_syncpt_fence *to_host1x_fence(struct dma_fence *f)
+{
+	return container_of(f, struct host1x_syncpt_fence, base);
+}
+
+static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f)
+{
+	struct host1x_syncpt_fence *sf = to_host1x_fence(f);
+
+	if (host1x_syncpt_is_expired(sf->sp, sf->threshold))
+		return false;
+
+	/* Reference for interrupt path. */
+	dma_fence_get(f);
+
+	/*
+	 * The dma_fence framework requires the fence driver to keep a
+	 * reference to any fences for which 'enable_signaling' has been
+	 * called (and that have not been signalled).
+	 *
+	 * We cannot currently always guarantee that all fences get signalled
+	 * or cancelled. As such, for such situations, set up a timeout, so
+	 * that long-lasting fences will get reaped eventually.
+	 */
+	if (sf->timeout) {
+		/* Reference for timeout path. */
+		dma_fence_get(f);
+		schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000));
+	}
+
+	host1x_intr_add_fence_locked(sf->sp->host, sf);
+
+	/*
+	 * The fence may get signalled at any time after the above call,
+	 * so we need to initialize all state used by signalling
+	 * before it.
+	 */
+
+	return true;
+}
+
+static const struct dma_fence_ops host1x_syncpt_fence_ops = {
+	.get_driver_name = host1x_syncpt_fence_get_driver_name,
+	.get_timeline_name = host1x_syncpt_fence_get_timeline_name,
+	.enable_signaling = host1x_syncpt_fence_enable_signaling,
+};
+
+void host1x_fence_signal(struct host1x_syncpt_fence *f)
+{
+	if (atomic_xchg(&f->signaling, 1)) {
+		/*
+		 * Already on timeout path, but we removed the fence before
+		 * timeout path could, so drop interrupt path reference.
+		 */
+		dma_fence_put(&f->base);
+		return;
+	}
+
+	if (f->timeout && cancel_delayed_work(&f->timeout_work)) {
+		/*
+		 * We know that the timeout path will not be entered.
+		 * Safe to drop the timeout path's reference now.
+		 */
+		dma_fence_put(&f->base);
+	}
+
+	dma_fence_signal_locked(&f->base);
+	dma_fence_put(&f->base);
+}
+
+static void do_fence_timeout(struct work_struct *work)
+{
+	struct delayed_work *dwork = (struct delayed_work *)work;
+	struct host1x_syncpt_fence *f =
+		container_of(dwork, struct host1x_syncpt_fence, timeout_work);
+
+	if (atomic_xchg(&f->signaling, 1)) {
+		/* Already on interrupt path, drop timeout path reference if any. */
+		if (f->timeout)
+			dma_fence_put(&f->base);
+		return;
+	}
+
+	if (host1x_intr_remove_fence(f->sp->host, f)) {
+		/*
+		 * Managed to remove fence from queue, so it's safe to drop
+		 * the interrupt path's reference.
+		 */
+		dma_fence_put(&f->base);
+	}
+
+	dma_fence_set_error(&f->base, -ETIMEDOUT);
+	dma_fence_signal(&f->base);
+	if (f->timeout)
+		dma_fence_put(&f->base);
+}
+
+struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold,
+				      bool timeout)
+{
+	struct host1x_syncpt_fence *fence;
+
+	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+	if (!fence)
+		return ERR_PTR(-ENOMEM);
+
+	fence->sp = sp;
+	fence->threshold = threshold;
+	fence->timeout = timeout;
+
+	dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &sp->fences.lock,
+		       dma_fence_context_alloc(1), 0);
+
+	INIT_DELAYED_WORK(&fence->timeout_work, do_fence_timeout);
+
+	return &fence->base;
+}
+EXPORT_SYMBOL(host1x_fence_create);
+
+void host1x_fence_cancel(struct dma_fence *f)
+{
+	struct host1x_syncpt_fence *sf = to_host1x_fence(f);
+
+	schedule_delayed_work(&sf->timeout_work, 0);
+	flush_delayed_work(&sf->timeout_work);
+}
+EXPORT_SYMBOL(host1x_fence_cancel);
diff --git a/drivers/gpu/host1x/fence.h b/drivers/gpu/host1x/fence.h
new file mode 100644
index 000000000000..f3c644c73cad
--- /dev/null
+++ b/drivers/gpu/host1x/fence.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_FENCE_H
+#define HOST1X_FENCE_H
+
+struct host1x_syncpt_fence {
+	struct dma_fence base;
+
+	atomic_t signaling;
+
+	struct host1x_syncpt *sp;
+	u32 threshold;
+	bool timeout;
+
+	struct delayed_work timeout_work;
+
+	struct list_head list;
+};
+
+struct host1x_fence_list {
+	spinlock_t lock;
+	struct list_head list;
+};
+
+void host1x_fence_signal(struct host1x_syncpt_fence *fence);
+
+#endif
diff --git a/drivers/gpu/host1x/host1x.h b/drivers/gpu/host1x/host1x.h
deleted file mode 100644
index a2bc1e65e972..000000000000
--- a/drivers/gpu/host1x/host1x.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/*
- * Tegra host1x driver
- *
- * Copyright (c) 2009-2013, NVIDIA Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
- */
-
-#ifndef __LINUX_HOST1X_H
-#define __LINUX_HOST1X_H
-
-enum host1x_class {
-	HOST1X_CLASS_HOST1X	= 0x1,
-	HOST1X_CLASS_GR2D	= 0x51,
-	HOST1X_CLASS_GR2D_SB    = 0x52
-};
-
-#endif
diff --git a/drivers/gpu/host1x/host1x_bo.h b/drivers/gpu/host1x/host1x_bo.h
deleted file mode 100644
index 4c1f10bd773d..000000000000
--- a/drivers/gpu/host1x/host1x_bo.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Tegra host1x Memory Management Abstraction header
- *
- * Copyright (c) 2012-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef _HOST1X_BO_H
-#define _HOST1X_BO_H
-
-struct host1x_bo;
-
-struct host1x_bo_ops {
-	struct host1x_bo *(*get)(struct host1x_bo *bo);
-	void (*put)(struct host1x_bo *bo);
-	dma_addr_t (*pin)(struct host1x_bo *bo, struct sg_table **sgt);
-	void (*unpin)(struct host1x_bo *bo, struct sg_table *sgt);
-	void *(*mmap)(struct host1x_bo *bo);
-	void (*munmap)(struct host1x_bo *bo, void *addr);
-	void *(*kmap)(struct host1x_bo *bo, unsigned int pagenum);
-	void (*kunmap)(struct host1x_bo *bo, unsigned int pagenum, void *addr);
-};
-
-struct host1x_bo {
-	const struct host1x_bo_ops *ops;
-};
-
-static inline void host1x_bo_init(struct host1x_bo *bo,
-				  const struct host1x_bo_ops *ops)
-{
-	bo->ops = ops;
-}
-
-static inline struct host1x_bo *host1x_bo_get(struct host1x_bo *bo)
-{
-	return bo->ops->get(bo);
-}
-
-static inline void host1x_bo_put(struct host1x_bo *bo)
-{
-	bo->ops->put(bo);
-}
-
-static inline dma_addr_t host1x_bo_pin(struct host1x_bo *bo,
-				       struct sg_table **sgt)
-{
-	return bo->ops->pin(bo, sgt);
-}
-
-static inline void host1x_bo_unpin(struct host1x_bo *bo, struct sg_table *sgt)
-{
-	bo->ops->unpin(bo, sgt);
-}
-
-static inline void *host1x_bo_mmap(struct host1x_bo *bo)
-{
-	return bo->ops->mmap(bo);
-}
-
-static inline void host1x_bo_munmap(struct host1x_bo *bo, void *addr)
-{
-	bo->ops->munmap(bo, addr);
-}
-
-static inline void *host1x_bo_kmap(struct host1x_bo *bo, unsigned int pagenum)
-{
-	return bo->ops->kmap(bo, pagenum);
-}
-
-static inline void host1x_bo_kunmap(struct host1x_bo *bo,
-				    unsigned int pagenum, void *addr)
-{
-	bo->ops->kunmap(bo, pagenum, addr);
-}
-
-#endif
diff --git a/drivers/gpu/host1x/host1x_client.h b/drivers/gpu/host1x/host1x_client.h
deleted file mode 100644
index 9b85f10f4a44..000000000000
--- a/drivers/gpu/host1x/host1x_client.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- */
-
-#ifndef HOST1X_CLIENT_H
-#define HOST1X_CLIENT_H
-
-struct device;
-struct platform_device;
-
-#ifdef CONFIG_DRM_TEGRA
-int host1x_drm_alloc(struct platform_device *pdev);
-#else
-static inline int host1x_drm_alloc(struct platform_device *pdev)
-{
-	return 0;
-}
-#endif
-
-void host1x_set_drm_data(struct device *dev, void *data);
-void *host1x_get_drm_data(struct device *dev);
-
-#endif
diff --git a/drivers/gpu/host1x/hw/Makefile b/drivers/gpu/host1x/hw/Makefile
deleted file mode 100644
index 9b50863a2236..000000000000
--- a/drivers/gpu/host1x/hw/Makefile
+++ /dev/null
@@ -1,6 +0,0 @@
-ccflags-y = -Idrivers/gpu/host1x
-
-host1x-hw-objs  = \
-	host1x01.o
-
-obj-$(CONFIG_TEGRA_HOST1X) += host1x-hw.o
diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index 2ee4ad55c4db..3f3f0018eee0 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -1,36 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Tegra host1x Command DMA
  *
  * Copyright (c) 2010-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/slab.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
 
-#include "cdma.h"
-#include "channel.h"
-#include "dev.h"
-#include "debug.h"
+#include "../cdma.h"
+#include "../channel.h"
+#include "../dev.h"
+#include "../debug.h"
 
 /*
- * Put the restart at the end of pushbuffer memor
+ * Put the restart at the end of pushbuffer memory
  */
 static void push_buffer_init(struct push_buffer *pb)
 {
-	*(pb->mapped + (pb->size_bytes >> 2)) = host1x_opcode_restart(0);
+	*(u32 *)(pb->mapped + pb->size) = host1x_opcode_restart(0);
 }
 
 /*
@@ -39,26 +28,13 @@ static void push_buffer_init(struct push_buffer *pb)
 static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
 				u32 syncpt_incrs, u32 syncval, u32 nr_slots)
 {
-	struct host1x *host1x = cdma_to_host1x(cdma);
-	struct push_buffer *pb = &cdma->push_buffer;
-	u32 i;
+	unsigned int i;
 
 	for (i = 0; i < syncpt_incrs; i++)
 		host1x_syncpt_incr(cdma->timeout.syncpt);
 
 	/* after CPU incr, ensure shadow is up to date */
 	host1x_syncpt_load(cdma->timeout.syncpt);
-
-	/* NOP all the PB slots */
-	while (nr_slots--) {
-		u32 *p = (u32 *)((u32)pb->mapped + getptr);
-		*(p++) = HOST1X_OPCODE_NOP;
-		*(p++) = HOST1X_OPCODE_NOP;
-		dev_dbg(host1x->dev, "%s: NOP at 0x%x\n", __func__,
-			pb->phys + getptr);
-		getptr = (getptr + 8) & (pb->size_bytes - 1);
-	}
-	wmb();
 }
 
 /*
@@ -67,21 +43,31 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr,
 static void cdma_start(struct host1x_cdma *cdma)
 {
 	struct host1x_channel *ch = cdma_to_channel(cdma);
+	u64 start, end;
 
 	if (cdma->running)
 		return;
 
 	cdma->last_pos = cdma->push_buffer.pos;
+	start = cdma->push_buffer.dma;
+	end = cdma->push_buffer.size + 4;
 
 	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
 			 HOST1X_CHANNEL_DMACTRL);
 
 	/* set base, put and end pointer */
-	host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
+	host1x_ch_writel(ch, lower_32_bits(start), HOST1X_CHANNEL_DMASTART);
+#if HOST1X_HW >= 6
+	host1x_ch_writel(ch, upper_32_bits(start), HOST1X_CHANNEL_DMASTART_HI);
+#endif
 	host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT);
-	host1x_ch_writel(ch, cdma->push_buffer.phys +
-			 cdma->push_buffer.size_bytes + 4,
-			 HOST1X_CHANNEL_DMAEND);
+#if HOST1X_HW >= 6
+	host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMAPUT_HI);
+#endif
+	host1x_ch_writel(ch, lower_32_bits(end), HOST1X_CHANNEL_DMAEND);
+#if HOST1X_HW >= 6
+	host1x_ch_writel(ch, upper_32_bits(end), HOST1X_CHANNEL_DMAEND_HI);
+#endif
 
 	/* reset GET */
 	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP |
@@ -104,6 +90,7 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr)
 {
 	struct host1x *host1x = cdma_to_host1x(cdma);
 	struct host1x_channel *ch = cdma_to_channel(cdma);
+	u64 start, end;
 
 	if (cdma->running)
 		return;
@@ -113,11 +100,18 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr)
 	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
 			 HOST1X_CHANNEL_DMACTRL);
 
+	start = cdma->push_buffer.dma;
+	end = cdma->push_buffer.size + 4;
+
 	/* set base, end pointer (all of memory) */
-	host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART);
-	host1x_ch_writel(ch, cdma->push_buffer.phys +
-			 cdma->push_buffer.size_bytes,
-			 HOST1X_CHANNEL_DMAEND);
+	host1x_ch_writel(ch, lower_32_bits(start), HOST1X_CHANNEL_DMASTART);
+#if HOST1X_HW >= 6
+	host1x_ch_writel(ch, upper_32_bits(start), HOST1X_CHANNEL_DMASTART_HI);
+#endif
+	host1x_ch_writel(ch, lower_32_bits(end), HOST1X_CHANNEL_DMAEND);
+#if HOST1X_HW >= 6
+	host1x_ch_writel(ch, upper_32_bits(end), HOST1X_CHANNEL_DMAEND_HI);
+#endif
 
 	/* set GET, by loading the value in PUT (then reset GET) */
 	host1x_ch_writel(ch, getptr, HOST1X_CHANNEL_DMAPUT);
@@ -162,15 +156,41 @@ static void cdma_stop(struct host1x_cdma *cdma)
 	struct host1x_channel *ch = cdma_to_channel(cdma);
 
 	mutex_lock(&cdma->lock);
+
 	if (cdma->running) {
 		host1x_cdma_wait_locked(cdma, CDMA_EVENT_SYNC_QUEUE_EMPTY);
 		host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
 				 HOST1X_CHANNEL_DMACTRL);
 		cdma->running = false;
 	}
+
 	mutex_unlock(&cdma->lock);
 }
 
+static void cdma_hw_cmdproc_stop(struct host1x *host, struct host1x_channel *ch,
+				 bool stop)
+{
+#if HOST1X_HW >= 6
+	host1x_ch_writel(ch, stop ? 0x1 : 0x0, HOST1X_CHANNEL_CMDPROC_STOP);
+#else
+	u32 cmdproc_stop = host1x_sync_readl(host, HOST1X_SYNC_CMDPROC_STOP);
+	if (stop)
+		cmdproc_stop |= BIT(ch->id);
+	else
+		cmdproc_stop &= ~BIT(ch->id);
+	host1x_sync_writel(host, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
+#endif
+}
+
+static void cdma_hw_teardown(struct host1x *host, struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+	host1x_ch_writel(ch, 0x1, HOST1X_CHANNEL_TEARDOWN);
+#else
+	host1x_sync_writel(host, BIT(ch->id), HOST1X_SYNC_CH_TEARDOWN);
+#endif
+}
+
 /*
  * Stops both channel's command processor and CDMA immediately.
  * Also, tears down the channel and resets corresponding module.
@@ -179,7 +199,6 @@ static void cdma_freeze(struct host1x_cdma *cdma)
 {
 	struct host1x *host = cdma_to_host1x(cdma);
 	struct host1x_channel *ch = cdma_to_channel(cdma);
-	u32 cmdproc_stop;
 
 	if (cdma->torndown && !cdma->running) {
 		dev_warn(host->dev, "Already torn down\n");
@@ -188,9 +207,7 @@ static void cdma_freeze(struct host1x_cdma *cdma)
 
 	dev_dbg(host->dev, "freezing channel (id %d)\n", ch->id);
 
-	cmdproc_stop = host1x_sync_readl(host, HOST1X_SYNC_CMDPROC_STOP);
-	cmdproc_stop |= BIT(ch->id);
-	host1x_sync_writel(host, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
+	cdma_hw_cmdproc_stop(host, ch, true);
 
 	dev_dbg(host->dev, "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n",
 		__func__, host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET),
@@ -200,7 +217,7 @@ static void cdma_freeze(struct host1x_cdma *cdma)
 	host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP,
 			 HOST1X_CHANNEL_DMACTRL);
 
-	host1x_sync_writel(host, BIT(ch->id), HOST1X_SYNC_CH_TEARDOWN);
+	cdma_hw_teardown(host, ch);
 
 	cdma->running = false;
 	cdma->torndown = true;
@@ -210,20 +227,60 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 getptr)
 {
 	struct host1x *host1x = cdma_to_host1x(cdma);
 	struct host1x_channel *ch = cdma_to_channel(cdma);
-	u32 cmdproc_stop;
 
 	dev_dbg(host1x->dev,
-		"resuming channel (id %d, DMAGET restart = 0x%x)\n",
+		"resuming channel (id %u, DMAGET restart = 0x%x)\n",
 		ch->id, getptr);
 
-	cmdproc_stop = host1x_sync_readl(host1x, HOST1X_SYNC_CMDPROC_STOP);
-	cmdproc_stop &= ~(BIT(ch->id));
-	host1x_sync_writel(host1x, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
+	cdma_hw_cmdproc_stop(host1x, ch, false);
 
 	cdma->torndown = false;
 	cdma_timeout_restart(cdma, getptr);
 }
 
+static void timeout_release_mlock(struct host1x_cdma *cdma)
+{
+#if HOST1X_HW >= 8
+	/* Tegra186 and Tegra194 require a more complicated MLOCK release
+	 * sequence. Furthermore, those chips by default don't enforce MLOCKs,
+	 * so it turns out that if we don't /actually/ need MLOCKs, we can just
+	 * ignore them.
+	 *
+	 * As such, for now just implement this on Tegra234 where things are
+	 * stricter but also easy to implement.
+	 */
+	struct host1x_channel *ch = cdma_to_channel(cdma);
+	struct host1x *host1x = cdma_to_host1x(cdma);
+	u32 offset;
+
+	switch (ch->client->class) {
+	case HOST1X_CLASS_NVJPG1:
+		offset = HOST1X_COMMON_NVJPG1_MLOCK;
+		break;
+	case HOST1X_CLASS_NVENC:
+		offset = HOST1X_COMMON_NVENC_MLOCK;
+		break;
+	case HOST1X_CLASS_VIC:
+		offset = HOST1X_COMMON_VIC_MLOCK;
+		break;
+	case HOST1X_CLASS_NVJPG:
+		offset = HOST1X_COMMON_NVJPG_MLOCK;
+		break;
+	case HOST1X_CLASS_NVDEC:
+		offset = HOST1X_COMMON_NVDEC_MLOCK;
+		break;
+	case HOST1X_CLASS_OFA:
+		offset = HOST1X_COMMON_OFA_MLOCK;
+		break;
+	default:
+		WARN(1, "%s was not updated for class %u", __func__, ch->client->class);
+		return;
+	}
+
+	host1x_common_writel(host1x, 0x0, offset);
+#endif
+}
+
 /*
  * If this timeout fires, it indicates the current sync_queue entry has
  * exceeded its TTL and the userctx should be timed out and remaining
@@ -231,14 +288,11 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 getptr)
  */
 static void cdma_timeout_handler(struct work_struct *work)
 {
+	u32 syncpt_val;
 	struct host1x_cdma *cdma;
 	struct host1x *host1x;
 	struct host1x_channel *ch;
 
-	u32 syncpt_val;
-
-	u32 prev_cmdproc, cmdproc_stop;
-
 	cdma = container_of(to_delayed_work(work), struct host1x_cdma,
 			    timeout.wq);
 	host1x = cdma_to_host1x(cdma);
@@ -256,12 +310,7 @@ static void cdma_timeout_handler(struct work_struct *work)
 	}
 
 	/* stop processing to get a clean snapshot */
-	prev_cmdproc = host1x_sync_readl(host1x, HOST1X_SYNC_CMDPROC_STOP);
-	cmdproc_stop = prev_cmdproc | BIT(ch->id);
-	host1x_sync_writel(host1x, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP);
-
-	dev_dbg(host1x->dev, "cdma_timeout: cmdproc was 0x%x is 0x%x\n",
-		prev_cmdproc, cmdproc_stop);
+	cdma_hw_cmdproc_stop(host1x, ch, true);
 
 	syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt);
 
@@ -270,20 +319,21 @@ static void cdma_timeout_handler(struct work_struct *work)
 		dev_dbg(host1x->dev,
 			"cdma_timeout: expired, but buffer had completed\n");
 		/* restore */
-		cmdproc_stop = prev_cmdproc & ~(BIT(ch->id));
-		host1x_sync_writel(host1x, cmdproc_stop,
-				   HOST1X_SYNC_CMDPROC_STOP);
+		cdma_hw_cmdproc_stop(host1x, ch, false);
 		mutex_unlock(&cdma->lock);
 		return;
 	}
 
-	dev_warn(host1x->dev, "%s: timeout: %d (%s), HW thresh %d, done %d\n",
-		__func__, cdma->timeout.syncpt->id, cdma->timeout.syncpt->name,
-		syncpt_val, cdma->timeout.syncpt_val);
+	dev_warn(host1x->dev, "%s: timeout: %u (%s), HW thresh %d, done %d\n",
+		 __func__, cdma->timeout.syncpt->id, cdma->timeout.syncpt->name,
+		 syncpt_val, cdma->timeout.syncpt_val);
 
 	/* stop HW, resetting channel/module */
 	host1x_hw_cdma_freeze(host1x, cdma);
 
+	/* release any held MLOCK */
+	timeout_release_mlock(cdma);
+
 	host1x_cdma_update_sync_queue(cdma, ch->dev);
 	mutex_unlock(&cdma->lock);
 }
@@ -291,7 +341,7 @@ static void cdma_timeout_handler(struct work_struct *work)
 /*
  * Init timeout resources
  */
-static int cdma_timeout_init(struct host1x_cdma *cdma, u32 syncpt_id)
+static int cdma_timeout_init(struct host1x_cdma *cdma)
 {
 	INIT_DELAYED_WORK(&cdma->timeout.wq, cdma_timeout_handler);
 	cdma->timeout.initialized = true;
@@ -306,6 +356,7 @@ static void cdma_timeout_destroy(struct host1x_cdma *cdma)
 {
 	if (cdma->timeout.initialized)
 		cancel_delayed_work(&cdma->timeout.wq);
+
 	cdma->timeout.initialized = false;
 }
 
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index ee199623e365..2df6a16d484e 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -1,37 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Tegra host1x Channel
  *
  * Copyright (c) 2010-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/host1x.h>
+#include <linux/iommu.h>
 #include <linux/slab.h>
+
 #include <trace/events/host1x.h>
 
-#include "host1x.h"
-#include "host1x_bo.h"
-#include "channel.h"
-#include "dev.h"
-#include "intr.h"
-#include "job.h"
+#include "../channel.h"
+#include "../dev.h"
+#include "../intr.h"
+#include "../job.h"
 
-#define HOST1X_CHANNEL_SIZE 16384
 #define TRACE_MAX_LENGTH 128U
 
 static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
 			       u32 offset, u32 words)
 {
+	struct device *dev = cdma_to_channel(cdma)->dev;
 	void *mem = NULL;
 
 	if (host1x_debug_trace_cmdbuf)
@@ -44,121 +34,352 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
 		 * of how much you can output to ftrace at once.
 		 */
 		for (i = 0; i < words; i += TRACE_MAX_LENGTH) {
-			trace_host1x_cdma_push_gather(
-				dev_name(cdma_to_channel(cdma)->dev),
-				(u32)bo, min(words - i, TRACE_MAX_LENGTH),
-				offset + i * sizeof(u32), mem);
+			u32 num_words = min(words - i, TRACE_MAX_LENGTH);
+
+			offset += i * sizeof(u32);
+
+			trace_host1x_cdma_push_gather(dev_name(dev), bo,
+						      num_words, offset,
+						      mem);
 		}
+
 		host1x_bo_munmap(bo, mem);
 	}
 }
 
-static void submit_gathers(struct host1x_job *job)
+static void submit_wait(struct host1x_job *job, u32 id, u32 threshold)
 {
 	struct host1x_cdma *cdma = &job->channel->cdma;
+
+#if HOST1X_HW >= 2
+	host1x_cdma_push_wide(cdma,
+		host1x_opcode_setclass(
+			HOST1X_CLASS_HOST1X,
+			HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32,
+			/* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */
+			BIT(0) | BIT(2)
+		),
+		threshold,
+		id,
+		HOST1X_OPCODE_NOP
+	);
+#else
+	/* TODO add waitchk or use waitbases or other mitigation */
+	host1x_cdma_push(cdma,
+		host1x_opcode_setclass(
+			HOST1X_CLASS_HOST1X,
+			host1x_uclass_wait_syncpt_r(),
+			BIT(0)
+		),
+		host1x_class_host_wait_syncpt(id, threshold)
+	);
+#endif
+}
+
+static void submit_setclass(struct host1x_job *job, u32 next_class)
+{
+	struct host1x_cdma *cdma = &job->channel->cdma;
+
+#if HOST1X_HW >= 6
+	u32 stream_id;
+
+	/*
+	 * If a memory context has been set, use it. Otherwise
+	 * (if context isolation is disabled) use the engine's
+	 * firmware stream ID.
+	 */
+	if (job->memory_context)
+		stream_id = job->memory_context->stream_id;
+	else
+		stream_id = job->engine_fallback_streamid;
+
+	host1x_cdma_push_wide(cdma,
+		host1x_opcode_setclass(next_class, 0, 0),
+		host1x_opcode_setpayload(stream_id),
+		host1x_opcode_setstreamid(job->engine_streamid_offset / 4),
+		HOST1X_OPCODE_NOP);
+#else
+	host1x_cdma_push(cdma,
+		host1x_opcode_setclass(next_class, 0, 0),
+		HOST1X_OPCODE_NOP
+	);
+#endif
+}
+
+static void submit_gathers(struct host1x_job *job, struct host1x_job_cmd *cmds, u32 num_cmds,
+			   u32 job_syncpt_base)
+{
+	struct host1x_cdma *cdma = &job->channel->cdma;
+#if HOST1X_HW < 6
+	struct device *dev = job->channel->dev;
+#endif
 	unsigned int i;
+	u32 threshold;
+
+	for (i = 0; i < num_cmds; i++) {
+		struct host1x_job_cmd *cmd = &cmds[i];
+
+		if (cmd->is_wait) {
+			if (cmd->wait.relative)
+				threshold = job_syncpt_base + cmd->wait.threshold;
+			else
+				threshold = cmd->wait.threshold;
+
+			submit_wait(job, cmd->wait.id, threshold);
+			submit_setclass(job, cmd->wait.next_class);
+		} else {
+			struct host1x_job_gather *g = &cmd->gather;
+
+			dma_addr_t addr = g->base + g->offset;
+			u32 op2, op3;
+
+			op2 = lower_32_bits(addr);
+			op3 = upper_32_bits(addr);
+
+			trace_write_gather(cdma, g->bo, g->offset, g->words);
+
+			if (op3 != 0) {
+#if HOST1X_HW >= 6
+				u32 op1 = host1x_opcode_gather_wide(g->words);
+				u32 op4 = HOST1X_OPCODE_NOP;
+
+				host1x_cdma_push_wide(cdma, op1, op2, op3, op4);
+#else
+				dev_err(dev, "invalid gather for push buffer %pad\n",
+					&addr);
+				continue;
+#endif
+			} else {
+				u32 op1 = host1x_opcode_gather(g->words);
 
-	for (i = 0; i < job->num_gathers; i++) {
-		struct host1x_job_gather *g = &job->gathers[i];
-		u32 op1 = host1x_opcode_gather(g->words);
-		u32 op2 = g->base + g->offset;
-		trace_write_gather(cdma, g->bo, g->offset, op1 & 0xffff);
-		host1x_cdma_push(cdma, op1, op2);
+				host1x_cdma_push(cdma, op1, op2);
+			}
+		}
 	}
 }
 
-static int channel_submit(struct host1x_job *job)
+static inline void synchronize_syncpt_base(struct host1x_job *job)
 {
-	struct host1x_channel *ch = job->channel;
-	struct host1x_syncpt *sp;
-	u32 user_syncpt_incrs = job->syncpt_incrs;
-	u32 prev_max = 0;
-	u32 syncval;
-	int err;
-	struct host1x_waitlist *completed_waiter = NULL;
+	struct host1x_syncpt *sp = job->syncpt;
+	unsigned int id;
+	u32 value;
+
+	value = host1x_syncpt_read_max(sp);
+	id = sp->base->id;
+
+	host1x_cdma_push(&job->channel->cdma,
+			 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
+				HOST1X_UCLASS_LOAD_SYNCPT_BASE, 1),
+			 HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(id) |
+			 HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(value));
+}
+
+static void host1x_channel_set_streamid(struct host1x_channel *channel)
+{
+#if HOST1X_HW >= 6
+	u32 stream_id;
+
+	if (!tegra_dev_iommu_get_stream_id(channel->dev->parent, &stream_id))
+		stream_id = TEGRA_STREAM_ID_BYPASS;
+
+	host1x_ch_writel(channel, stream_id, HOST1X_CHANNEL_SMMU_STREAMID);
+#endif
+}
+
+static void host1x_enable_gather_filter(struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
 	struct host1x *host = dev_get_drvdata(ch->dev->parent);
+	u32 val;
 
-	sp = host->syncpt + job->syncpt_id;
-	trace_host1x_channel_submit(dev_name(ch->dev),
-				    job->num_gathers, job->num_relocs,
-				    job->num_waitchk, job->syncpt_id,
-				    job->syncpt_incrs);
+	if (!host->hv_regs)
+		return;
 
-	/* before error checks, return current max */
-	prev_max = job->syncpt_end = host1x_syncpt_read_max(sp);
+	val = host1x_hypervisor_readl(
+		host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+	val |= BIT(ch->id % 32);
+	host1x_hypervisor_writel(
+		host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+#elif HOST1X_HW >= 4
+	host1x_ch_writel(ch,
+			 HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
+			 HOST1X_CHANNEL_CHANNELCTRL);
+#endif
+}
 
-	/* get submit lock */
-	err = mutex_lock_interruptible(&ch->submitlock);
-	if (err)
-		goto error;
+static void channel_program_cdma(struct host1x_job *job)
+{
+	struct host1x_cdma *cdma = &job->channel->cdma;
+	struct host1x_syncpt *sp = job->syncpt;
 
-	completed_waiter = kzalloc(sizeof(*completed_waiter), GFP_KERNEL);
-	if (!completed_waiter) {
-		mutex_unlock(&ch->submitlock);
-		err = -ENOMEM;
-		goto error;
-	}
+#if HOST1X_HW >= 6
+	u32 fence;
+	int i = 0;
 
-	/* begin a CDMA submit */
-	err = host1x_cdma_begin(&ch->cdma, job);
-	if (err) {
-		mutex_unlock(&ch->submitlock);
-		goto error;
+	if (job->num_cmds == 0)
+		goto prefences_done;
+	if (!job->cmds[0].is_wait || job->cmds[0].wait.relative)
+		goto prefences_done;
+
+	/* Enter host1x class with invalid stream ID for prefence waits. */
+	host1x_cdma_push_wide(cdma,
+		host1x_opcode_acquire_mlock(1),
+		host1x_opcode_setclass(1, 0, 0),
+		host1x_opcode_setpayload(0),
+		host1x_opcode_setstreamid(0x1fffff));
+
+	for (i = 0; i < job->num_cmds; i++) {
+		struct host1x_job_cmd *cmd = &job->cmds[i];
+
+		if (!cmd->is_wait || cmd->wait.relative)
+			break;
+
+		submit_wait(job, cmd->wait.id, cmd->wait.threshold);
 	}
 
+	host1x_cdma_push(cdma,
+		HOST1X_OPCODE_NOP,
+		host1x_opcode_release_mlock(1));
+
+prefences_done:
+	/* Enter engine class with invalid stream ID. */
+	host1x_cdma_push_wide(cdma,
+		host1x_opcode_acquire_mlock(job->class),
+		host1x_opcode_setclass(job->class, 0, 0),
+		host1x_opcode_setpayload(0),
+		host1x_opcode_setstreamid(job->engine_streamid_offset / 4));
+
+	/* Before switching stream ID to real stream ID, ensure engine is idle. */
+	fence = host1x_syncpt_incr_max(sp, 1);
+	host1x_cdma_push(&job->channel->cdma,
+		host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
+		HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
+			HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
+	submit_wait(job, job->syncpt->id, fence);
+	submit_setclass(job, job->class);
+
+	/* Submit work. */
+	job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs);
+	submit_gathers(job, job->cmds + i, job->num_cmds - i, job->syncpt_end - job->syncpt_incrs);
+
+	/* Before releasing MLOCK, ensure engine is idle again. */
+	fence = host1x_syncpt_incr_max(sp, 1);
+	host1x_cdma_push(&job->channel->cdma,
+		host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
+		HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
+			HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
+	submit_wait(job, job->syncpt->id, fence);
+
+	/* Release MLOCK. */
+	host1x_cdma_push(cdma,
+		HOST1X_OPCODE_NOP, host1x_opcode_release_mlock(job->class));
+#else
 	if (job->serialize) {
 		/*
 		 * Force serialization by inserting a host wait for the
 		 * previous job to finish before this one can commence.
 		 */
-		host1x_cdma_push(&ch->cdma,
+		host1x_cdma_push(cdma,
 				 host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
 					host1x_uclass_wait_syncpt_r(), 1),
-				 host1x_class_host_wait_syncpt(job->syncpt_id,
+				 host1x_class_host_wait_syncpt(job->syncpt->id,
 					host1x_syncpt_read_max(sp)));
 	}
 
-	syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
-
-	job->syncpt_end = syncval;
+	/* Synchronize base register to allow using it for relative waiting */
+	if (sp->base)
+		synchronize_syncpt_base(job);
 
 	/* add a setclass for modules that require it */
 	if (job->class)
-		host1x_cdma_push(&ch->cdma,
+		host1x_cdma_push(cdma,
 				 host1x_opcode_setclass(job->class, 0, 0),
 				 HOST1X_OPCODE_NOP);
 
-	submit_gathers(job);
+	job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs);
+
+	submit_gathers(job, job->cmds, job->num_cmds, job->syncpt_end - job->syncpt_incrs);
+#endif
+}
+
+static void job_complete_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+	struct host1x_job *job = container_of(cb, struct host1x_job, fence_cb);
+
+	/* Schedules CDMA update. */
+	host1x_cdma_update(&job->channel->cdma);
+}
+
+static int channel_submit(struct host1x_job *job)
+{
+	struct host1x_channel *ch = job->channel;
+	struct host1x_syncpt *sp = job->syncpt;
+	u32 prev_max = 0;
+	u32 syncval;
+	int err;
+	struct host1x *host = dev_get_drvdata(ch->dev->parent);
+
+	trace_host1x_channel_submit(dev_name(ch->dev),
+				    job->num_cmds, job->num_relocs,
+				    job->syncpt->id, job->syncpt_incrs);
+
+	/* before error checks, return current max */
+	prev_max = job->syncpt_end = host1x_syncpt_read_max(sp);
+
+	/* get submit lock */
+	err = mutex_lock_interruptible(&ch->submitlock);
+	if (err)
+		return err;
+
+	host1x_channel_set_streamid(ch);
+	host1x_enable_gather_filter(ch);
+	host1x_hw_syncpt_assign_to_channel(host, sp, ch);
+
+	/* begin a CDMA submit */
+	err = host1x_cdma_begin(&ch->cdma, job);
+	if (err) {
+		mutex_unlock(&ch->submitlock);
+		return err;
+	}
+
+	channel_program_cdma(job);
+	syncval = host1x_syncpt_read_max(sp);
+
+	/*
+	 * Create fence before submitting job to HW to avoid job completing
+	 * before the fence is set up.
+	 */
+	job->fence = host1x_fence_create(sp, syncval, true);
+	if (WARN(IS_ERR(job->fence), "Failed to create submit complete fence")) {
+		job->fence = NULL;
+	} else {
+		err = dma_fence_add_callback(job->fence, &job->fence_cb,
+					     job_complete_callback);
+	}
 
 	/* end CDMA submit & stash pinned hMems into sync queue */
 	host1x_cdma_end(&ch->cdma, job);
 
 	trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval);
 
-	/* schedule a submit complete interrupt */
-	err = host1x_intr_add_action(host, job->syncpt_id, syncval,
-				     HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch,
-				     completed_waiter, NULL);
-	completed_waiter = NULL;
-	WARN(err, "Failed to set submit complete interrupt");
-
 	mutex_unlock(&ch->submitlock);
 
-	return 0;
+	if (err == -ENOENT)
+		host1x_cdma_update(&ch->cdma);
+	else
+		WARN(err, "Failed to set submit complete interrupt");
 
-error:
-	kfree(completed_waiter);
-	return err;
+	return 0;
 }
 
 static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
 			       unsigned int index)
 {
-	ch->id = index;
-	mutex_init(&ch->reflock);
-	mutex_init(&ch->submitlock);
-
-	ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE;
+#if HOST1X_HW < 6
+	ch->regs = dev->regs + index * 0x4000;
+#else
+	ch->regs = dev->regs + index * 0x100;
+#endif
 	return 0;
 }
 
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index 334c038052f5..4c32aa1b95e8 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -1,32 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2010 Google, Inc.
  * Author: Erik Gilling <konkers@android.com>
  *
  * Copyright (C) 2011-2013 NVIDIA Corporation
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
  */
 
-#include <linux/debugfs.h>
-#include <linux/seq_file.h>
-#include <linux/mm.h>
-#include <linux/scatterlist.h>
-
-#include <linux/io.h>
-
-#include "dev.h"
-#include "debug.h"
-#include "cdma.h"
-#include "channel.h"
-#include "host1x_bo.h"
+#include "../dev.h"
+#include "../debug.h"
+#include "../cdma.h"
+#include "../channel.h"
 
 #define HOST1X_DEBUG_MAX_PAGE_OFFSET 102400
 
@@ -38,6 +21,13 @@ enum {
 	HOST1X_OPCODE_IMM	= 0x04,
 	HOST1X_OPCODE_RESTART	= 0x05,
 	HOST1X_OPCODE_GATHER	= 0x06,
+	HOST1X_OPCODE_SETSTRMID = 0x07,
+	HOST1X_OPCODE_SETAPPID  = 0x08,
+	HOST1X_OPCODE_SETPYLD   = 0x09,
+	HOST1X_OPCODE_INCR_W    = 0x0a,
+	HOST1X_OPCODE_NONINCR_W = 0x0b,
+	HOST1X_OPCODE_GATHER_W  = 0x0c,
+	HOST1X_OPCODE_RESTART_W = 0x0d,
 	HOST1X_OPCODE_EXTEND	= 0x0e,
 };
 
@@ -46,80 +36,134 @@ enum {
 	HOST1X_OPCODE_EXTEND_RELEASE_MLOCK	= 0x01,
 };
 
-static unsigned int show_channel_command(struct output *o, u32 val)
+#define INVALID_PAYLOAD				0xffffffff
+
+static unsigned int show_channel_command(struct output *o, u32 val,
+					 u32 *payload)
 {
-	unsigned mask;
-	unsigned subop;
+	unsigned int mask, subop, num, opcode;
 
-	switch (val >> 28) {
+	opcode = val >> 28;
+
+	switch (opcode) {
 	case HOST1X_OPCODE_SETCLASS:
 		mask = val & 0x3f;
 		if (mask) {
-			host1x_debug_output(o, "SETCL(class=%03x, offset=%03x, mask=%02x, [",
+			host1x_debug_cont(o, "SETCL(class=%03x, offset=%03x, mask=%02x, [",
 					    val >> 6 & 0x3ff,
 					    val >> 16 & 0xfff, mask);
 			return hweight8(mask);
-		} else {
-			host1x_debug_output(o, "SETCL(class=%03x)\n",
-					    val >> 6 & 0x3ff);
-			return 0;
 		}
 
+		host1x_debug_cont(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff);
+		return 0;
+
 	case HOST1X_OPCODE_INCR:
-		host1x_debug_output(o, "INCR(offset=%03x, [",
+		num = val & 0xffff;
+		host1x_debug_cont(o, "INCR(offset=%03x, [",
 				    val >> 16 & 0xfff);
-		return val & 0xffff;
+		if (!num)
+			host1x_debug_cont(o, "])\n");
+
+		return num;
 
 	case HOST1X_OPCODE_NONINCR:
-		host1x_debug_output(o, "NONINCR(offset=%03x, [",
+		num = val & 0xffff;
+		host1x_debug_cont(o, "NONINCR(offset=%03x, [",
 				    val >> 16 & 0xfff);
-		return val & 0xffff;
+		if (!num)
+			host1x_debug_cont(o, "])\n");
+
+		return num;
 
 	case HOST1X_OPCODE_MASK:
 		mask = val & 0xffff;
-		host1x_debug_output(o, "MASK(offset=%03x, mask=%03x, [",
+		host1x_debug_cont(o, "MASK(offset=%03x, mask=%03x, [",
 				    val >> 16 & 0xfff, mask);
+		if (!mask)
+			host1x_debug_cont(o, "])\n");
+
 		return hweight16(mask);
 
 	case HOST1X_OPCODE_IMM:
-		host1x_debug_output(o, "IMM(offset=%03x, data=%03x)\n",
+		host1x_debug_cont(o, "IMM(offset=%03x, data=%03x)\n",
 				    val >> 16 & 0xfff, val & 0xffff);
 		return 0;
 
 	case HOST1X_OPCODE_RESTART:
-		host1x_debug_output(o, "RESTART(offset=%08x)\n", val << 4);
+		host1x_debug_cont(o, "RESTART(offset=%08x)\n", val << 4);
 		return 0;
 
 	case HOST1X_OPCODE_GATHER:
-		host1x_debug_output(o, "GATHER(offset=%03x, insert=%d, type=%d, count=%04x, addr=[",
+		host1x_debug_cont(o, "GATHER(offset=%03x, insert=%d, type=%d, count=%04x, addr=[",
 				    val >> 16 & 0xfff, val >> 15 & 0x1,
 				    val >> 14 & 0x1, val & 0x3fff);
 		return 1;
 
+#if HOST1X_HW >= 6
+	case HOST1X_OPCODE_SETSTRMID:
+		host1x_debug_cont(o, "SETSTRMID(offset=%06x)\n",
+				  val & 0x3fffff);
+		return 0;
+
+	case HOST1X_OPCODE_SETAPPID:
+		host1x_debug_cont(o, "SETAPPID(appid=%02x)\n", val & 0xff);
+		return 0;
+
+	case HOST1X_OPCODE_SETPYLD:
+		*payload = val & 0xffff;
+		host1x_debug_cont(o, "SETPYLD(data=%04x)\n", *payload);
+		return 0;
+
+	case HOST1X_OPCODE_INCR_W:
+	case HOST1X_OPCODE_NONINCR_W:
+		host1x_debug_cont(o, "%s(offset=%06x, ",
+				  opcode == HOST1X_OPCODE_INCR_W ?
+					"INCR_W" : "NONINCR_W",
+				  val & 0x3fffff);
+		if (*payload == 0) {
+			host1x_debug_cont(o, "[])\n");
+			return 0;
+		} else if (*payload == INVALID_PAYLOAD) {
+			host1x_debug_cont(o, "unknown)\n");
+			return 0;
+		} else {
+			host1x_debug_cont(o, "[");
+			return *payload;
+		}
+
+	case HOST1X_OPCODE_GATHER_W:
+		host1x_debug_cont(o, "GATHER_W(count=%04x, addr=[",
+				  val & 0x3fff);
+		return 2;
+#endif
+
 	case HOST1X_OPCODE_EXTEND:
 		subop = val >> 24 & 0xf;
 		if (subop == HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK)
-			host1x_debug_output(o, "ACQUIRE_MLOCK(index=%d)\n",
+			host1x_debug_cont(o, "ACQUIRE_MLOCK(index=%d)\n",
 					    val & 0xff);
 		else if (subop == HOST1X_OPCODE_EXTEND_RELEASE_MLOCK)
-			host1x_debug_output(o, "RELEASE_MLOCK(index=%d)\n",
+			host1x_debug_cont(o, "RELEASE_MLOCK(index=%d)\n",
 					    val & 0xff);
 		else
-			host1x_debug_output(o, "EXTEND_UNKNOWN(%08x)\n", val);
+			host1x_debug_cont(o, "EXTEND_UNKNOWN(%08x)\n", val);
 		return 0;
 
 	default:
+		host1x_debug_cont(o, "UNKNOWN\n");
 		return 0;
 	}
 }
 
-static void show_gather(struct output *o, phys_addr_t phys_addr,
+static void show_gather(struct output *o, dma_addr_t phys_addr,
 			unsigned int words, struct host1x_cdma *cdma,
-			phys_addr_t pin_addr, u32 *map_addr)
+			dma_addr_t pin_addr, u32 *map_addr)
 {
 	/* Map dmaget cursor to corresponding mem handle */
 	u32 offset = phys_addr - pin_addr;
 	unsigned int data_count = 0, i;
+	u32 payload = INVALID_PAYLOAD;
 
 	/*
 	 * Sometimes we're given different hardware address to the same
@@ -132,15 +176,24 @@ static void show_gather(struct output *o, phys_addr_t phys_addr,
 	}
 
 	for (i = 0; i < words; i++) {
-		u32 addr = phys_addr + i * 4;
-		u32 val = *(map_addr + offset / 4 + i);
+		dma_addr_t addr = phys_addr + i * 4;
+		u32 voffset = offset + i * 4;
+		u32 val;
+
+		/* If we reach the RESTART opcode, continue at the beginning of pushbuffer */
+		if (cdma && voffset >= cdma->push_buffer.size) {
+			addr -= cdma->push_buffer.size;
+			voffset -= cdma->push_buffer.size;
+		}
+
+		val = *(map_addr + voffset / 4);
 
 		if (!data_count) {
-			host1x_debug_output(o, "%08x: %08x:", addr, val);
-			data_count = show_channel_command(o, val);
+			host1x_debug_output(o, "    %pad: %08x: ", &addr, val);
+			data_count = show_channel_command(o, val, &payload);
 		} else {
-			host1x_debug_output(o, "%08x%s", val,
-					    data_count > 0 ? ", " : "])\n");
+			host1x_debug_cont(o, "%08x%s", val,
+					    data_count > 1 ? ", " : "])\n");
 			data_count--;
 		}
 	}
@@ -148,19 +201,28 @@ static void show_gather(struct output *o, phys_addr_t phys_addr,
 
 static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
 {
+	struct push_buffer *pb = &cdma->push_buffer;
 	struct host1x_job *job;
 
 	list_for_each_entry(job, &cdma->sync_queue, list) {
-		int i;
-		host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n",
-				    job, job->syncpt_id, job->syncpt_end,
-				    job->first_get, job->timeout,
+		unsigned int i;
+
+		host1x_debug_output(o, "JOB, syncpt %u: %u timeout: %u num_slots: %u num_handles: %u\n",
+				    job->syncpt->id, job->syncpt_end, job->timeout,
 				    job->num_slots, job->num_unpins);
 
-		for (i = 0; i < job->num_gathers; i++) {
-			struct host1x_job_gather *g = &job->gathers[i];
+		show_gather(o, pb->dma + job->first_get, job->num_slots * 2, cdma,
+			    pb->dma, pb->mapped);
+
+		for (i = 0; i < job->num_cmds; i++) {
+			struct host1x_job_gather *g;
 			u32 *mapped;
 
+			if (job->cmds[i].is_wait)
+				continue;
+
+			g = &job->cmds[i].gather;
+
 			if (job->gather_copy_mapped)
 				mapped = (u32 *)job->gather_copy_mapped;
 			else
@@ -171,10 +233,10 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
 				continue;
 			}
 
-			host1x_debug_output(o, "    GATHER at %08x+%04x, %d words\n",
-					    g->base, g->offset, g->words);
+			host1x_debug_output(o, "  GATHER at %pad+%#x, %d words\n",
+					    &g->base, g->offset, g->words);
 
-			show_gather(o, g->base + g->offset, g->words, cdma,
+			show_gather(o, g->base + g->offset, g->words, NULL,
 				    g->base, mapped);
 
 			if (!job->gather_copy_mapped)
@@ -183,137 +245,11 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
 	}
 }
 
-static void host1x_debug_show_channel_cdma(struct host1x *host,
-					   struct host1x_channel *ch,
-					   struct output *o)
-{
-	struct host1x_cdma *cdma = &ch->cdma;
-	u32 dmaput, dmaget, dmactrl;
-	u32 cbstat, cbread;
-	u32 val, base, baseval;
-
-	dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
-	dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
-	dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
-	cbread = host1x_sync_readl(host, HOST1X_SYNC_CBREAD(ch->id));
-	cbstat = host1x_sync_readl(host, HOST1X_SYNC_CBSTAT(ch->id));
-
-	host1x_debug_output(o, "%d-%s: ", ch->id, dev_name(ch->dev));
-
-	if (HOST1X_CHANNEL_DMACTRL_DMASTOP_V(dmactrl) ||
-	    !ch->cdma.push_buffer.mapped) {
-		host1x_debug_output(o, "inactive\n\n");
-		return;
-	}
-
-	if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) == HOST1X_CLASS_HOST1X &&
-	    HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) ==
-	    HOST1X_UCLASS_WAIT_SYNCPT)
-		host1x_debug_output(o, "waiting on syncpt %d val %d\n",
-				    cbread >> 24, cbread & 0xffffff);
-	else if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) ==
-	   HOST1X_CLASS_HOST1X &&
-	   HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) ==
-	   HOST1X_UCLASS_WAIT_SYNCPT_BASE) {
-
-		base = (cbread >> 16) & 0xff;
-		baseval =
-			host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(base));
-		val = cbread & 0xffff;
-		host1x_debug_output(o, "waiting on syncpt %d val %d (base %d = %d; offset = %d)\n",
-				    cbread >> 24, baseval + val, base,
-				    baseval, val);
-	} else
-		host1x_debug_output(o, "active class %02x, offset %04x, val %08x\n",
-				    HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat),
-				    HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat),
-				    cbread);
-
-	host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n",
-			    dmaput, dmaget, dmactrl);
-	host1x_debug_output(o, "CBREAD %08x, CBSTAT %08x\n", cbread, cbstat);
-
-	show_channel_gathers(o, cdma);
-	host1x_debug_output(o, "\n");
-}
-
-static void host1x_debug_show_channel_fifo(struct host1x *host,
-					   struct host1x_channel *ch,
-					   struct output *o)
-{
-	u32 val, rd_ptr, wr_ptr, start, end;
-	unsigned int data_count = 0;
-
-	host1x_debug_output(o, "%d: fifo:\n", ch->id);
-
-	val = host1x_ch_readl(ch, HOST1X_CHANNEL_FIFOSTAT);
-	host1x_debug_output(o, "FIFOSTAT %08x\n", val);
-	if (HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(val)) {
-		host1x_debug_output(o, "[empty]\n");
-		return;
-	}
-
-	host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
-	host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) |
-			   HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id),
-			   HOST1X_SYNC_CFPEEK_CTRL);
-
-	val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_PTRS);
-	rd_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(val);
-	wr_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(val);
-
-	val = host1x_sync_readl(host, HOST1X_SYNC_CF_SETUP(ch->id));
-	start = HOST1X_SYNC_CF_SETUP_BASE_V(val);
-	end = HOST1X_SYNC_CF_SETUP_LIMIT_V(val);
-
-	do {
-		host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
-		host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) |
-				   HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id) |
-				   HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(rd_ptr),
-				   HOST1X_SYNC_CFPEEK_CTRL);
-		val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_READ);
-
-		if (!data_count) {
-			host1x_debug_output(o, "%08x:", val);
-			data_count = show_channel_command(o, val);
-		} else {
-			host1x_debug_output(o, "%08x%s", val,
-					    data_count > 0 ? ", " : "])\n");
-			data_count--;
-		}
-
-		if (rd_ptr == end)
-			rd_ptr = start;
-		else
-			rd_ptr++;
-	} while (rd_ptr != wr_ptr);
-
-	if (data_count)
-		host1x_debug_output(o, ", ...])\n");
-	host1x_debug_output(o, "\n");
-
-	host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
-}
-
-static void host1x_debug_show_mlocks(struct host1x *host, struct output *o)
-{
-	int i;
-
-	host1x_debug_output(o, "---- mlocks ----\n");
-	for (i = 0; i < host1x_syncpt_nb_mlocks(host); i++) {
-		u32 owner =
-			host1x_sync_readl(host, HOST1X_SYNC_MLOCK_OWNER(i));
-		if (HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(owner))
-			host1x_debug_output(o, "%d: locked by channel %d\n",
-				i, HOST1X_SYNC_MLOCK_OWNER_CHID_F(owner));
-		else if (HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(owner))
-			host1x_debug_output(o, "%d: locked by cpu\n", i);
-		else
-			host1x_debug_output(o, "%d: unlocked\n", i);
-	}
-	host1x_debug_output(o, "\n");
-}
+#if HOST1X_HW >= 6
+#include "debug_hw_1x06.c"
+#else
+#include "debug_hw_1x01.c"
+#endif
 
 static const struct host1x_debug_ops host1x_debug_ops = {
 	.show_channel_cdma = host1x_debug_show_channel_cdma,
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c b/drivers/gpu/host1x/hw/debug_hw_1x01.c
new file mode 100644
index 000000000000..85242a59fa6a
--- /dev/null
+++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Erik Gilling <konkers@android.com>
+ *
+ * Copyright (C) 2011-2013 NVIDIA Corporation
+ */
+
+#include "../dev.h"
+#include "../debug.h"
+#include "../cdma.h"
+#include "../channel.h"
+
+static void host1x_debug_show_channel_cdma(struct host1x *host,
+					   struct host1x_channel *ch,
+					   struct output *o)
+{
+	struct host1x_cdma *cdma = &ch->cdma;
+	dma_addr_t dmastart, dmaend;
+	u32 dmaput, dmaget, dmactrl;
+	u32 cbstat, cbread;
+	u32 val, base, baseval;
+
+	dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART);
+	dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND);
+	dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
+	dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
+	dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
+	cbread = host1x_sync_readl(host, HOST1X_SYNC_CBREAD(ch->id));
+	cbstat = host1x_sync_readl(host, HOST1X_SYNC_CBSTAT(ch->id));
+
+	host1x_debug_output(o, "%u-%s: ", ch->id, dev_name(ch->dev));
+
+	if (HOST1X_CHANNEL_DMACTRL_DMASTOP_V(dmactrl) ||
+	    !ch->cdma.push_buffer.mapped) {
+		host1x_debug_output(o, "inactive\n\n");
+		return;
+	}
+
+	if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) == HOST1X_CLASS_HOST1X &&
+	    HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) ==
+			HOST1X_UCLASS_WAIT_SYNCPT)
+		host1x_debug_output(o, "waiting on syncpt %d val %d\n",
+				    cbread >> 24, cbread & 0xffffff);
+	else if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) ==
+				HOST1X_CLASS_HOST1X &&
+		 HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) ==
+				HOST1X_UCLASS_WAIT_SYNCPT_BASE) {
+		base = (cbread >> 16) & 0xff;
+		baseval =
+			host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(base));
+		val = cbread & 0xffff;
+		host1x_debug_output(o, "waiting on syncpt %d val %d (base %d = %d; offset = %d)\n",
+				    cbread >> 24, baseval + val, base,
+				    baseval, val);
+	} else
+		host1x_debug_output(o, "active class %02x, offset %04x, val %08x\n",
+				    HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat),
+				    HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat),
+				    cbread);
+
+	host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend);
+	host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n",
+			    dmaput, dmaget, dmactrl);
+	host1x_debug_output(o, "CBREAD %08x CBSTAT %08x\n", cbread, cbstat);
+
+	show_channel_gathers(o, cdma);
+	host1x_debug_output(o, "\n");
+}
+
+static void host1x_debug_show_channel_fifo(struct host1x *host,
+					   struct host1x_channel *ch,
+					   struct output *o)
+{
+	u32 val, rd_ptr, wr_ptr, start, end;
+	unsigned int data_count = 0;
+
+	host1x_debug_output(o, "%u: fifo:\n", ch->id);
+
+	val = host1x_ch_readl(ch, HOST1X_CHANNEL_FIFOSTAT);
+	host1x_debug_output(o, "FIFOSTAT %08x\n", val);
+	if (HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(val)) {
+		host1x_debug_output(o, "[empty]\n");
+		return;
+	}
+
+	host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
+	host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) |
+			   HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id),
+			   HOST1X_SYNC_CFPEEK_CTRL);
+
+	val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_PTRS);
+	rd_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(val);
+	wr_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(val);
+
+	val = host1x_sync_readl(host, HOST1X_SYNC_CF_SETUP(ch->id));
+	start = HOST1X_SYNC_CF_SETUP_BASE_V(val);
+	end = HOST1X_SYNC_CF_SETUP_LIMIT_V(val);
+
+	do {
+		host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
+		host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) |
+				   HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id) |
+				   HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(rd_ptr),
+				   HOST1X_SYNC_CFPEEK_CTRL);
+		val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_READ);
+
+		if (!data_count) {
+			host1x_debug_output(o, "%08x: ", val);
+			data_count = show_channel_command(o, val, NULL);
+		} else {
+			host1x_debug_cont(o, "%08x%s", val,
+					  data_count > 1 ? ", " : "])\n");
+			data_count--;
+		}
+
+		if (rd_ptr == end)
+			rd_ptr = start;
+		else
+			rd_ptr++;
+	} while (rd_ptr != wr_ptr);
+
+	if (data_count)
+		host1x_debug_cont(o, ", ...])\n");
+	host1x_debug_output(o, "\n");
+
+	host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL);
+}
+
+static void host1x_debug_show_mlocks(struct host1x *host, struct output *o)
+{
+	unsigned int i;
+
+	host1x_debug_output(o, "---- mlocks ----\n");
+
+	for (i = 0; i < host1x_syncpt_nb_mlocks(host); i++) {
+		u32 owner =
+			host1x_sync_readl(host, HOST1X_SYNC_MLOCK_OWNER(i));
+		if (HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(owner))
+			host1x_debug_output(o, "%u: locked by channel %u\n",
+				i, HOST1X_SYNC_MLOCK_OWNER_CHID_V(owner));
+		else if (HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(owner))
+			host1x_debug_output(o, "%u: locked by cpu\n", i);
+		else
+			host1x_debug_output(o, "%u: unlocked\n", i);
+	}
+
+	host1x_debug_output(o, "\n");
+}
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c b/drivers/gpu/host1x/hw/debug_hw_1x06.c
new file mode 100644
index 000000000000..9d0667879a19
--- /dev/null
+++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2010 Google, Inc.
+ * Author: Erik Gilling <konkers@android.com>
+ *
+ * Copyright (C) 2011-2017 NVIDIA Corporation
+ */
+
+#include "../dev.h"
+#include "../debug.h"
+#include "../cdma.h"
+#include "../channel.h"
+
+static void host1x_debug_show_channel_cdma(struct host1x *host,
+					   struct host1x_channel *ch,
+					   struct output *o)
+{
+	struct host1x_cdma *cdma = &ch->cdma;
+	dma_addr_t dmastart = 0, dmaend = 0;
+	u32 dmaput, dmaget, dmactrl;
+	u32 offset, class;
+	u32 ch_stat;
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6
+	dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART_HI);
+	dmastart <<= 32;
+#endif
+	dmastart |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART);
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6
+	dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND_HI);
+	dmaend <<= 32;
+#endif
+	dmaend |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND);
+
+	dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
+	dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
+	dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
+	offset = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDP_OFFSET);
+	class = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDP_CLASS);
+	ch_stat = host1x_ch_readl(ch, HOST1X_CHANNEL_CHANNELSTAT);
+
+	host1x_debug_output(o, "%u-%s: ", ch->id, dev_name(ch->dev));
+
+	if (dmactrl & HOST1X_CHANNEL_DMACTRL_DMASTOP ||
+	    !ch->cdma.push_buffer.mapped) {
+		host1x_debug_output(o, "inactive\n\n");
+		return;
+	}
+
+	if (class == HOST1X_CLASS_HOST1X && offset == HOST1X_UCLASS_WAIT_SYNCPT)
+		host1x_debug_output(o, "waiting on syncpt\n");
+	else
+		host1x_debug_output(o, "active class %02x, offset %04x\n",
+				    class, offset);
+
+	host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend);
+	host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n",
+			    dmaput, dmaget, dmactrl);
+	host1x_debug_output(o, "CHANNELSTAT %02x\n", ch_stat);
+
+	show_channel_gathers(o, cdma);
+	host1x_debug_output(o, "\n");
+}
+
+static void host1x_debug_show_channel_fifo(struct host1x *host,
+					   struct host1x_channel *ch,
+					   struct output *o)
+{
+#if HOST1X_HW <= 6
+	u32 rd_ptr, wr_ptr, start, end;
+	u32 payload = INVALID_PAYLOAD;
+	unsigned int data_count = 0;
+#endif
+	u32 val;
+
+	host1x_debug_output(o, "%u: fifo:\n", ch->id);
+
+	val = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDFIFO_STAT);
+	host1x_debug_output(o, "CMDFIFO_STAT %08x\n", val);
+	if (val & HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY) {
+		host1x_debug_output(o, "[empty]\n");
+		return;
+	}
+
+	val = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDFIFO_RDATA);
+	host1x_debug_output(o, "CMDFIFO_RDATA %08x\n", val);
+
+#if HOST1X_HW <= 6
+	/* Peek pointer values are invalid during SLCG, so disable it */
+	host1x_hypervisor_writel(host, 0x1, HOST1X_HV_ICG_EN_OVERRIDE);
+
+	val = 0;
+	val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE;
+	val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(ch->id);
+	host1x_hypervisor_writel(host, val, HOST1X_HV_CMDFIFO_PEEK_CTRL);
+
+	val = host1x_hypervisor_readl(host, HOST1X_HV_CMDFIFO_PEEK_PTRS);
+	rd_ptr = HOST1X_HV_CMDFIFO_PEEK_PTRS_RD_PTR_V(val);
+	wr_ptr = HOST1X_HV_CMDFIFO_PEEK_PTRS_WR_PTR_V(val);
+
+	val = host1x_hypervisor_readl(host, HOST1X_HV_CMDFIFO_SETUP(ch->id));
+	start = HOST1X_HV_CMDFIFO_SETUP_BASE_V(val);
+	end = HOST1X_HV_CMDFIFO_SETUP_LIMIT_V(val);
+
+	do {
+		val = 0;
+		val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE;
+		val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(ch->id);
+		val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_ADDR(rd_ptr);
+		host1x_hypervisor_writel(host, val,
+					 HOST1X_HV_CMDFIFO_PEEK_CTRL);
+
+		val = host1x_hypervisor_readl(host,
+					      HOST1X_HV_CMDFIFO_PEEK_READ);
+
+		if (!data_count) {
+			host1x_debug_output(o, "%03x 0x%08x: ",
+					    rd_ptr - start, val);
+			data_count = show_channel_command(o, val, &payload);
+		} else {
+			host1x_debug_cont(o, "%08x%s", val,
+					  data_count > 1 ? ", " : "])\n");
+			data_count--;
+		}
+
+		if (rd_ptr == end)
+			rd_ptr = start;
+		else
+			rd_ptr++;
+	} while (rd_ptr != wr_ptr);
+
+	if (data_count)
+		host1x_debug_cont(o, ", ...])\n");
+	host1x_debug_output(o, "\n");
+
+	host1x_hypervisor_writel(host, 0x0, HOST1X_HV_CMDFIFO_PEEK_CTRL);
+	host1x_hypervisor_writel(host, 0x0, HOST1X_HV_ICG_EN_OVERRIDE);
+#endif
+}
+
+static void host1x_debug_show_mlocks(struct host1x *host, struct output *o)
+{
+	/* TODO */
+}
diff --git a/drivers/gpu/host1x/hw/host1x01.c b/drivers/gpu/host1x/hw/host1x01.c
index a14e91cd1e58..8d8a117a5153 100644
--- a/drivers/gpu/host1x/hw/host1x01.c
+++ b/drivers/gpu/host1x/hw/host1x01.c
@@ -1,33 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Host1x init for T20 and T30 Architecture Chips
  *
  * Copyright (c) 2011-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 /* include hw specification */
-#include "hw/host1x01.h"
-#include "hw/host1x01_hardware.h"
+#include "host1x01.h"
+#include "host1x01_hardware.h"
 
 /* include code */
-#include "hw/cdma_hw.c"
-#include "hw/channel_hw.c"
-#include "hw/debug_hw.c"
-#include "hw/intr_hw.c"
-#include "hw/syncpt_hw.c"
+#define HOST1X_HW 1
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
 
-#include "dev.h"
+#include "../dev.h"
 
 int host1x01_init(struct host1x *host)
 {
diff --git a/drivers/gpu/host1x/hw/host1x01.h b/drivers/gpu/host1x/hw/host1x01.h
index 2706b6743250..6516c3f1edf5 100644
--- a/drivers/gpu/host1x/hw/host1x01.h
+++ b/drivers/gpu/host1x/hw/host1x01.h
@@ -1,19 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Host1x init for T20 and T30 Architecture Chips
  *
  * Copyright (c) 2011-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 #ifndef HOST1X_HOST1X01_H
 #define HOST1X_HOST1X01_H
diff --git a/drivers/gpu/host1x/hw/host1x01_hardware.h b/drivers/gpu/host1x/hw/host1x01_hardware.h
index 5f0fb866efa8..cb93d7c1808c 100644
--- a/drivers/gpu/host1x/hw/host1x01_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x01_hardware.h
@@ -1,19 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Tegra host1x Register Offsets for Tegra20 and Tegra30
  *
  * Copyright (c) 2010-2013 NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __HOST1X_HOST1X01_HARDWARE_H
@@ -26,118 +15,6 @@
 #include "hw_host1x01_sync.h"
 #include "hw_host1x01_uclass.h"
 
-static inline u32 host1x_class_host_wait_syncpt(
-	unsigned indx, unsigned threshold)
-{
-	return host1x_uclass_wait_syncpt_indx_f(indx)
-		| host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
-	unsigned indx, unsigned threshold)
-{
-	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
-		| host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
-	unsigned indx, unsigned base_indx, unsigned offset)
-{
-	return host1x_uclass_wait_syncpt_base_indx_f(indx)
-		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
-		| host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
-	unsigned base_indx, unsigned offset)
-{
-	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
-		| host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
-	unsigned cond, unsigned indx)
-{
-	return host1x_uclass_incr_syncpt_cond_f(cond)
-		| host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
-	unsigned mod_id, unsigned offset, bool auto_inc)
-{
-	u32 v = host1x_uclass_indoff_indbe_f(0xf)
-		| host1x_uclass_indoff_indmodid_f(mod_id)
-		| host1x_uclass_indoff_indroffset_f(offset);
-	if (auto_inc)
-		v |= host1x_uclass_indoff_autoinc_f(1);
-	return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
-	unsigned mod_id, unsigned offset, bool auto_inc)
-{
-	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
-		| host1x_uclass_indoff_indroffset_f(offset)
-		| host1x_uclass_indoff_rwn_read_v();
-	if (auto_inc)
-		v |= host1x_uclass_indoff_autoinc_f(1);
-	return v;
-}
-
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
-	unsigned class_id, unsigned offset, unsigned mask)
-{
-	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
-	return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
-	return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
-	return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
-	return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
-	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
-		host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
-	return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
-	return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
-{
-	return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
-	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
 
 #endif
diff --git a/drivers/gpu/host1x/hw/host1x02.c b/drivers/gpu/host1x/hw/host1x02.c
new file mode 100644
index 000000000000..583b33c04884
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x02.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for Tegra114 SoCs
+ *
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x02.h"
+#include "host1x02_hardware.h"
+
+/* include code */
+#define HOST1X_HW 2
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x02_init(struct host1x *host)
+{
+	host->channel_op = &host1x_channel_ops;
+	host->cdma_op = &host1x_cdma_ops;
+	host->cdma_pb_op = &host1x_pushbuffer_ops;
+	host->syncpt_op = &host1x_syncpt_ops;
+	host->intr_op = &host1x_intr_ops;
+	host->debug_op = &host1x_debug_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x02.h b/drivers/gpu/host1x/hw/host1x02.h
new file mode 100644
index 000000000000..7e5c3e4700d2
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x02.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x init for Tegra114 SoCs
+ *
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HOST1X02_H
+#define HOST1X_HOST1X02_H
+
+struct host1x;
+
+int host1x02_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x02_hardware.h b/drivers/gpu/host1x/hw/host1x02_hardware.h
new file mode 100644
index 000000000000..2d1282b9bc33
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x02_hardware.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Register Offsets for Tegra114
+ *
+ * Copyright (c) 2010-2013 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_HOST1X02_HARDWARE_H
+#define __HOST1X_HOST1X02_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x02_channel.h"
+#include "hw_host1x02_sync.h"
+#include "hw_host1x02_uclass.h"
+
+#include "opcodes.h"
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x04.c b/drivers/gpu/host1x/hw/host1x04.c
new file mode 100644
index 000000000000..26b459eb2d3e
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x04.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for Tegra124 SoCs
+ *
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x04.h"
+#include "host1x04_hardware.h"
+
+/* include code */
+#define HOST1X_HW 4
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x04_init(struct host1x *host)
+{
+	host->channel_op = &host1x_channel_ops;
+	host->cdma_op = &host1x_cdma_ops;
+	host->cdma_pb_op = &host1x_pushbuffer_ops;
+	host->syncpt_op = &host1x_syncpt_ops;
+	host->intr_op = &host1x_intr_ops;
+	host->debug_op = &host1x_debug_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x04.h b/drivers/gpu/host1x/hw/host1x04.h
new file mode 100644
index 000000000000..2a1e8153c5fe
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x04.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x init for Tegra124 SoCs
+ *
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HOST1X04_H
+#define HOST1X_HOST1X04_H
+
+struct host1x;
+
+int host1x04_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x04_hardware.h b/drivers/gpu/host1x/hw/host1x04_hardware.h
new file mode 100644
index 000000000000..84d244e8af30
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x04_hardware.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Register Offsets for Tegra124
+ *
+ * Copyright (c) 2010-2013 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_HOST1X04_HARDWARE_H
+#define __HOST1X_HOST1X04_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x04_channel.h"
+#include "hw_host1x04_sync.h"
+#include "hw_host1x04_uclass.h"
+
+#include "opcodes.h"
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x05.c b/drivers/gpu/host1x/hw/host1x05.c
new file mode 100644
index 000000000000..6d9803343aae
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x05.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for Tegra210 SoCs
+ *
+ * Copyright (c) 2015 NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x05.h"
+#include "host1x05_hardware.h"
+
+/* include code */
+#define HOST1X_HW 5
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x05_init(struct host1x *host)
+{
+	host->channel_op = &host1x_channel_ops;
+	host->cdma_op = &host1x_cdma_ops;
+	host->cdma_pb_op = &host1x_pushbuffer_ops;
+	host->syncpt_op = &host1x_syncpt_ops;
+	host->intr_op = &host1x_intr_ops;
+	host->debug_op = &host1x_debug_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x05.h b/drivers/gpu/host1x/hw/host1x05.h
new file mode 100644
index 000000000000..addfd41e7ef3
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x05.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x init for Tegra210 SoCs
+ *
+ * Copyright (c) 2015 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HOST1X05_H
+#define HOST1X_HOST1X05_H
+
+struct host1x;
+
+int host1x05_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x05_hardware.h b/drivers/gpu/host1x/hw/host1x05_hardware.h
new file mode 100644
index 000000000000..1dcde6ec7909
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x05_hardware.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Register Offsets for Tegra210
+ *
+ * Copyright (c) 2015 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_HOST1X05_HARDWARE_H
+#define __HOST1X_HOST1X05_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x05_channel.h"
+#include "hw_host1x05_sync.h"
+#include "hw_host1x05_uclass.h"
+
+#include "opcodes.h"
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x06.c b/drivers/gpu/host1x/hw/host1x06.c
new file mode 100644
index 000000000000..844f81ae2d7e
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x06.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for Tegra186 SoCs
+ *
+ * Copyright (c) 2017 NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x06.h"
+#include "host1x06_hardware.h"
+
+/* include code */
+#define HOST1X_HW 6
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x06_init(struct host1x *host)
+{
+	host->channel_op = &host1x_channel_ops;
+	host->cdma_op = &host1x_cdma_ops;
+	host->cdma_pb_op = &host1x_pushbuffer_ops;
+	host->syncpt_op = &host1x_syncpt_ops;
+	host->intr_op = &host1x_intr_ops;
+	host->debug_op = &host1x_debug_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x06.h b/drivers/gpu/host1x/hw/host1x06.h
new file mode 100644
index 000000000000..4ea756895ca5
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x06.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x init for Tegra186 SoCs
+ *
+ * Copyright (c) 2017 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HOST1X06_H
+#define HOST1X_HOST1X06_H
+
+struct host1x;
+
+int host1x06_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x06_hardware.h b/drivers/gpu/host1x/hw/host1x06_hardware.h
new file mode 100644
index 000000000000..c05cfa7e3090
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x06_hardware.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Register Offsets for Tegra186
+ *
+ * Copyright (c) 2017 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_HOST1X06_HARDWARE_H
+#define __HOST1X_HOST1X06_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x06_channel.h"
+#include "hw_host1x06_uclass.h"
+#include "hw_host1x06_vm.h"
+#include "hw_host1x06_hypervisor.h"
+
+#include "opcodes.h"
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x07.c b/drivers/gpu/host1x/hw/host1x07.c
new file mode 100644
index 000000000000..0c6f14f7ec80
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x07.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for Tegra194 SoCs
+ *
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x07.h"
+#include "host1x07_hardware.h"
+
+/* include code */
+#define HOST1X_HW 7
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x07_init(struct host1x *host)
+{
+	host->channel_op = &host1x_channel_ops;
+	host->cdma_op = &host1x_cdma_ops;
+	host->cdma_pb_op = &host1x_pushbuffer_ops;
+	host->syncpt_op = &host1x_syncpt_ops;
+	host->intr_op = &host1x_intr_ops;
+	host->debug_op = &host1x_debug_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x07.h b/drivers/gpu/host1x/hw/host1x07.h
new file mode 100644
index 000000000000..419b6eaad3d8
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x07.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x init for Tegra194 SoCs
+ *
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HOST1X07_H
+#define HOST1X_HOST1X07_H
+
+struct host1x;
+
+int host1x07_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x07_hardware.h b/drivers/gpu/host1x/hw/host1x07_hardware.h
new file mode 100644
index 000000000000..d67364e03956
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x07_hardware.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Register Offsets for Tegra194
+ *
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_HOST1X07_HARDWARE_H
+#define __HOST1X_HOST1X07_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x07_channel.h"
+#include "hw_host1x07_uclass.h"
+#include "hw_host1x07_vm.h"
+#include "hw_host1x07_hypervisor.h"
+
+#include "opcodes.h"
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x08.c b/drivers/gpu/host1x/hw/host1x08.c
new file mode 100644
index 000000000000..754890c34c74
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for Tegra234 SoCs
+ *
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x08.h"
+#include "host1x08_hardware.h"
+
+/* include code */
+#define HOST1X_HW 8
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x08_init(struct host1x *host)
+{
+	host->channel_op = &host1x_channel_ops;
+	host->cdma_op = &host1x_cdma_ops;
+	host->cdma_pb_op = &host1x_pushbuffer_ops;
+	host->syncpt_op = &host1x_syncpt_ops;
+	host->intr_op = &host1x_intr_ops;
+	host->debug_op = &host1x_debug_ops;
+
+	return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x08.h b/drivers/gpu/host1x/hw/host1x08.h
new file mode 100644
index 000000000000..a6bad56e44cf
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x init for Tegra234 SoCs
+ *
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HOST1X08_H
+#define HOST1X_HOST1X08_H
+
+struct host1x;
+
+int host1x08_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x08_hardware.h b/drivers/gpu/host1x/hw/host1x08_hardware.h
new file mode 100644
index 000000000000..936243060bff
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08_hardware.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Register Offsets for Tegra234
+ *
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_HOST1X08_HARDWARE_H
+#define __HOST1X_HOST1X08_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x08_uclass.h"
+#include "hw_host1x08_vm.h"
+#include "hw_host1x08_hypervisor.h"
+#include "hw_host1x08_common.h"
+
+#include "opcodes.h"
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x01_channel.h b/drivers/gpu/host1x/hw/hw_host1x01_channel.h
index b4bc7ca4e051..8da43eaba2c8 100644
--- a/drivers/gpu/host1x/hw/hw_host1x01_channel.h
+++ b/drivers/gpu/host1x/hw/hw_host1x01_channel.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2012-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
  */
 
  /*
diff --git a/drivers/gpu/host1x/hw/hw_host1x01_sync.h b/drivers/gpu/host1x/hw/hw_host1x01_sync.h
index ac704e579977..ec95e7ae7ca5 100644
--- a/drivers/gpu/host1x/hw/hw_host1x01_sync.h
+++ b/drivers/gpu/host1x/hw/hw_host1x01_sync.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2012-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
  */
 
  /*
@@ -131,12 +119,12 @@ static inline u32 host1x_sync_mlock_owner_r(unsigned int id)
 }
 #define HOST1X_SYNC_MLOCK_OWNER(id) \
 	host1x_sync_mlock_owner_r(id)
-static inline u32 host1x_sync_mlock_owner_chid_f(u32 v)
+static inline u32 host1x_sync_mlock_owner_chid_v(u32 v)
 {
-	return (v & 0xf) << 8;
+	return (v >> 8) & 0xf;
 }
-#define HOST1X_SYNC_MLOCK_OWNER_CHID_F(v) \
-	host1x_sync_mlock_owner_chid_f(v)
+#define HOST1X_SYNC_MLOCK_OWNER_CHID_V(v) \
+	host1x_sync_mlock_owner_chid_v(v)
 static inline u32 host1x_sync_mlock_owner_cpu_owns_v(u32 r)
 {
 	return (r >> 1) & 0x1;
diff --git a/drivers/gpu/host1x/hw/hw_host1x01_uclass.h b/drivers/gpu/host1x/hw/hw_host1x01_uclass.h
index 42f3ce19ca32..1239bfd46a5e 100644
--- a/drivers/gpu/host1x/hw/hw_host1x01_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x01_uclass.h
@@ -1,18 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Copyright (c) 2012-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
- *
  */
 
  /*
@@ -111,6 +99,12 @@ static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
 }
 #define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
 	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
 static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
 {
 	return (v & 0xff) << 24;
diff --git a/drivers/gpu/host1x/hw/hw_host1x02_channel.h b/drivers/gpu/host1x/hw/hw_host1x02_channel.h
new file mode 100644
index 000000000000..210d317ad2b7
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x02_channel.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X02_CHANNEL_H
+#define HOST1X_HW_HOST1X02_CHANNEL_H
+
+static inline u32 host1x_channel_fifostat_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_CHANNEL_FIFOSTAT \
+	host1x_channel_fifostat_r()
+static inline u32 host1x_channel_fifostat_cfempty_v(u32 r)
+{
+	return (r >> 11) & 0x1;
+}
+#define HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(r) \
+	host1x_channel_fifostat_cfempty_v(r)
+static inline u32 host1x_channel_dmastart_r(void)
+{
+	return 0x14;
+}
+#define HOST1X_CHANNEL_DMASTART \
+	host1x_channel_dmastart_r()
+static inline u32 host1x_channel_dmaput_r(void)
+{
+	return 0x18;
+}
+#define HOST1X_CHANNEL_DMAPUT \
+	host1x_channel_dmaput_r()
+static inline u32 host1x_channel_dmaget_r(void)
+{
+	return 0x1c;
+}
+#define HOST1X_CHANNEL_DMAGET \
+	host1x_channel_dmaget_r()
+static inline u32 host1x_channel_dmaend_r(void)
+{
+	return 0x20;
+}
+#define HOST1X_CHANNEL_DMAEND \
+	host1x_channel_dmaend_r()
+static inline u32 host1x_channel_dmactrl_r(void)
+{
+	return 0x24;
+}
+#define HOST1X_CHANNEL_DMACTRL \
+	host1x_channel_dmactrl_r()
+static inline u32 host1x_channel_dmactrl_dmastop(void)
+{
+	return 1 << 0;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP \
+	host1x_channel_dmactrl_dmastop()
+static inline u32 host1x_channel_dmactrl_dmastop_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP_V(r) \
+	host1x_channel_dmactrl_dmastop_v(r)
+static inline u32 host1x_channel_dmactrl_dmagetrst(void)
+{
+	return 1 << 1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST \
+	host1x_channel_dmactrl_dmagetrst()
+static inline u32 host1x_channel_dmactrl_dmainitget(void)
+{
+	return 1 << 2;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
+	host1x_channel_dmactrl_dmainitget()
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x02_sync.h b/drivers/gpu/host1x/hw/hw_host1x02_sync.h
new file mode 100644
index 000000000000..44b4f8379732
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x02_sync.h
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X02_SYNC_H
+#define HOST1X_HW_HOST1X02_SYNC_H
+
+#define REGISTER_STRIDE	4
+
+static inline u32 host1x_sync_syncpt_r(unsigned int id)
+{
+	return 0x400 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT(id) \
+	host1x_sync_syncpt_r(id)
+static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(unsigned int id)
+{
+	return 0x40 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id) \
+	host1x_sync_syncpt_thresh_cpu0_int_status_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_disable_r(unsigned int id)
+{
+	return 0x60 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id) \
+	host1x_sync_syncpt_thresh_int_disable_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id)
+{
+	return 0x68 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \
+	host1x_sync_syncpt_thresh_int_enable_cpu0_r(id)
+static inline u32 host1x_sync_cf_setup_r(unsigned int channel)
+{
+	return 0x80 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CF_SETUP(channel) \
+	host1x_sync_cf_setup_r(channel)
+static inline u32 host1x_sync_cf_setup_base_v(u32 r)
+{
+	return (r >> 0) & 0x3ff;
+}
+#define HOST1X_SYNC_CF_SETUP_BASE_V(r) \
+	host1x_sync_cf_setup_base_v(r)
+static inline u32 host1x_sync_cf_setup_limit_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CF_SETUP_LIMIT_V(r) \
+	host1x_sync_cf_setup_limit_v(r)
+static inline u32 host1x_sync_cmdproc_stop_r(void)
+{
+	return 0xac;
+}
+#define HOST1X_SYNC_CMDPROC_STOP \
+	host1x_sync_cmdproc_stop_r()
+static inline u32 host1x_sync_ch_teardown_r(void)
+{
+	return 0xb0;
+}
+#define HOST1X_SYNC_CH_TEARDOWN \
+	host1x_sync_ch_teardown_r()
+static inline u32 host1x_sync_usec_clk_r(void)
+{
+	return 0x1a4;
+}
+#define HOST1X_SYNC_USEC_CLK \
+	host1x_sync_usec_clk_r()
+static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void)
+{
+	return 0x1a8;
+}
+#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \
+	host1x_sync_ctxsw_timeout_cfg_r()
+static inline u32 host1x_sync_ip_busy_timeout_r(void)
+{
+	return 0x1bc;
+}
+#define HOST1X_SYNC_IP_BUSY_TIMEOUT \
+	host1x_sync_ip_busy_timeout_r()
+static inline u32 host1x_sync_mlock_owner_r(unsigned int id)
+{
+	return 0x340 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_MLOCK_OWNER(id) \
+	host1x_sync_mlock_owner_r(id)
+static inline u32 host1x_sync_mlock_owner_chid_v(u32 v)
+{
+	return (v >> 8) & 0xf;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CHID_V(v) \
+	host1x_sync_mlock_owner_chid_v(v)
+static inline u32 host1x_sync_mlock_owner_cpu_owns_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(r) \
+	host1x_sync_mlock_owner_cpu_owns_v(r)
+static inline u32 host1x_sync_mlock_owner_ch_owns_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(r) \
+	host1x_sync_mlock_owner_ch_owns_v(r)
+static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id)
+{
+	return 0x500 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(id) \
+	host1x_sync_syncpt_int_thresh_r(id)
+static inline u32 host1x_sync_syncpt_base_r(unsigned int id)
+{
+	return 0x600 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_BASE(id) \
+	host1x_sync_syncpt_base_r(id)
+static inline u32 host1x_sync_syncpt_cpu_incr_r(unsigned int id)
+{
+	return 0x700 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(id) \
+	host1x_sync_syncpt_cpu_incr_r(id)
+static inline u32 host1x_sync_cbread_r(unsigned int channel)
+{
+	return 0x720 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CBREAD(channel) \
+	host1x_sync_cbread_r(channel)
+static inline u32 host1x_sync_cfpeek_ctrl_r(void)
+{
+	return 0x74c;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL \
+	host1x_sync_cfpeek_ctrl_r()
+static inline u32 host1x_sync_cfpeek_ctrl_addr_f(u32 v)
+{
+	return (v & 0x3ff) << 0;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(v) \
+	host1x_sync_cfpeek_ctrl_addr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_channr_f(u32 v)
+{
+	return (v & 0xf) << 16;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(v) \
+	host1x_sync_cfpeek_ctrl_channr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_ena_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ENA_F(v) \
+	host1x_sync_cfpeek_ctrl_ena_f(v)
+static inline u32 host1x_sync_cfpeek_read_r(void)
+{
+	return 0x750;
+}
+#define HOST1X_SYNC_CFPEEK_READ \
+	host1x_sync_cfpeek_read_r()
+static inline u32 host1x_sync_cfpeek_ptrs_r(void)
+{
+	return 0x754;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS \
+	host1x_sync_cfpeek_ptrs_r()
+static inline u32 host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(u32 r)
+{
+	return (r >> 0) & 0x3ff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(r)
+static inline u32 host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(r)
+static inline u32 host1x_sync_cbstat_r(unsigned int channel)
+{
+	return 0x758 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CBSTAT(channel) \
+	host1x_sync_cbstat_r(channel)
+static inline u32 host1x_sync_cbstat_cboffset_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+#define HOST1X_SYNC_CBSTAT_CBOFFSET_V(r) \
+	host1x_sync_cbstat_cboffset_v(r)
+static inline u32 host1x_sync_cbstat_cbclass_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CBSTAT_CBCLASS_V(r) \
+	host1x_sync_cbstat_cbclass_v(r)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h
new file mode 100644
index 000000000000..0a2ab8f1da6f
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X02_UCLASS_H
+#define HOST1X_HW_HOST1X02_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+	host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+	host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+	host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+	host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+	host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+	host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+	host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+	host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+	return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+	host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+	return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+	host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+	return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+	host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+	host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+	return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+	return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+	return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+	host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+	return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+	host1x_uclass_wait_syncpt_32_r()
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x04_channel.h b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
new file mode 100644
index 000000000000..38d110645ee0
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x04_channel.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X04_CHANNEL_H
+#define HOST1X_HW_HOST1X04_CHANNEL_H
+
+static inline u32 host1x_channel_fifostat_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_CHANNEL_FIFOSTAT \
+	host1x_channel_fifostat_r()
+static inline u32 host1x_channel_fifostat_cfempty_v(u32 r)
+{
+	return (r >> 11) & 0x1;
+}
+#define HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(r) \
+	host1x_channel_fifostat_cfempty_v(r)
+static inline u32 host1x_channel_dmastart_r(void)
+{
+	return 0x14;
+}
+#define HOST1X_CHANNEL_DMASTART \
+	host1x_channel_dmastart_r()
+static inline u32 host1x_channel_dmaput_r(void)
+{
+	return 0x18;
+}
+#define HOST1X_CHANNEL_DMAPUT \
+	host1x_channel_dmaput_r()
+static inline u32 host1x_channel_dmaget_r(void)
+{
+	return 0x1c;
+}
+#define HOST1X_CHANNEL_DMAGET \
+	host1x_channel_dmaget_r()
+static inline u32 host1x_channel_dmaend_r(void)
+{
+	return 0x20;
+}
+#define HOST1X_CHANNEL_DMAEND \
+	host1x_channel_dmaend_r()
+static inline u32 host1x_channel_dmactrl_r(void)
+{
+	return 0x24;
+}
+#define HOST1X_CHANNEL_DMACTRL \
+	host1x_channel_dmactrl_r()
+static inline u32 host1x_channel_dmactrl_dmastop(void)
+{
+	return 1 << 0;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP \
+	host1x_channel_dmactrl_dmastop()
+static inline u32 host1x_channel_dmactrl_dmastop_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP_V(r) \
+	host1x_channel_dmactrl_dmastop_v(r)
+static inline u32 host1x_channel_dmactrl_dmagetrst(void)
+{
+	return 1 << 1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST \
+	host1x_channel_dmactrl_dmagetrst()
+static inline u32 host1x_channel_dmactrl_dmainitget(void)
+{
+	return 1 << 2;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
+	host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_channelctrl_r(void)
+{
+	return 0x98;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL \
+	host1x_channel_channelctrl_r()
+static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
+	host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x04_sync.h b/drivers/gpu/host1x/hw/hw_host1x04_sync.h
new file mode 100644
index 000000000000..0be98562c201
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x04_sync.h
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X04_SYNC_H
+#define HOST1X_HW_HOST1X04_SYNC_H
+
+#define REGISTER_STRIDE	4
+
+static inline u32 host1x_sync_syncpt_r(unsigned int id)
+{
+	return 0xf80 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT(id) \
+	host1x_sync_syncpt_r(id)
+static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(unsigned int id)
+{
+	return 0xe80 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id) \
+	host1x_sync_syncpt_thresh_cpu0_int_status_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_disable_r(unsigned int id)
+{
+	return 0xf00 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id) \
+	host1x_sync_syncpt_thresh_int_disable_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id)
+{
+	return 0xf20 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \
+	host1x_sync_syncpt_thresh_int_enable_cpu0_r(id)
+static inline u32 host1x_sync_cf_setup_r(unsigned int channel)
+{
+	return 0xc00 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CF_SETUP(channel) \
+	host1x_sync_cf_setup_r(channel)
+static inline u32 host1x_sync_cf_setup_base_v(u32 r)
+{
+	return (r >> 0) & 0x3ff;
+}
+#define HOST1X_SYNC_CF_SETUP_BASE_V(r) \
+	host1x_sync_cf_setup_base_v(r)
+static inline u32 host1x_sync_cf_setup_limit_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CF_SETUP_LIMIT_V(r) \
+	host1x_sync_cf_setup_limit_v(r)
+static inline u32 host1x_sync_cmdproc_stop_r(void)
+{
+	return 0xac;
+}
+#define HOST1X_SYNC_CMDPROC_STOP \
+	host1x_sync_cmdproc_stop_r()
+static inline u32 host1x_sync_ch_teardown_r(void)
+{
+	return 0xb0;
+}
+#define HOST1X_SYNC_CH_TEARDOWN \
+	host1x_sync_ch_teardown_r()
+static inline u32 host1x_sync_usec_clk_r(void)
+{
+	return 0x1a4;
+}
+#define HOST1X_SYNC_USEC_CLK \
+	host1x_sync_usec_clk_r()
+static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void)
+{
+	return 0x1a8;
+}
+#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \
+	host1x_sync_ctxsw_timeout_cfg_r()
+static inline u32 host1x_sync_ip_busy_timeout_r(void)
+{
+	return 0x1bc;
+}
+#define HOST1X_SYNC_IP_BUSY_TIMEOUT \
+	host1x_sync_ip_busy_timeout_r()
+static inline u32 host1x_sync_mlock_owner_r(unsigned int id)
+{
+	return 0x340 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_MLOCK_OWNER(id) \
+	host1x_sync_mlock_owner_r(id)
+static inline u32 host1x_sync_mlock_owner_chid_v(u32 v)
+{
+	return (v >> 8) & 0xf;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CHID_V(v) \
+	host1x_sync_mlock_owner_chid_v(v)
+static inline u32 host1x_sync_mlock_owner_cpu_owns_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(r) \
+	host1x_sync_mlock_owner_cpu_owns_v(r)
+static inline u32 host1x_sync_mlock_owner_ch_owns_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(r) \
+	host1x_sync_mlock_owner_ch_owns_v(r)
+static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id)
+{
+	return 0x1380 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(id) \
+	host1x_sync_syncpt_int_thresh_r(id)
+static inline u32 host1x_sync_syncpt_base_r(unsigned int id)
+{
+	return 0x600 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_BASE(id) \
+	host1x_sync_syncpt_base_r(id)
+static inline u32 host1x_sync_syncpt_cpu_incr_r(unsigned int id)
+{
+	return 0xf60 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(id) \
+	host1x_sync_syncpt_cpu_incr_r(id)
+static inline u32 host1x_sync_cbread_r(unsigned int channel)
+{
+	return 0xc80 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CBREAD(channel) \
+	host1x_sync_cbread_r(channel)
+static inline u32 host1x_sync_cfpeek_ctrl_r(void)
+{
+	return 0x74c;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL \
+	host1x_sync_cfpeek_ctrl_r()
+static inline u32 host1x_sync_cfpeek_ctrl_addr_f(u32 v)
+{
+	return (v & 0x3ff) << 0;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(v) \
+	host1x_sync_cfpeek_ctrl_addr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_channr_f(u32 v)
+{
+	return (v & 0xf) << 16;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(v) \
+	host1x_sync_cfpeek_ctrl_channr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_ena_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ENA_F(v) \
+	host1x_sync_cfpeek_ctrl_ena_f(v)
+static inline u32 host1x_sync_cfpeek_read_r(void)
+{
+	return 0x750;
+}
+#define HOST1X_SYNC_CFPEEK_READ \
+	host1x_sync_cfpeek_read_r()
+static inline u32 host1x_sync_cfpeek_ptrs_r(void)
+{
+	return 0x754;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS \
+	host1x_sync_cfpeek_ptrs_r()
+static inline u32 host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(u32 r)
+{
+	return (r >> 0) & 0x3ff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(r)
+static inline u32 host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(r)
+static inline u32 host1x_sync_cbstat_r(unsigned int channel)
+{
+	return 0xcc0 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CBSTAT(channel) \
+	host1x_sync_cbstat_r(channel)
+static inline u32 host1x_sync_cbstat_cboffset_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+#define HOST1X_SYNC_CBSTAT_CBOFFSET_V(r) \
+	host1x_sync_cbstat_cboffset_v(r)
+static inline u32 host1x_sync_cbstat_cbclass_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CBSTAT_CBCLASS_V(r) \
+	host1x_sync_cbstat_cbclass_v(r)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h
new file mode 100644
index 000000000000..60c692b92955
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2013 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X04_UCLASS_H
+#define HOST1X_HW_HOST1X04_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+	host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+	host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+	host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+	host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+	host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+	host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+	host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+	host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+	return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+	host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+	return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+	host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+	return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+	host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+	host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+	return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+	return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+	return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+	host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+	return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+	host1x_uclass_wait_syncpt_32_r()
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
new file mode 100644
index 000000000000..7e628ef58c49
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x05_channel.h
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2015 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X05_CHANNEL_H
+#define HOST1X_HW_HOST1X05_CHANNEL_H
+
+static inline u32 host1x_channel_fifostat_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_CHANNEL_FIFOSTAT \
+	host1x_channel_fifostat_r()
+static inline u32 host1x_channel_fifostat_cfempty_v(u32 r)
+{
+	return (r >> 11) & 0x1;
+}
+#define HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(r) \
+	host1x_channel_fifostat_cfempty_v(r)
+static inline u32 host1x_channel_dmastart_r(void)
+{
+	return 0x14;
+}
+#define HOST1X_CHANNEL_DMASTART \
+	host1x_channel_dmastart_r()
+static inline u32 host1x_channel_dmaput_r(void)
+{
+	return 0x18;
+}
+#define HOST1X_CHANNEL_DMAPUT \
+	host1x_channel_dmaput_r()
+static inline u32 host1x_channel_dmaget_r(void)
+{
+	return 0x1c;
+}
+#define HOST1X_CHANNEL_DMAGET \
+	host1x_channel_dmaget_r()
+static inline u32 host1x_channel_dmaend_r(void)
+{
+	return 0x20;
+}
+#define HOST1X_CHANNEL_DMAEND \
+	host1x_channel_dmaend_r()
+static inline u32 host1x_channel_dmactrl_r(void)
+{
+	return 0x24;
+}
+#define HOST1X_CHANNEL_DMACTRL \
+	host1x_channel_dmactrl_r()
+static inline u32 host1x_channel_dmactrl_dmastop(void)
+{
+	return 1 << 0;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP \
+	host1x_channel_dmactrl_dmastop()
+static inline u32 host1x_channel_dmactrl_dmastop_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP_V(r) \
+	host1x_channel_dmactrl_dmastop_v(r)
+static inline u32 host1x_channel_dmactrl_dmagetrst(void)
+{
+	return 1 << 1;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST \
+	host1x_channel_dmactrl_dmagetrst()
+static inline u32 host1x_channel_dmactrl_dmainitget(void)
+{
+	return 1 << 2;
+}
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET \
+	host1x_channel_dmactrl_dmainitget()
+static inline u32 host1x_channel_channelctrl_r(void)
+{
+	return 0x98;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL \
+	host1x_channel_channelctrl_r()
+static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v)
+{
+	return (v & 0x1) << 2;
+}
+#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \
+	host1x_channel_channelctrl_kernel_filter_gbuffer_f(v)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_sync.h b/drivers/gpu/host1x/hw/hw_host1x05_sync.h
new file mode 100644
index 000000000000..1a85c793bd3e
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x05_sync.h
@@ -0,0 +1,231 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2015 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X05_SYNC_H
+#define HOST1X_HW_HOST1X05_SYNC_H
+
+#define REGISTER_STRIDE	4
+
+static inline u32 host1x_sync_syncpt_r(unsigned int id)
+{
+	return 0xf80 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT(id) \
+	host1x_sync_syncpt_r(id)
+static inline u32 host1x_sync_syncpt_thresh_cpu0_int_status_r(unsigned int id)
+{
+	return 0xe80 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id) \
+	host1x_sync_syncpt_thresh_cpu0_int_status_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_disable_r(unsigned int id)
+{
+	return 0xf00 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id) \
+	host1x_sync_syncpt_thresh_int_disable_r(id)
+static inline u32 host1x_sync_syncpt_thresh_int_enable_cpu0_r(unsigned int id)
+{
+	return 0xf20 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id) \
+	host1x_sync_syncpt_thresh_int_enable_cpu0_r(id)
+static inline u32 host1x_sync_cf_setup_r(unsigned int channel)
+{
+	return 0xc00 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CF_SETUP(channel) \
+	host1x_sync_cf_setup_r(channel)
+static inline u32 host1x_sync_cf_setup_base_v(u32 r)
+{
+	return (r >> 0) & 0x3ff;
+}
+#define HOST1X_SYNC_CF_SETUP_BASE_V(r) \
+	host1x_sync_cf_setup_base_v(r)
+static inline u32 host1x_sync_cf_setup_limit_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CF_SETUP_LIMIT_V(r) \
+	host1x_sync_cf_setup_limit_v(r)
+static inline u32 host1x_sync_cmdproc_stop_r(void)
+{
+	return 0xac;
+}
+#define HOST1X_SYNC_CMDPROC_STOP \
+	host1x_sync_cmdproc_stop_r()
+static inline u32 host1x_sync_ch_teardown_r(void)
+{
+	return 0xb0;
+}
+#define HOST1X_SYNC_CH_TEARDOWN \
+	host1x_sync_ch_teardown_r()
+static inline u32 host1x_sync_usec_clk_r(void)
+{
+	return 0x1a4;
+}
+#define HOST1X_SYNC_USEC_CLK \
+	host1x_sync_usec_clk_r()
+static inline u32 host1x_sync_ctxsw_timeout_cfg_r(void)
+{
+	return 0x1a8;
+}
+#define HOST1X_SYNC_CTXSW_TIMEOUT_CFG \
+	host1x_sync_ctxsw_timeout_cfg_r()
+static inline u32 host1x_sync_ip_busy_timeout_r(void)
+{
+	return 0x1bc;
+}
+#define HOST1X_SYNC_IP_BUSY_TIMEOUT \
+	host1x_sync_ip_busy_timeout_r()
+static inline u32 host1x_sync_mlock_owner_r(unsigned int id)
+{
+	return 0x340 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_MLOCK_OWNER(id) \
+	host1x_sync_mlock_owner_r(id)
+static inline u32 host1x_sync_mlock_owner_chid_v(u32 r)
+{
+	return (r >> 8) & 0xf;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CHID_V(v) \
+	host1x_sync_mlock_owner_chid_v(v)
+static inline u32 host1x_sync_mlock_owner_cpu_owns_v(u32 r)
+{
+	return (r >> 1) & 0x1;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(r) \
+	host1x_sync_mlock_owner_cpu_owns_v(r)
+static inline u32 host1x_sync_mlock_owner_ch_owns_v(u32 r)
+{
+	return (r >> 0) & 0x1;
+}
+#define HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(r) \
+	host1x_sync_mlock_owner_ch_owns_v(r)
+static inline u32 host1x_sync_syncpt_int_thresh_r(unsigned int id)
+{
+	return 0x1380 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(id) \
+	host1x_sync_syncpt_int_thresh_r(id)
+static inline u32 host1x_sync_syncpt_base_r(unsigned int id)
+{
+	return 0x600 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_BASE(id) \
+	host1x_sync_syncpt_base_r(id)
+static inline u32 host1x_sync_syncpt_cpu_incr_r(unsigned int id)
+{
+	return 0xf60 + id * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(id) \
+	host1x_sync_syncpt_cpu_incr_r(id)
+static inline u32 host1x_sync_cbread_r(unsigned int channel)
+{
+	return 0xc80 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CBREAD(channel) \
+	host1x_sync_cbread_r(channel)
+static inline u32 host1x_sync_cfpeek_ctrl_r(void)
+{
+	return 0x74c;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL \
+	host1x_sync_cfpeek_ctrl_r()
+static inline u32 host1x_sync_cfpeek_ctrl_addr_f(u32 v)
+{
+	return (v & 0x3ff) << 0;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(v) \
+	host1x_sync_cfpeek_ctrl_addr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_channr_f(u32 v)
+{
+	return (v & 0xf) << 16;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(v) \
+	host1x_sync_cfpeek_ctrl_channr_f(v)
+static inline u32 host1x_sync_cfpeek_ctrl_ena_f(u32 v)
+{
+	return (v & 0x1) << 31;
+}
+#define HOST1X_SYNC_CFPEEK_CTRL_ENA_F(v) \
+	host1x_sync_cfpeek_ctrl_ena_f(v)
+static inline u32 host1x_sync_cfpeek_read_r(void)
+{
+	return 0x750;
+}
+#define HOST1X_SYNC_CFPEEK_READ \
+	host1x_sync_cfpeek_read_r()
+static inline u32 host1x_sync_cfpeek_ptrs_r(void)
+{
+	return 0x754;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS \
+	host1x_sync_cfpeek_ptrs_r()
+static inline u32 host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(u32 r)
+{
+	return (r >> 0) & 0x3ff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_rd_ptr_v(r)
+static inline u32 host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(r) \
+	host1x_sync_cfpeek_ptrs_cf_wr_ptr_v(r)
+static inline u32 host1x_sync_cbstat_r(unsigned int channel)
+{
+	return 0xcc0 + channel * REGISTER_STRIDE;
+}
+#define HOST1X_SYNC_CBSTAT(channel) \
+	host1x_sync_cbstat_r(channel)
+static inline u32 host1x_sync_cbstat_cboffset_v(u32 r)
+{
+	return (r >> 0) & 0xffff;
+}
+#define HOST1X_SYNC_CBSTAT_CBOFFSET_V(r) \
+	host1x_sync_cbstat_cboffset_v(r)
+static inline u32 host1x_sync_cbstat_cbclass_v(u32 r)
+{
+	return (r >> 16) & 0x3ff;
+}
+#define HOST1X_SYNC_CBSTAT_CBCLASS_V(r) \
+	host1x_sync_cbstat_cbclass_v(r)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
new file mode 100644
index 000000000000..2fcc9a2ad3ef
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2015 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X05_UCLASS_H
+#define HOST1X_HW_HOST1X05_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+	host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+	return (v & 0xff) << 8;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+	host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+	host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+	host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+	host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+	host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+	host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+	host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+	return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+	host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+	return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+	host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+	return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+	host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+	host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+	return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+	return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+	return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+	host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+	return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+	host1x_uclass_wait_syncpt_32_r()
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_channel.h b/drivers/gpu/host1x/hw/hw_host1x06_channel.h
new file mode 100644
index 000000000000..18ae1c57bbea
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x06_channel.h
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HW_HOST1X06_CHANNEL_H
+#define HOST1X_HW_HOST1X06_CHANNEL_H
+
+#define HOST1X_CHANNEL_SMMU_STREAMID 0x084
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h
new file mode 100644
index 000000000000..a7fc9ec4bc3e
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2017 NVIDIA Corporation.
+ */
+
+#define HOST1X_HV_SYNCPT_PROT_EN			0x1ac4
+#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN			BIT(1)
+#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x)		(0x2020 + (x * 4))
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL			0x233c
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ADDR(x)		(x)
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(x)		((x) << 16)
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE		BIT(31)
+#define HOST1X_HV_CMDFIFO_PEEK_READ			0x2340
+#define HOST1X_HV_CMDFIFO_PEEK_PTRS			0x2344
+#define HOST1X_HV_CMDFIFO_PEEK_PTRS_WR_PTR_V(x)		(((x) >> 16) & 0xfff)
+#define HOST1X_HV_CMDFIFO_PEEK_PTRS_RD_PTR_V(x)		((x) & 0xfff)
+#define HOST1X_HV_CMDFIFO_SETUP(x)			(0x2588 + (x * 4))
+#define HOST1X_HV_CMDFIFO_SETUP_LIMIT_V(x)		(((x) >> 16) & 0xfff)
+#define HOST1X_HV_CMDFIFO_SETUP_BASE_V(x)		((x) & 0xfff)
+#define HOST1X_HV_ICG_EN_OVERRIDE			0x2aa8
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
new file mode 100644
index 000000000000..50c32de452fb
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2017 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X06_UCLASS_H
+#define HOST1X_HW_HOST1X06_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+	host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+	return (v & 0xff) << 10;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+	host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+	return (v & 0x3ff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+	host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+	host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+	host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+	host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+	host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+	host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+	return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+	host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+	return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+	host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+	return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+	host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+	host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+	return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+	return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+	return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+	host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+	return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+	host1x_uclass_wait_syncpt_32_r()
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_vm.h b/drivers/gpu/host1x/hw/hw_host1x06_vm.h
new file mode 100644
index 000000000000..818564a76bc6
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x06_vm.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2017 NVIDIA Corporation.
+ */
+
+#define HOST1X_CHANNEL_DMASTART				0x0000
+#define HOST1X_CHANNEL_DMASTART_HI			0x0004
+#define HOST1X_CHANNEL_DMAPUT				0x0008
+#define HOST1X_CHANNEL_DMAPUT_HI			0x000c
+#define HOST1X_CHANNEL_DMAGET				0x0010
+#define HOST1X_CHANNEL_DMAGET_HI			0x0014
+#define HOST1X_CHANNEL_DMAEND				0x0018
+#define HOST1X_CHANNEL_DMAEND_HI			0x001c
+#define HOST1X_CHANNEL_DMACTRL				0x0020
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP			BIT(0)
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST		BIT(1)
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET		BIT(2)
+#define HOST1X_CHANNEL_CMDFIFO_STAT			0x0024
+#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY		BIT(13)
+#define HOST1X_CHANNEL_CMDFIFO_RDATA			0x0028
+#define HOST1X_CHANNEL_CMDP_OFFSET			0x0030
+#define HOST1X_CHANNEL_CMDP_CLASS			0x0034
+#define HOST1X_CHANNEL_CHANNELSTAT			0x0038
+#define HOST1X_CHANNEL_CMDPROC_STOP			0x0048
+#define HOST1X_CHANNEL_TEARDOWN				0x004c
+
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(x)			(0x6400 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x)	(0x6464 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x)	(0x652c + 4*(x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x)	(0x6590 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_BASE(x)			(0x8000 + 4*(x))
+#define HOST1X_SYNC_SYNCPT(x)				(0x8080 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(x)		(0x8a00 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_CH_APP(x)			(0x9384 + 4*(x))
+#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v)			(((v) & 0x3f) << 8)
diff --git a/drivers/gpu/host1x/hw/hw_host1x07_channel.h b/drivers/gpu/host1x/hw/hw_host1x07_channel.h
new file mode 100644
index 000000000000..96fa72bbd7ab
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x07_channel.h
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HW_HOST1X07_CHANNEL_H
+#define HOST1X_HW_HOST1X07_CHANNEL_H
+
+#define HOST1X_CHANNEL_SMMU_STREAMID 0x084
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x07_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x07_hypervisor.h
new file mode 100644
index 000000000000..52141d53954a
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x07_hypervisor.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+#define HOST1X_HV_SYNCPT_PROT_EN			0x1ac4
+#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN			BIT(1)
+#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x)		(0x2020 + (x * 4))
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL			0x233c
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ADDR(x)		(x)
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(x)		((x) << 16)
+#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE		BIT(31)
+#define HOST1X_HV_CMDFIFO_PEEK_READ			0x2340
+#define HOST1X_HV_CMDFIFO_PEEK_PTRS			0x2344
+#define HOST1X_HV_CMDFIFO_PEEK_PTRS_WR_PTR_V(x)		(((x) >> 16) & 0xfff)
+#define HOST1X_HV_CMDFIFO_PEEK_PTRS_RD_PTR_V(x)		((x) & 0xfff)
+#define HOST1X_HV_CMDFIFO_SETUP(x)			(0x2588 + (x * 4))
+#define HOST1X_HV_CMDFIFO_SETUP_LIMIT_V(x)		(((x) >> 16) & 0xfff)
+#define HOST1X_HV_CMDFIFO_SETUP_BASE_V(x)		((x) & 0xfff)
+#define HOST1X_HV_ICG_EN_OVERRIDE			0x2aa8
diff --git a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
new file mode 100644
index 000000000000..887b878f92f7
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X07_UCLASS_H
+#define HOST1X_HW_HOST1X07_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+	host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+	return (v & 0xff) << 10;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+	host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+	return (v & 0x3ff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+	host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+	host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+	host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+	host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+	host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+	host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+	return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+	host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+	return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+	host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+	return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+	host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+	host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+	return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+	return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+	return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+	host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+	return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+	host1x_uclass_wait_syncpt_32_r()
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x07_vm.h b/drivers/gpu/host1x/hw/hw_host1x07_vm.h
new file mode 100644
index 000000000000..b766851d5b83
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x07_vm.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+#define HOST1X_CHANNEL_DMASTART				0x0000
+#define HOST1X_CHANNEL_DMASTART_HI			0x0004
+#define HOST1X_CHANNEL_DMAPUT				0x0008
+#define HOST1X_CHANNEL_DMAPUT_HI			0x000c
+#define HOST1X_CHANNEL_DMAGET				0x0010
+#define HOST1X_CHANNEL_DMAGET_HI			0x0014
+#define HOST1X_CHANNEL_DMAEND				0x0018
+#define HOST1X_CHANNEL_DMAEND_HI			0x001c
+#define HOST1X_CHANNEL_DMACTRL				0x0020
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP			BIT(0)
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST		BIT(1)
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET		BIT(2)
+#define HOST1X_CHANNEL_CMDFIFO_STAT			0x0024
+#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY		BIT(13)
+#define HOST1X_CHANNEL_CMDFIFO_RDATA			0x0028
+#define HOST1X_CHANNEL_CMDP_OFFSET			0x0030
+#define HOST1X_CHANNEL_CMDP_CLASS			0x0034
+#define HOST1X_CHANNEL_CHANNELSTAT			0x0038
+#define HOST1X_CHANNEL_CMDPROC_STOP			0x0048
+#define HOST1X_CHANNEL_TEARDOWN				0x004c
+
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(x)			(0x6400 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x)	(0x6464 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x)	(0x652c + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x)	(0x6590 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT(x)				(0x8080 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(x)		(0x9980 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_CH_APP(x)			(0xa604 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v)			(((v) & 0x3f) << 8)
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_channel.h b/drivers/gpu/host1x/hw/hw_host1x08_channel.h
new file mode 100644
index 000000000000..c9272d2ab14a
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_channel.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HW_HOST1X08_CHANNEL_H
+#define HOST1X_HW_HOST1X08_CHANNEL_H
+
+#define HOST1X_CHANNEL_SMMU_STREAMID 0x084
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_common.h b/drivers/gpu/host1x/hw/hw_host1x08_common.h
new file mode 100644
index 000000000000..8e0c99150ec2
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_common.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#define HOST1X_COMMON_OFA_MLOCK			0x4050
+#define HOST1X_COMMON_NVJPG1_MLOCK		0x4070
+#define HOST1X_COMMON_VIC_MLOCK			0x4078
+#define HOST1X_COMMON_NVENC_MLOCK		0x407c
+#define HOST1X_COMMON_NVDEC_MLOCK		0x4080
+#define HOST1X_COMMON_NVJPG_MLOCK		0x4084
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h
new file mode 100644
index 000000000000..22964324c914
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#define HOST1X_HV_SYNCPT_PROT_EN			0x1724
+#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN			BIT(1)
+#define HOST1X_HV_CH_MLOCK_EN(x)			(0x1700 + (x * 4))
+#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x)		(0x1710 + (x * 4))
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_uclass.h b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h
new file mode 100644
index 000000000000..4fb1d090edae
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+ /*
+  * Function naming determines intended use:
+  *
+  *     <x>_r(void) : Returns the offset for register <x>.
+  *
+  *     <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+  *
+  *     <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+  *
+  *     <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+  *         and masked to place it at field <y> of register <x>.  This value
+  *         can be |'d with others to produce a full register value for
+  *         register <x>.
+  *
+  *     <x>_<y>_m(void) : Returns a mask for field <y> of register <x>.  This
+  *         value can be ~'d and then &'d to clear the value of field <y> for
+  *         register <x>.
+  *
+  *     <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+  *         to place it at field <y> of register <x>.  This value can be |'d
+  *         with others to produce a full register value for <x>.
+  *
+  *     <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+  *         <x> value 'r' after being shifted to place its LSB at bit 0.
+  *         This value is suitable for direct comparison with other unshifted
+  *         values appropriate for use in field <y> of register <x>.
+  *
+  *     <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+  *         field <y> of register <x>.  This value is suitable for direct
+  *         comparison with unshifted values appropriate for use in field <y>
+  *         of register <x>.
+  */
+
+#ifndef HOST1X_HW_HOST1X08_UCLASS_H
+#define HOST1X_HW_HOST1X08_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+	return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+	host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+	return (v & 0xff) << 10;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+	host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+	return (v & 0x3ff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+	host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+	return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+	host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+	host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+	host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+	return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+	host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+	return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+	host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+	host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+	return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+	host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+	return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+	host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+	return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+	host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+	return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+	host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+	return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+	host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+	return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+	host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+	return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+	return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+	host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+	return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+	host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+	return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+	host1x_uclass_wait_syncpt_32_r()
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_vm.h b/drivers/gpu/host1x/hw/hw_host1x08_vm.h
new file mode 100644
index 000000000000..1455a4670bf8
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_vm.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#define HOST1X_CHANNEL_DMASTART				0x0000
+#define HOST1X_CHANNEL_DMASTART_HI			0x0004
+#define HOST1X_CHANNEL_DMAPUT				0x0008
+#define HOST1X_CHANNEL_DMAPUT_HI			0x000c
+#define HOST1X_CHANNEL_DMAGET				0x0010
+#define HOST1X_CHANNEL_DMAGET_HI			0x0014
+#define HOST1X_CHANNEL_DMAEND				0x0018
+#define HOST1X_CHANNEL_DMAEND_HI			0x001c
+#define HOST1X_CHANNEL_DMACTRL				0x0020
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP			BIT(0)
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST		BIT(1)
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET		BIT(2)
+#define HOST1X_CHANNEL_CMDFIFO_STAT			0x0024
+#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY		BIT(13)
+#define HOST1X_CHANNEL_CMDFIFO_RDATA			0x0028
+#define HOST1X_CHANNEL_CMDP_OFFSET			0x0030
+#define HOST1X_CHANNEL_CMDP_CLASS			0x0034
+#define HOST1X_CHANNEL_CHANNELSTAT			0x0038
+#define HOST1X_CHANNEL_CMDPROC_STOP			0x0048
+#define HOST1X_CHANNEL_TEARDOWN				0x004c
+#define HOST1X_CHANNEL_SMMU_STREAMID			0x0084
+
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(x)			(0x6400 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x)	(0x6600 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_INTR_DEST(x)			(0x6684 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x)	(0x770c + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x)	(0x7790 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT(x)				(0x8080 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(x)		(0xa088 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_CH_APP(x)			(0xb090 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v)			(((v) & 0x3f) << 8)
diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
index b592eef1efcb..bd5b5ef62f35 100644
--- a/drivers/gpu/host1x/hw/intr_hw.c
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -1,71 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Tegra host1x Interrupt Management
  *
  * Copyright (C) 2010 Google, Inc.
  * Copyright (c) 2010-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-#include <linux/interrupt.h>
-#include <linux/irq.h>
 #include <linux/io.h>
-#include <asm/mach/irq.h>
 
-#include "intr.h"
-#include "dev.h"
+#include "../intr.h"
+#include "../dev.h"
 
-/*
- * Sync point threshold interrupt service function
- * Handles sync point threshold triggers, in interrupt context
- */
-static void host1x_intr_syncpt_handle(struct host1x_syncpt *syncpt)
+static void process_32_syncpts(struct host1x *host, unsigned long val, u32 reg_offset)
 {
-	unsigned int id = syncpt->id;
-	struct host1x *host = syncpt->host;
+	unsigned int id;
+
+	if (!val)
+		return;
 
-	host1x_sync_writel(host, BIT_MASK(id),
-		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(BIT_WORD(id)));
-	host1x_sync_writel(host, BIT_MASK(id),
-		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(BIT_WORD(id)));
+	host1x_sync_writel(host, val, HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(reg_offset));
+	host1x_sync_writel(host, val, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(reg_offset));
 
-	queue_work(host->intr_wq, &syncpt->intr.work);
+	for_each_set_bit(id, &val, 32)
+		host1x_intr_handle_interrupt(host, reg_offset * 32 + id);
 }
 
 static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
 {
-	struct host1x *host = dev_id;
+	struct host1x_intr_irq_data *irq_data = dev_id;
+	struct host1x *host = irq_data->host;
 	unsigned long reg;
-	int i, id;
+	unsigned int i;
 
-	for (i = 0; i <= BIT_WORD(host->info->nb_pts); i++) {
+#if !defined(CONFIG_64BIT)
+	for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 32);
+	     i += host->num_syncpt_irqs) {
 		reg = host1x_sync_readl(host,
 			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
-		for_each_set_bit(id, &reg, BITS_PER_LONG) {
-			struct host1x_syncpt *syncpt =
-				host->syncpt + (i * BITS_PER_LONG + id);
-			host1x_intr_syncpt_handle(syncpt);
-		}
+
+		process_32_syncpts(host, reg, i);
+	}
+#elif HOST1X_HW == 6 || HOST1X_HW == 7
+	/*
+	 * Tegra186 and Tegra194 have the first INT_STATUS register not 64-bit aligned,
+	 * and only have one interrupt line.
+	 */
+	reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(0));
+	process_32_syncpts(host, reg, 0);
+
+	for (i = 1; i < (host->info->nb_pts / 32) - 1; i += 2) {
+		reg = host1x_sync_readq(host,
+			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
+
+		process_32_syncpts(host, lower_32_bits(reg), i);
+		process_32_syncpts(host, upper_32_bits(reg), i + 1);
+	}
+
+	reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
+	process_32_syncpts(host, reg, i);
+#else
+	/* All 64-bit capable SoCs have number of syncpoints divisible by 64 */
+	for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 64);
+	     i += host->num_syncpt_irqs) {
+		reg = host1x_sync_readq(host,
+			HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i * 2));
+
+		process_32_syncpts(host, lower_32_bits(reg), i * 2 + 0);
+		process_32_syncpts(host, upper_32_bits(reg), i * 2 + 1);
 	}
+#endif
 
 	return IRQ_HANDLED;
 }
 
-static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
+static void host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
 {
-	u32 i;
+	unsigned int i;
 
-	for (i = 0; i <= BIT_WORD(host->info->nb_pts); ++i) {
+	for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); ++i) {
 		host1x_sync_writel(host, 0xffffffffu,
 			HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(i));
 		host1x_sync_writel(host, 0xffffffffu,
@@ -73,24 +85,10 @@ static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
 	}
 }
 
-static int _host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
-	void (*syncpt_thresh_work)(struct work_struct *))
+static int
+host1x_intr_init_host_sync(struct host1x *host, u32 cpm)
 {
-	int i, err;
-
-	host1x_hw_intr_disable_all_syncpt_intrs(host);
-
-	for (i = 0; i < host->info->nb_pts; i++)
-		INIT_WORK(&host->syncpt[i].intr.work, syncpt_thresh_work);
-
-	err = devm_request_irq(host->dev, host->intr_syncpt_irq,
-			       syncpt_thresh_isr, IRQF_SHARED,
-			       "host1x_syncpt", host);
-	if (IS_ERR_VALUE(err)) {
-		WARN_ON(1);
-		return err;
-	}
-
+#if HOST1X_HW < 6
 	/* disable the ip_busy_timeout. this prevents write drops */
 	host1x_sync_writel(host, 0, HOST1X_SYNC_IP_BUSY_TIMEOUT);
 
@@ -102,42 +100,55 @@ static int _host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
 
 	/* update host clocks per usec */
 	host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK);
+#endif
+#if HOST1X_HW >= 8
+	u32 id;
+
+	/*
+	 * Program threshold interrupt destination among 8 lines per VM,
+	 * per syncpoint. For each group of 64 syncpoints (corresponding to two
+	 * interrupt status registers), direct to one interrupt line, going
+	 * around in a round robin fashion.
+	 */
+	for (id = 0; id < host->info->nb_pts; id++) {
+		u32 reg_offset = id / 64;
+		u32 irq_index = reg_offset % host->num_syncpt_irqs;
+
+		host1x_sync_writel(host, irq_index, HOST1X_SYNC_SYNCPT_INTR_DEST(id));
+	}
+#endif
 
 	return 0;
 }
 
-static void _host1x_intr_set_syncpt_threshold(struct host1x *host,
-	u32 id, u32 thresh)
+static void host1x_intr_set_syncpt_threshold(struct host1x *host,
+					      unsigned int id,
+					      u32 thresh)
 {
 	host1x_sync_writel(host, thresh, HOST1X_SYNC_SYNCPT_INT_THRESH(id));
 }
 
-static void _host1x_intr_enable_syncpt_intr(struct host1x *host, u32 id)
+static void host1x_intr_enable_syncpt_intr(struct host1x *host,
+					    unsigned int id)
 {
-	host1x_sync_writel(host, BIT_MASK(id),
-		HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(BIT_WORD(id)));
+	host1x_sync_writel(host, BIT(id % 32),
+		HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id / 32));
 }
 
-static void _host1x_intr_disable_syncpt_intr(struct host1x *host, u32 id)
+static void host1x_intr_disable_syncpt_intr(struct host1x *host,
+					     unsigned int id)
 {
-	host1x_sync_writel(host, BIT_MASK(id),
-		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(BIT_WORD(id)));
-	host1x_sync_writel(host, BIT_MASK(id),
-		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(BIT_WORD(id)));
-}
-
-static int _host1x_free_syncpt_irq(struct host1x *host)
-{
-	devm_free_irq(host->dev, host->intr_syncpt_irq, host);
-	flush_workqueue(host->intr_wq);
-	return 0;
+	host1x_sync_writel(host, BIT(id % 32),
+		HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id / 32));
+	host1x_sync_writel(host, BIT(id % 32),
+		HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id / 32));
 }
 
 static const struct host1x_intr_ops host1x_intr_ops = {
-	.init_host_sync = _host1x_intr_init_host_sync,
-	.set_syncpt_threshold = _host1x_intr_set_syncpt_threshold,
-	.enable_syncpt_intr = _host1x_intr_enable_syncpt_intr,
-	.disable_syncpt_intr = _host1x_intr_disable_syncpt_intr,
-	.disable_all_syncpt_intrs = _host1x_intr_disable_all_syncpt_intrs,
-	.free_syncpt_irq = _host1x_free_syncpt_irq,
+	.init_host_sync = host1x_intr_init_host_sync,
+	.set_syncpt_threshold = host1x_intr_set_syncpt_threshold,
+	.enable_syncpt_intr = host1x_intr_enable_syncpt_intr,
+	.disable_syncpt_intr = host1x_intr_disable_syncpt_intr,
+	.disable_all_syncpt_intrs = host1x_intr_disable_all_syncpt_intrs,
+	.isr = syncpt_thresh_isr,
 };
diff --git a/drivers/gpu/host1x/hw/opcodes.h b/drivers/gpu/host1x/hw/opcodes.h
new file mode 100644
index 000000000000..649614499b04
--- /dev/null
+++ b/drivers/gpu/host1x/hw/opcodes.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x opcodes
+ *
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_OPCODES_H
+#define __HOST1X_OPCODES_H
+
+#include <linux/types.h>
+
+static inline u32 host1x_class_host_wait_syncpt(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_wait_syncpt_indx_f(indx)
+		| host1x_uclass_wait_syncpt_thresh_f(threshold);
+}
+
+static inline u32 host1x_class_host_load_syncpt_base(
+	unsigned indx, unsigned threshold)
+{
+	return host1x_uclass_load_syncpt_base_base_indx_f(indx)
+		| host1x_uclass_load_syncpt_base_value_f(threshold);
+}
+
+static inline u32 host1x_class_host_wait_syncpt_base(
+	unsigned indx, unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_wait_syncpt_base_indx_f(indx)
+		| host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_wait_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt_base(
+	unsigned base_indx, unsigned offset)
+{
+	return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
+		| host1x_uclass_incr_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt(
+	unsigned cond, unsigned indx)
+{
+	return host1x_uclass_incr_syncpt_cond_f(cond)
+		| host1x_uclass_incr_syncpt_indx_f(indx);
+}
+
+static inline u32 host1x_class_host_indoff_reg_write(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indbe_f(0xf)
+		| host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset);
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+static inline u32 host1x_class_host_indoff_reg_read(
+	unsigned mod_id, unsigned offset, bool auto_inc)
+{
+	u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
+		| host1x_uclass_indoff_indroffset_f(offset)
+		| host1x_uclass_indoff_rwn_read_v();
+	if (auto_inc)
+		v |= host1x_uclass_indoff_autoinc_f(1);
+	return v;
+}
+
+static inline u32 host1x_opcode_setclass(
+	unsigned class_id, unsigned offset, unsigned mask)
+{
+	return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
+}
+
+static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
+{
+	return (1 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
+{
+	return (2 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
+{
+	return (3 << 28) | (offset << 16) | mask;
+}
+
+static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
+{
+	return (4 << 28) | (offset << 16) | value;
+}
+
+static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
+{
+	return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
+		host1x_class_host_incr_syncpt(cond, indx));
+}
+
+static inline u32 host1x_opcode_restart(unsigned address)
+{
+	return (5 << 28) | (address >> 4);
+}
+
+static inline u32 host1x_opcode_gather(unsigned count)
+{
+	return (6 << 28) | count;
+}
+
+static inline u32 host1x_opcode_gather_nonincr(unsigned offset,	unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | count;
+}
+
+static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
+{
+	return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
+}
+
+static inline u32 host1x_opcode_setstreamid(unsigned streamid)
+{
+	return (7 << 28) | streamid;
+}
+
+static inline u32 host1x_opcode_setpayload(unsigned payload)
+{
+	return (9 << 28) | payload;
+}
+
+static inline u32 host1x_opcode_gather_wide(unsigned count)
+{
+	return (12 << 28) | count;
+}
+
+static inline u32 host1x_opcode_acquire_mlock(unsigned mlock)
+{
+	return (14 << 28) | (0 << 24) | mlock;
+}
+
+static inline u32 host1x_opcode_release_mlock(unsigned mlock)
+{
+	return (14 << 28) | (1 << 24) | mlock;
+}
+
+#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
index 0cf6095d3367..8cf35b2eff3d 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -1,33 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Tegra host1x Syncpoints
  *
  * Copyright (c) 2010-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #include <linux/io.h>
 
-#include "dev.h"
-#include "syncpt.h"
+#include "../dev.h"
+#include "../syncpt.h"
 
 /*
  * Write the current syncpoint value back to hw.
  */
 static void syncpt_restore(struct host1x_syncpt *sp)
 {
+	u32 min = host1x_syncpt_read_min(sp);
 	struct host1x *host = sp->host;
-	int min = host1x_syncpt_read_min(sp);
+
 	host1x_sync_writel(host, min, HOST1X_SYNC_SYNCPT(sp->id));
 }
 
@@ -36,9 +26,12 @@ static void syncpt_restore(struct host1x_syncpt *sp)
  */
 static void syncpt_restore_wait_base(struct host1x_syncpt *sp)
 {
+#if HOST1X_HW < 7
 	struct host1x *host = sp->host;
+
 	host1x_sync_writel(host, sp->base_val,
 			   HOST1X_SYNC_SYNCPT_BASE(sp->id));
+#endif
 }
 
 /*
@@ -46,9 +39,12 @@ static void syncpt_restore_wait_base(struct host1x_syncpt *sp)
  */
 static void syncpt_read_wait_base(struct host1x_syncpt *sp)
 {
+#if HOST1X_HW < 7
 	struct host1x *host = sp->host;
+
 	sp->base_val =
 		host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(sp->id));
+#endif
 }
 
 /*
@@ -85,21 +81,53 @@ static int syncpt_cpu_incr(struct host1x_syncpt *sp)
 	if (!host1x_syncpt_client_managed(sp) &&
 	    host1x_syncpt_idle(sp))
 		return -EINVAL;
-	host1x_sync_writel(host, BIT_MASK(sp->id),
+
+	host1x_sync_writel(host, BIT(sp->id % 32),
 			   HOST1X_SYNC_SYNCPT_CPU_INCR(reg_offset));
 	wmb();
 
 	return 0;
 }
 
-/* remove a wait pointed to by patch_addr */
-static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
+/**
+ * syncpt_assign_to_channel() - Assign syncpoint to channel
+ * @sp: syncpoint
+ * @ch: channel
+ *
+ * On chips with the syncpoint protection feature (Tegra186+), assign @sp to
+ * @ch, preventing other channels from incrementing the syncpoints. If @ch is
+ * NULL, unassigns the syncpoint.
+ *
+ * On older chips, do nothing.
+ */
+static void syncpt_assign_to_channel(struct host1x_syncpt *sp,
+				  struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+	struct host1x *host = sp->host;
+
+	host1x_sync_writel(host,
+			   HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
+			   HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
+#endif
+}
+
+/**
+ * syncpt_enable_protection() - Enable syncpoint protection
+ * @host: host1x instance
+ *
+ * On chips with the syncpoint protection feature (Tegra186+), enable this
+ * feature. On older chips, do nothing.
+ */
+static void syncpt_enable_protection(struct host1x *host)
 {
-	u32 override = host1x_class_host_wait_syncpt(
-		HOST1X_SYNCPT_RESERVED, 0);
+#if HOST1X_HW >= 6
+	if (!host->hv_regs)
+		return;
 
-	*((u32 *)patch_addr) = override;
-	return 0;
+	host1x_hypervisor_writel(host, HOST1X_HV_SYNCPT_PROT_EN_CH_EN,
+				 HOST1X_HV_SYNCPT_PROT_EN);
+#endif
 }
 
 static const struct host1x_syncpt_ops host1x_syncpt_ops = {
@@ -108,5 +136,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = {
 	.load_wait_base = syncpt_read_wait_base,
 	.load = syncpt_load,
 	.cpu_incr = syncpt_cpu_incr,
-	.patch_wait = syncpt_patch_wait,
+	.assign_to_channel = syncpt_assign_to_channel,
+	.enable_protection = syncpt_enable_protection,
 };
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 2491bf82e30c..f77a678949e9 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -1,305 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Tegra host1x Interrupt Management
  *
- * Copyright (c) 2010-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (c) 2010-2021, NVIDIA Corporation.
  */
 
 #include <linux/clk.h>
 #include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/irq.h>
-
-#include <trace/events/host1x.h>
-#include "channel.h"
 #include "dev.h"
+#include "fence.h"
 #include "intr.h"
 
-/* Wait list management */
-
-enum waitlist_state {
-	WLS_PENDING,
-	WLS_REMOVED,
-	WLS_CANCELLED,
-	WLS_HANDLED
-};
-
-static void waiter_release(struct kref *kref)
-{
-	kfree(container_of(kref, struct host1x_waitlist, refcount));
-}
-
-/*
- * add a waiter to a waiter queue, sorted by threshold
- * returns true if it was added at the head of the queue
- */
-static bool add_waiter_to_queue(struct host1x_waitlist *waiter,
-				struct list_head *queue)
+static void host1x_intr_add_fence_to_list(struct host1x_fence_list *list,
+					  struct host1x_syncpt_fence *fence)
 {
-	struct host1x_waitlist *pos;
-	u32 thresh = waiter->thresh;
+	struct host1x_syncpt_fence *fence_in_list;
 
-	list_for_each_entry_reverse(pos, queue, list)
-		if ((s32)(pos->thresh - thresh) <= 0) {
-			list_add(&waiter->list, &pos->list);
-			return false;
+	list_for_each_entry_reverse(fence_in_list, &list->list, list) {
+		if ((s32)(fence_in_list->threshold - fence->threshold) <= 0) {
+			/* Fence in list is before us, we can insert here */
+			list_add(&fence->list, &fence_in_list->list);
+			return;
 		}
+	}
 
-	list_add(&waiter->list, queue);
-	return true;
+	/* Add as first in list */
+	list_add(&fence->list, &list->list);
 }
 
-/*
- * run through a waiter queue for a single sync point ID
- * and gather all completed waiters into lists by actions
- */
-static void remove_completed_waiters(struct list_head *head, u32 sync,
-			struct list_head completed[HOST1X_INTR_ACTION_COUNT])
+static void host1x_intr_update_hw_state(struct host1x *host, struct host1x_syncpt *sp)
 {
-	struct list_head *dest;
-	struct host1x_waitlist *waiter, *next, *prev;
+	struct host1x_syncpt_fence *fence;
 
-	list_for_each_entry_safe(waiter, next, head, list) {
-		if ((s32)(waiter->thresh - sync) > 0)
-			break;
+	if (!list_empty(&sp->fences.list)) {
+		fence = list_first_entry(&sp->fences.list, struct host1x_syncpt_fence, list);
 
-		dest = completed + waiter->action;
-
-		/* consolidate submit cleanups */
-		if (waiter->action == HOST1X_INTR_ACTION_SUBMIT_COMPLETE &&
-		    !list_empty(dest)) {
-			prev = list_entry(dest->prev,
-					  struct host1x_waitlist, list);
-			if (prev->data == waiter->data) {
-				prev->count++;
-				dest = NULL;
-			}
-		}
-
-		/* PENDING->REMOVED or CANCELLED->HANDLED */
-		if (atomic_inc_return(&waiter->state) == WLS_HANDLED || !dest) {
-			list_del(&waiter->list);
-			kref_put(&waiter->refcount, waiter_release);
-		} else
-			list_move_tail(&waiter->list, dest);
+		host1x_hw_intr_set_syncpt_threshold(host, sp->id, fence->threshold);
+		host1x_hw_intr_enable_syncpt_intr(host, sp->id);
+	} else {
+		host1x_hw_intr_disable_syncpt_intr(host, sp->id);
 	}
 }
 
-static void reset_threshold_interrupt(struct host1x *host,
-				      struct list_head *head,
-				      unsigned int id)
+void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence)
 {
-	u32 thresh =
-		list_first_entry(head, struct host1x_waitlist, list)->thresh;
+	struct host1x_fence_list *fence_list = &fence->sp->fences;
 
-	host1x_hw_intr_set_syncpt_threshold(host, id, thresh);
-	host1x_hw_intr_enable_syncpt_intr(host, id);
-}
+	INIT_LIST_HEAD(&fence->list);
 
-static void action_submit_complete(struct host1x_waitlist *waiter)
-{
-	struct host1x_channel *channel = waiter->data;
-
-	host1x_cdma_update(&channel->cdma);
-
-	/*  Add nr_completed to trace */
-	trace_host1x_channel_submit_complete(dev_name(channel->dev),
-					     waiter->count, waiter->thresh);
-
-}
-
-static void action_wakeup(struct host1x_waitlist *waiter)
-{
-	wait_queue_head_t *wq = waiter->data;
-	wake_up(wq);
+	host1x_intr_add_fence_to_list(fence_list, fence);
+	host1x_intr_update_hw_state(host, fence->sp);
 }
 
-static void action_wakeup_interruptible(struct host1x_waitlist *waiter)
+bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *fence)
 {
-	wait_queue_head_t *wq = waiter->data;
-	wake_up_interruptible(wq);
-}
+	struct host1x_fence_list *fence_list = &fence->sp->fences;
+	unsigned long irqflags;
 
-typedef void (*action_handler)(struct host1x_waitlist *waiter);
+	spin_lock_irqsave(&fence_list->lock, irqflags);
 
-static action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = {
-	action_submit_complete,
-	action_wakeup,
-	action_wakeup_interruptible,
-};
-
-static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT])
-{
-	struct list_head *head = completed;
-	int i;
-
-	for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i, ++head) {
-		action_handler handler = action_handlers[i];
-		struct host1x_waitlist *waiter, *next;
-
-		list_for_each_entry_safe(waiter, next, head, list) {
-			list_del(&waiter->list);
-			handler(waiter);
-			WARN_ON(atomic_xchg(&waiter->state, WLS_HANDLED) !=
-				WLS_REMOVED);
-			kref_put(&waiter->refcount, waiter_release);
-		}
+	if (list_empty(&fence->list)) {
+		spin_unlock_irqrestore(&fence_list->lock, irqflags);
+		return false;
 	}
-}
-
-/*
- * Remove & handle all waiters that have completed for the given syncpt
- */
-static int process_wait_list(struct host1x *host,
-			     struct host1x_syncpt *syncpt,
-			     u32 threshold)
-{
-	struct list_head completed[HOST1X_INTR_ACTION_COUNT];
-	unsigned int i;
-	int empty;
-
-	for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i)
-		INIT_LIST_HEAD(completed + i);
-
-	spin_lock(&syncpt->intr.lock);
-
-	remove_completed_waiters(&syncpt->intr.wait_head, threshold,
-				 completed);
 
-	empty = list_empty(&syncpt->intr.wait_head);
-	if (empty)
-		host1x_hw_intr_disable_syncpt_intr(host, syncpt->id);
-	else
-		reset_threshold_interrupt(host, &syncpt->intr.wait_head,
-					  syncpt->id);
+	list_del_init(&fence->list);
+	host1x_intr_update_hw_state(host, fence->sp);
 
-	spin_unlock(&syncpt->intr.lock);
+	spin_unlock_irqrestore(&fence_list->lock, irqflags);
 
-	run_handlers(completed);
-
-	return empty;
-}
-
-/*
- * Sync point threshold interrupt service thread function
- * Handles sync point threshold triggers, in thread context
- */
-
-static void syncpt_thresh_work(struct work_struct *work)
-{
-	struct host1x_syncpt_intr *syncpt_intr =
-		container_of(work, struct host1x_syncpt_intr, work);
-	struct host1x_syncpt *syncpt =
-		container_of(syncpt_intr, struct host1x_syncpt, intr);
-	unsigned int id = syncpt->id;
-	struct host1x *host = syncpt->host;
-
-	(void)process_wait_list(host, syncpt,
-				host1x_syncpt_load(host->syncpt + id));
+	return true;
 }
 
-int host1x_intr_add_action(struct host1x *host, u32 id, u32 thresh,
-			   enum host1x_intr_action action, void *data,
-			   struct host1x_waitlist *waiter, void **ref)
+void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id)
 {
-	struct host1x_syncpt *syncpt;
-	int queue_was_empty;
-
-	if (waiter == NULL) {
-		pr_warn("%s: NULL waiter\n", __func__);
-		return -EINVAL;
-	}
+	struct host1x_syncpt *sp = &host->syncpt[id];
+	struct host1x_syncpt_fence *fence, *tmp;
+	unsigned int value;
 
-	/* initialize a new waiter */
-	INIT_LIST_HEAD(&waiter->list);
-	kref_init(&waiter->refcount);
-	if (ref)
-		kref_get(&waiter->refcount);
-	waiter->thresh = thresh;
-	waiter->action = action;
-	atomic_set(&waiter->state, WLS_PENDING);
-	waiter->data = data;
-	waiter->count = 1;
+	value = host1x_syncpt_load(sp);
 
-	syncpt = host->syncpt + id;
+	spin_lock(&sp->fences.lock);
 
-	spin_lock(&syncpt->intr.lock);
-
-	queue_was_empty = list_empty(&syncpt->intr.wait_head);
-
-	if (add_waiter_to_queue(waiter, &syncpt->intr.wait_head)) {
-		/* added at head of list - new threshold value */
-		host1x_hw_intr_set_syncpt_threshold(host, id, thresh);
+	list_for_each_entry_safe(fence, tmp, &sp->fences.list, list) {
+		if (((value - fence->threshold) & 0x80000000U) != 0U) {
+			/* Fence is not yet expired, we are done */
+			break;
+		}
 
-		/* added as first waiter - enable interrupt */
-		if (queue_was_empty)
-			host1x_hw_intr_enable_syncpt_intr(host, id);
+		list_del_init(&fence->list);
+		host1x_fence_signal(fence);
 	}
 
-	spin_unlock(&syncpt->intr.lock);
+	/* Re-enable interrupt if necessary */
+	host1x_intr_update_hw_state(host, sp);
 
-	if (ref)
-		*ref = waiter;
-	return 0;
+	spin_unlock(&sp->fences.lock);
 }
 
-void host1x_intr_put_ref(struct host1x *host, u32 id, void *ref)
+int host1x_intr_init(struct host1x *host)
 {
-	struct host1x_waitlist *waiter = ref;
-	struct host1x_syncpt *syncpt;
-
-	while (atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED) ==
-	       WLS_REMOVED)
-		schedule();
+	struct host1x_intr_irq_data *irq_data;
+	unsigned int id;
+	int i, err;
 
-	syncpt = host->syncpt + id;
-	(void)process_wait_list(host, syncpt,
-				host1x_syncpt_load(host->syncpt + id));
+	for (id = 0; id < host1x_syncpt_nb_pts(host); ++id) {
+		struct host1x_syncpt *syncpt = &host->syncpt[id];
 
-	kref_put(&waiter->refcount, waiter_release);
-}
-
-int host1x_intr_init(struct host1x *host, unsigned int irq_sync)
-{
-	unsigned int id;
-	u32 nb_pts = host1x_syncpt_nb_pts(host);
+		spin_lock_init(&syncpt->fences.lock);
+		INIT_LIST_HEAD(&syncpt->fences.list);
+	}
 
-	mutex_init(&host->intr_mutex);
-	host->intr_syncpt_irq = irq_sync;
-	host->intr_wq = create_workqueue("host_syncpt");
-	if (!host->intr_wq)
+	irq_data = devm_kcalloc(host->dev, host->num_syncpt_irqs, sizeof(irq_data[0]), GFP_KERNEL);
+	if (!irq_data)
 		return -ENOMEM;
 
-	for (id = 0; id < nb_pts; ++id) {
-		struct host1x_syncpt *syncpt = host->syncpt + id;
+	host1x_hw_intr_disable_all_syncpt_intrs(host);
 
-		spin_lock_init(&syncpt->intr.lock);
-		INIT_LIST_HEAD(&syncpt->intr.wait_head);
-		snprintf(syncpt->intr.thresh_irq_name,
-			 sizeof(syncpt->intr.thresh_irq_name),
-			 "host1x_sp_%02d", id);
-	}
+	for (i = 0; i < host->num_syncpt_irqs; i++) {
+		irq_data[i].host = host;
+		irq_data[i].offset = i;
 
-	host1x_intr_start(host);
+		err = devm_request_irq(host->dev, host->syncpt_irqs[i],
+				       host->intr_op->isr, IRQF_SHARED,
+				       "host1x_syncpt", &irq_data[i]);
+		if (err < 0)
+			return err;
+	}
 
 	return 0;
 }
 
 void host1x_intr_deinit(struct host1x *host)
 {
-	host1x_intr_stop(host);
-	destroy_workqueue(host->intr_wq);
 }
 
 void host1x_intr_start(struct host1x *host)
@@ -308,8 +141,7 @@ void host1x_intr_start(struct host1x *host)
 	int err;
 
 	mutex_lock(&host->intr_mutex);
-	err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000),
-					    syncpt_thresh_work);
+	err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000));
 	if (err) {
 		mutex_unlock(&host->intr_mutex);
 		return;
@@ -319,36 +151,5 @@ void host1x_intr_start(struct host1x *host)
 
 void host1x_intr_stop(struct host1x *host)
 {
-	unsigned int id;
-	struct host1x_syncpt *syncpt = host->syncpt;
-	u32 nb_pts = host1x_syncpt_nb_pts(host);
-
-	mutex_lock(&host->intr_mutex);
-
 	host1x_hw_intr_disable_all_syncpt_intrs(host);
-
-	for (id = 0; id < nb_pts; ++id) {
-		struct host1x_waitlist *waiter, *next;
-
-		list_for_each_entry_safe(waiter, next,
-			&syncpt[id].intr.wait_head, list) {
-			if (atomic_cmpxchg(&waiter->state,
-			    WLS_CANCELLED, WLS_HANDLED) == WLS_CANCELLED) {
-				list_del(&waiter->list);
-				kref_put(&waiter->refcount, waiter_release);
-			}
-		}
-
-		if (!list_empty(&syncpt[id].intr.wait_head)) {
-			/* output diagnostics */
-			mutex_unlock(&host->intr_mutex);
-			pr_warn("%s cannot stop syncpt intr id=%d\n",
-				__func__, id);
-			return;
-		}
-	}
-
-	host1x_hw_intr_free_syncpt_irq(host);
-
-	mutex_unlock(&host->intr_mutex);
 }
diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
index 2b8adf016a05..11cdf13e32fe 100644
--- a/drivers/gpu/host1x/intr.h
+++ b/drivers/gpu/host1x/intr.h
@@ -1,93 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Tegra host1x Interrupt Management
  *
- * Copyright (c) 2010-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (c) 2010-2021, NVIDIA Corporation.
  */
 
 #ifndef __HOST1X_INTR_H
 #define __HOST1X_INTR_H
 
-#include <linux/interrupt.h>
-#include <linux/workqueue.h>
-
 struct host1x;
+struct host1x_syncpt_fence;
 
-enum host1x_intr_action {
-	/*
-	 * Perform cleanup after a submit has completed.
-	 * 'data' points to a channel
-	 */
-	HOST1X_INTR_ACTION_SUBMIT_COMPLETE = 0,
-
-	/*
-	 * Wake up a  task.
-	 * 'data' points to a wait_queue_head_t
-	 */
-	HOST1X_INTR_ACTION_WAKEUP,
-
-	/*
-	 * Wake up a interruptible task.
-	 * 'data' points to a wait_queue_head_t
-	 */
-	HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
-
-	HOST1X_INTR_ACTION_COUNT
-};
-
-struct host1x_syncpt_intr {
-	spinlock_t lock;
-	struct list_head wait_head;
-	char thresh_irq_name[12];
-	struct work_struct work;
-};
-
-struct host1x_waitlist {
-	struct list_head list;
-	struct kref refcount;
-	u32 thresh;
-	enum host1x_intr_action action;
-	atomic_t state;
-	void *data;
-	int count;
+struct host1x_intr_irq_data {
+	struct host1x *host;
+	u32 offset;
 };
 
-/*
- * Schedule an action to be taken when a sync point reaches the given threshold.
- *
- * @id the sync point
- * @thresh the threshold
- * @action the action to take
- * @data a pointer to extra data depending on action, see above
- * @waiter waiter structure - assumes ownership
- * @ref must be passed if cancellation is possible, else NULL
- *
- * This is a non-blocking api.
- */
-int host1x_intr_add_action(struct host1x *host, u32 id, u32 thresh,
-	enum host1x_intr_action action, void *data,
-	struct host1x_waitlist *waiter, void **ref);
-
-/*
- * Unreference an action submitted to host1x_intr_add_action().
- * You must call this if you passed non-NULL as ref.
- * @ref the ref returned from host1x_intr_add_action()
- */
-void host1x_intr_put_ref(struct host1x *host, u32 id, void *ref);
-
 /* Initialize host1x sync point interrupt */
-int host1x_intr_init(struct host1x *host, unsigned int irq_sync);
+int host1x_intr_init(struct host1x *host);
 
 /* Deinitialize host1x sync point interrupt */
 void host1x_intr_deinit(struct host1x *host);
@@ -98,5 +28,10 @@ void host1x_intr_start(struct host1x *host);
 /* Disable host1x sync point interrupt */
 void host1x_intr_stop(struct host1x *host);
 
-irqreturn_t host1x_syncpt_thresh_fn(void *dev_id);
+void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id);
+
+void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence);
+
+bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *fence);
+
 #endif
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index cc807667d8f1..3ed49e1fd933 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -1,23 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Tegra host1x Job
  *
- * Copyright (c) 2010-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (c) 2010-2015, NVIDIA Corporation.
  */
 
 #include <linux/dma-mapping.h>
 #include <linux/err.h>
+#include <linux/host1x.h>
+#include <linux/iommu.h>
 #include <linux/kref.h>
 #include <linux/module.h>
 #include <linux/scatterlist.h>
@@ -27,27 +18,33 @@
 
 #include "channel.h"
 #include "dev.h"
-#include "host1x_bo.h"
 #include "job.h"
 #include "syncpt.h"
 
+#define HOST1X_WAIT_SYNCPT_OFFSET 0x8
+
 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
 				    u32 num_cmdbufs, u32 num_relocs,
-				    u32 num_waitchks)
+				    bool skip_firewall)
 {
 	struct host1x_job *job = NULL;
-	unsigned int num_unpins = num_cmdbufs + num_relocs;
+	unsigned int num_unpins = num_relocs;
+	bool enable_firewall;
 	u64 total;
 	void *mem;
 
+	enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall;
+
+	if (!enable_firewall)
+		num_unpins += num_cmdbufs;
+
 	/* Check that we're not going to overflow */
 	total = sizeof(struct host1x_job) +
-		num_relocs * sizeof(struct host1x_reloc) +
-		num_unpins * sizeof(struct host1x_job_unpin_data) +
-		num_waitchks * sizeof(struct host1x_waitchk) +
-		num_cmdbufs * sizeof(struct host1x_job_gather) +
-		num_unpins * sizeof(dma_addr_t) +
-		num_unpins * sizeof(u32 *);
+		(u64)num_relocs * sizeof(struct host1x_reloc) +
+		(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
+		(u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
+		(u64)num_unpins * sizeof(dma_addr_t) +
+		(u64)num_unpins * sizeof(u32 *);
 	if (total > ULONG_MAX)
 		return NULL;
 
@@ -55,19 +52,19 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
 	if (!job)
 		return NULL;
 
+	job->enable_firewall = enable_firewall;
+
 	kref_init(&job->ref);
 	job->channel = ch;
 
 	/* Redistribute memory to the structs  */
 	mem += sizeof(struct host1x_job);
-	job->relocarray = num_relocs ? mem : NULL;
+	job->relocs = num_relocs ? mem : NULL;
 	mem += num_relocs * sizeof(struct host1x_reloc);
 	job->unpins = num_unpins ? mem : NULL;
 	mem += num_unpins * sizeof(struct host1x_job_unpin_data);
-	job->waitchk = num_waitchks ? mem : NULL;
-	mem += num_waitchks * sizeof(struct host1x_waitchk);
-	job->gathers = num_cmdbufs ? mem : NULL;
-	mem += num_cmdbufs * sizeof(struct host1x_job_gather);
+	job->cmds = num_cmdbufs ? mem : NULL;
+	mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
 	job->addr_phys = num_unpins ? mem : NULL;
 
 	job->reloc_addr_phys = job->addr_phys;
@@ -75,17 +72,35 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
 
 	return job;
 }
+EXPORT_SYMBOL(host1x_job_alloc);
 
 struct host1x_job *host1x_job_get(struct host1x_job *job)
 {
 	kref_get(&job->ref);
 	return job;
 }
+EXPORT_SYMBOL(host1x_job_get);
 
 static void job_free(struct kref *ref)
 {
 	struct host1x_job *job = container_of(ref, struct host1x_job, ref);
 
+	if (job->release)
+		job->release(job);
+
+	if (job->fence) {
+		/*
+		 * remove_callback is atomic w.r.t. fence signaling, so
+		 * after the call returns, we know that the callback is not
+		 * in execution, and the fence can be safely freed.
+		 */
+		dma_fence_remove_callback(job->fence, &job->fence_cb);
+		dma_fence_put(job->fence);
+	}
+
+	if (job->syncpt)
+		host1x_syncpt_put(job->syncpt);
+
 	kfree(job);
 }
 
@@ -93,182 +108,228 @@ void host1x_job_put(struct host1x_job *job)
 {
 	kref_put(&job->ref, job_free);
 }
+EXPORT_SYMBOL(host1x_job_put);
 
 void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
-			   u32 words, u32 offset)
+			   unsigned int words, unsigned int offset)
 {
-	struct host1x_job_gather *cur_gather = &job->gathers[job->num_gathers];
+	struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;
 
-	cur_gather->words = words;
-	cur_gather->bo = bo;
-	cur_gather->offset = offset;
-	job->num_gathers++;
-}
+	gather->words = words;
+	gather->bo = bo;
+	gather->offset = offset;
 
-/*
- * NULL an already satisfied WAIT_SYNCPT host method, by patching its
- * args in the command stream. The method data is changed to reference
- * a reserved (never given out or incr) HOST1X_SYNCPT_RESERVED syncpt
- * with a matching threshold value of 0, so is guaranteed to be popped
- * by the host HW.
- */
-static void host1x_syncpt_patch_offset(struct host1x_syncpt *sp,
-				       struct host1x_bo *h, u32 offset)
-{
-	void *patch_addr = NULL;
-
-	/* patch the wait */
-	patch_addr = host1x_bo_kmap(h, offset >> PAGE_SHIFT);
-	if (patch_addr) {
-		host1x_syncpt_patch_wait(sp,
-					 patch_addr + (offset & ~PAGE_MASK));
-		host1x_bo_kunmap(h, offset >> PAGE_SHIFT, patch_addr);
-	} else
-		pr_err("Could not map cmdbuf for wait check\n");
+	job->num_cmds++;
 }
+EXPORT_SYMBOL(host1x_job_add_gather);
 
-/*
- * Check driver supplied waitchk structs for syncpt thresholds
- * that have already been satisfied and NULL the comparison (to
- * avoid a wrap condition in the HW).
- */
-static int do_waitchks(struct host1x_job *job, struct host1x *host,
-		       struct host1x_bo *patch)
+void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
+			 bool relative, u32 next_class)
 {
-	int i;
+	struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];
 
-	/* compare syncpt vs wait threshold */
-	for (i = 0; i < job->num_waitchk; i++) {
-		struct host1x_waitchk *wait = &job->waitchk[i];
-		struct host1x_syncpt *sp =
-			host1x_syncpt_get(host, wait->syncpt_id);
+	cmd->is_wait = true;
+	cmd->wait.id = id;
+	cmd->wait.threshold = thresh;
+	cmd->wait.next_class = next_class;
+	cmd->wait.relative = relative;
 
-		/* validate syncpt id */
-		if (wait->syncpt_id > host1x_syncpt_nb_pts(host))
-			continue;
+	job->num_cmds++;
+}
+EXPORT_SYMBOL(host1x_job_add_wait);
 
-		/* skip all other gathers */
-		if (patch != wait->bo)
-			continue;
+static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
+{
+	unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;
+	struct host1x_client *client = job->client;
+	struct device *dev = client->dev;
+	struct host1x_job_gather *g;
+	unsigned int i;
+	int err;
 
-		trace_host1x_syncpt_wait_check(wait->bo, wait->offset,
-					       wait->syncpt_id, wait->thresh,
-					       host1x_syncpt_read_min(sp));
+	job->num_unpins = 0;
 
-		if (host1x_syncpt_is_expired(sp, wait->thresh)) {
-			dev_dbg(host->dev,
-				"drop WAIT id %d (%s) thresh 0x%x, min 0x%x\n",
-				wait->syncpt_id, sp->name, wait->thresh,
-				host1x_syncpt_read_min(sp));
+	for (i = 0; i < job->num_relocs; i++) {
+		struct host1x_reloc *reloc = &job->relocs[i];
+		enum dma_data_direction direction;
+		struct host1x_bo_mapping *map;
+		struct host1x_bo *bo;
 
-			host1x_syncpt_patch_offset(sp, patch, wait->offset);
+		reloc->target.bo = host1x_bo_get(reloc->target.bo);
+		if (!reloc->target.bo) {
+			err = -EINVAL;
+			goto unpin;
 		}
 
-		wait->bo = NULL;
-	}
+		bo = reloc->target.bo;
 
-	return 0;
-}
+		switch (reloc->flags & mask) {
+		case HOST1X_RELOC_READ:
+			direction = DMA_TO_DEVICE;
+			break;
 
-static unsigned int pin_job(struct host1x_job *job)
-{
-	unsigned int i;
+		case HOST1X_RELOC_WRITE:
+			direction = DMA_FROM_DEVICE;
+			break;
 
-	job->num_unpins = 0;
+		case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
+			direction = DMA_BIDIRECTIONAL;
+			break;
 
-	for (i = 0; i < job->num_relocs; i++) {
-		struct host1x_reloc *reloc = &job->relocarray[i];
-		struct sg_table *sgt;
-		dma_addr_t phys_addr;
+		default:
+			err = -EINVAL;
+			goto unpin;
+		}
 
-		reloc->target = host1x_bo_get(reloc->target);
-		if (!reloc->target)
+		map = host1x_bo_pin(dev, bo, direction, NULL);
+		if (IS_ERR(map)) {
+			err = PTR_ERR(map);
 			goto unpin;
+		}
 
-		phys_addr = host1x_bo_pin(reloc->target, &sgt);
-		if (!phys_addr)
+		/*
+		 * host1x clients are generally not able to do scatter-gather themselves, so fail
+		 * if the buffer is discontiguous and we fail to map its SG table to a single
+		 * contiguous chunk of I/O virtual memory.
+		 */
+		if (map->chunks > 1) {
+			err = -EINVAL;
 			goto unpin;
+		}
 
-		job->addr_phys[job->num_unpins] = phys_addr;
-		job->unpins[job->num_unpins].bo = reloc->target;
-		job->unpins[job->num_unpins].sgt = sgt;
+		job->addr_phys[job->num_unpins] = map->phys;
+		job->unpins[job->num_unpins].map = map;
 		job->num_unpins++;
 	}
 
-	for (i = 0; i < job->num_gathers; i++) {
-		struct host1x_job_gather *g = &job->gathers[i];
-		struct sg_table *sgt;
-		dma_addr_t phys_addr;
+	/*
+	 * We will copy gathers BO content later, so there is no need to
+	 * hold and pin them.
+	 */
+	if (job->enable_firewall)
+		return 0;
+
+	for (i = 0; i < job->num_cmds; i++) {
+		struct host1x_bo_mapping *map;
+		size_t gather_size = 0;
+		struct scatterlist *sg;
+		unsigned long shift;
+		struct iova *alloc;
+		unsigned int j;
+
+		if (job->cmds[i].is_wait)
+			continue;
+
+		g = &job->cmds[i].gather;
 
 		g->bo = host1x_bo_get(g->bo);
-		if (!g->bo)
+		if (!g->bo) {
+			err = -EINVAL;
 			goto unpin;
+		}
 
-		phys_addr = host1x_bo_pin(g->bo, &sgt);
-		if (!phys_addr)
+		map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, NULL);
+		if (IS_ERR(map)) {
+			err = PTR_ERR(map);
 			goto unpin;
+		}
+
+		if (host->domain) {
+			for_each_sgtable_sg(map->sgt, sg, j)
+				gather_size += sg->length;
+
+			gather_size = iova_align(&host->iova, gather_size);
+
+			shift = iova_shift(&host->iova);
+			alloc = alloc_iova(&host->iova, gather_size >> shift,
+					   host->iova_end >> shift, true);
+			if (!alloc) {
+				err = -ENOMEM;
+				goto put;
+			}
+
+			err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc),
+						map->sgt, IOMMU_READ);
+			if (err == 0) {
+				__free_iova(&host->iova, alloc);
+				err = -EINVAL;
+				goto put;
+			}
 
-		job->addr_phys[job->num_unpins] = phys_addr;
-		job->unpins[job->num_unpins].bo = g->bo;
-		job->unpins[job->num_unpins].sgt = sgt;
+			map->phys = iova_dma_addr(&host->iova, alloc);
+			map->size = gather_size;
+		}
+
+		job->addr_phys[job->num_unpins] = map->phys;
+		job->unpins[job->num_unpins].map = map;
 		job->num_unpins++;
+
+		job->gather_addr_phys[i] = map->phys;
 	}
 
-	return job->num_unpins;
+	return 0;
 
+put:
+	host1x_bo_put(g->bo);
 unpin:
 	host1x_job_unpin(job);
-	return 0;
+	return err;
 }
 
-static unsigned int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf)
+static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
 {
-	int i = 0;
-	u32 last_page = ~0;
-	void *cmdbuf_page_addr = NULL;
+	void *cmdbuf_addr = NULL;
+	struct host1x_bo *cmdbuf = g->bo;
+	unsigned int i;
 
 	/* pin & patch the relocs for one gather */
 	for (i = 0; i < job->num_relocs; i++) {
-		struct host1x_reloc *reloc = &job->relocarray[i];
+		struct host1x_reloc *reloc = &job->relocs[i];
 		u32 reloc_addr = (job->reloc_addr_phys[i] +
-			reloc->target_offset) >> reloc->shift;
+				  reloc->target.offset) >> reloc->shift;
 		u32 *target;
 
 		/* skip all other gathers */
-		if (cmdbuf != reloc->cmdbuf)
+		if (cmdbuf != reloc->cmdbuf.bo)
 			continue;
 
-		if (last_page != reloc->cmdbuf_offset >> PAGE_SHIFT) {
-			if (cmdbuf_page_addr)
-				host1x_bo_kunmap(cmdbuf, last_page,
-						 cmdbuf_page_addr);
+		if (job->enable_firewall) {
+			target = (u32 *)job->gather_copy_mapped +
+					reloc->cmdbuf.offset / sizeof(u32) +
+						g->offset / sizeof(u32);
+			goto patch_reloc;
+		}
 
-			cmdbuf_page_addr = host1x_bo_kmap(cmdbuf,
-					reloc->cmdbuf_offset >> PAGE_SHIFT);
-			last_page = reloc->cmdbuf_offset >> PAGE_SHIFT;
+		if (!cmdbuf_addr) {
+			cmdbuf_addr = host1x_bo_mmap(cmdbuf);
 
-			if (unlikely(!cmdbuf_page_addr)) {
+			if (unlikely(!cmdbuf_addr)) {
 				pr_err("Could not map cmdbuf for relocation\n");
 				return -ENOMEM;
 			}
 		}
 
-		target = cmdbuf_page_addr + (reloc->cmdbuf_offset & ~PAGE_MASK);
+		target = cmdbuf_addr + reloc->cmdbuf.offset;
+patch_reloc:
 		*target = reloc_addr;
 	}
 
-	if (cmdbuf_page_addr)
-		host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr);
+	if (cmdbuf_addr)
+		host1x_bo_munmap(cmdbuf, cmdbuf_addr);
 
 	return 0;
 }
 
 static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf,
-		       unsigned int offset)
+			unsigned int offset)
 {
 	offset *= sizeof(u32);
 
-	if (reloc->cmdbuf != cmdbuf || reloc->cmdbuf_offset != offset)
+	if (reloc->cmdbuf.bo != cmdbuf || reloc->cmdbuf.offset != offset)
+		return false;
+
+	/* relocation shift value validation isn't implemented yet */
+	if (reloc->shift)
 		return false;
 
 	return true;
@@ -281,7 +342,7 @@ struct host1x_firewall {
 	unsigned int num_relocs;
 	struct host1x_reloc *reloc;
 
-	struct host1x_bo *cmdbuf_id;
+	struct host1x_bo *cmdbuf;
 	unsigned int offset;
 
 	u32 words;
@@ -291,25 +352,53 @@ struct host1x_firewall {
 	u32 count;
 };
 
+static int check_register(struct host1x_firewall *fw, unsigned long offset)
+{
+	if (!fw->job->is_addr_reg)
+		return 0;
+
+	if (fw->job->is_addr_reg(fw->dev, fw->class, offset)) {
+		if (!fw->num_relocs)
+			return -EINVAL;
+
+		if (!check_reloc(fw->reloc, fw->cmdbuf, fw->offset))
+			return -EINVAL;
+
+		fw->num_relocs--;
+		fw->reloc++;
+	}
+
+	return 0;
+}
+
+static int check_class(struct host1x_firewall *fw, u32 class)
+{
+	if (!fw->job->is_valid_class) {
+		if (fw->class != class)
+			return -EINVAL;
+	} else {
+		if (!fw->job->is_valid_class(fw->class))
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int check_mask(struct host1x_firewall *fw)
 {
 	u32 mask = fw->mask;
 	u32 reg = fw->reg;
+	int ret;
 
 	while (mask) {
 		if (fw->words == 0)
 			return -EINVAL;
 
 		if (mask & 1) {
-			if (fw->job->is_addr_reg(fw->dev, fw->class, reg)) {
-				if (!fw->num_relocs)
-					return -EINVAL;
-				if (!check_reloc(fw->reloc, fw->cmdbuf_id,
-						 fw->offset))
-					return -EINVAL;
-				fw->reloc++;
-				fw->num_relocs--;
-			}
+			ret = check_register(fw, reg);
+			if (ret < 0)
+				return ret;
+
 			fw->words--;
 			fw->offset++;
 		}
@@ -324,19 +413,16 @@ static int check_incr(struct host1x_firewall *fw)
 {
 	u32 count = fw->count;
 	u32 reg = fw->reg;
+	int ret;
 
 	while (count) {
 		if (fw->words == 0)
 			return -EINVAL;
 
-		if (fw->job->is_addr_reg(fw->dev, fw->class, reg)) {
-			if (!fw->num_relocs)
-				return -EINVAL;
-			if (!check_reloc(fw->reloc, fw->cmdbuf_id, fw->offset))
-				return -EINVAL;
-			fw->reloc++;
-			fw->num_relocs--;
-		}
+		ret = check_register(fw, reg);
+		if (ret < 0)
+			return ret;
+
 		reg++;
 		fw->words--;
 		fw->offset++;
@@ -348,21 +434,17 @@ static int check_incr(struct host1x_firewall *fw)
 
 static int check_nonincr(struct host1x_firewall *fw)
 {
-	int is_addr_reg = fw->job->is_addr_reg(fw->dev, fw->class, fw->reg);
 	u32 count = fw->count;
+	int ret;
 
 	while (count) {
 		if (fw->words == 0)
 			return -EINVAL;
 
-		if (is_addr_reg) {
-			if (!fw->num_relocs)
-				return -EINVAL;
-			if (!check_reloc(fw->reloc, fw->cmdbuf_id, fw->offset))
-				return -EINVAL;
-			fw->reloc++;
-			fw->num_relocs--;
-		}
+		ret = check_register(fw, fw->reg);
+		if (ret < 0)
+			return ret;
+
 		fw->words--;
 		fw->offset++;
 		count--;
@@ -375,13 +457,11 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
 {
 	u32 *cmdbuf_base = (u32 *)fw->job->gather_copy_mapped +
 		(g->offset / sizeof(u32));
+	u32 job_class = fw->class;
 	int err = 0;
 
-	if (!fw->job->is_addr_reg)
-		return 0;
-
 	fw->words = g->words;
-	fw->cmdbuf_id = g->bo;
+	fw->cmdbuf = g->bo;
 	fw->offset = 0;
 
 	while (fw->words && !err) {
@@ -399,7 +479,9 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
 			fw->class = word >> 6 & 0x3ff;
 			fw->mask = word & 0x3f;
 			fw->reg = word >> 16 & 0xfff;
-			err = check_mask(fw);
+			err = check_class(fw, job_class);
+			if (!err)
+				err = check_mask(fw);
 			if (err)
 				goto out;
 			break;
@@ -427,7 +509,6 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
 				goto out;
 			break;
 		case 4:
-		case 5:
 		case 14:
 			break;
 		default:
@@ -436,47 +517,60 @@ static int validate(struct host1x_firewall *fw, struct host1x_job_gather *g)
 		}
 	}
 
-	/* No relocs should remain at this point */
-	if (fw->num_relocs)
-		err = -EINVAL;
-
 out:
 	return err;
 }
 
-static inline int copy_gathers(struct host1x_job *job, struct device *dev)
+static inline int copy_gathers(struct device *host, struct host1x_job *job,
+			       struct device *dev)
 {
 	struct host1x_firewall fw;
 	size_t size = 0;
 	size_t offset = 0;
-	int i;
+	unsigned int i;
 
 	fw.job = job;
 	fw.dev = dev;
-	fw.reloc = job->relocarray;
+	fw.reloc = job->relocs;
 	fw.num_relocs = job->num_relocs;
-	fw.class = 0;
+	fw.class = job->class;
+
+	for (i = 0; i < job->num_cmds; i++) {
+		struct host1x_job_gather *g;
+
+		if (job->cmds[i].is_wait)
+			continue;
+
+		g = &job->cmds[i].gather;
 
-	for (i = 0; i < job->num_gathers; i++) {
-		struct host1x_job_gather *g = &job->gathers[i];
 		size += g->words * sizeof(u32);
 	}
 
-	job->gather_copy_mapped = dma_alloc_writecombine(dev, size,
-							 &job->gather_copy,
-							 GFP_KERNEL);
-	if (!job->gather_copy_mapped) {
-		int err = PTR_ERR(job->gather_copy_mapped);
-		job->gather_copy_mapped = NULL;
-		return err;
-	}
+	/*
+	 * Try a non-blocking allocation from a higher priority pools first,
+	 * as awaiting for the allocation here is a major performance hit.
+	 */
+	job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
+					       GFP_NOWAIT);
+
+	/* the higher priority allocation failed, try the generic-blocking */
+	if (!job->gather_copy_mapped)
+		job->gather_copy_mapped = dma_alloc_wc(host, size,
+						       &job->gather_copy,
+						       GFP_KERNEL);
+	if (!job->gather_copy_mapped)
+		return -ENOMEM;
 
 	job->gather_copy_size = size;
 
-	for (i = 0; i < job->num_gathers; i++) {
-		struct host1x_job_gather *g = &job->gathers[i];
+	for (i = 0; i < job->num_cmds; i++) {
+		struct host1x_job_gather *g;
 		void *gather;
 
+		if (job->cmds[i].is_wait)
+			continue;
+		g = &job->cmds[i].gather;
+
 		/* Copy the gather */
 		gather = host1x_bo_mmap(g->bo);
 		memcpy(job->gather_copy_mapped + offset, gather + g->offset,
@@ -494,6 +588,10 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
 		offset += g->words * sizeof(u32);
 	}
 
+	/* No relocs should remain at this point */
+	if (fw.num_relocs)
+		return -EINVAL;
+
 	return 0;
 }
 
@@ -502,84 +600,88 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
 	int err;
 	unsigned int i, j;
 	struct host1x *host = dev_get_drvdata(dev->parent);
-	DECLARE_BITMAP(waitchk_mask, host1x_syncpt_nb_pts(host));
-
-	bitmap_zero(waitchk_mask, host1x_syncpt_nb_pts(host));
-	for (i = 0; i < job->num_waitchk; i++) {
-		u32 syncpt_id = job->waitchk[i].syncpt_id;
-		if (syncpt_id < host1x_syncpt_nb_pts(host))
-			set_bit(syncpt_id, waitchk_mask);
-	}
-
-	/* get current syncpt values for waitchk */
-	for_each_set_bit(i, waitchk_mask, host1x_syncpt_nb_pts(host))
-		host1x_syncpt_load(host->syncpt + i);
 
 	/* pin memory */
-	err = pin_job(job);
-	if (!err)
+	err = pin_job(host, job);
+	if (err)
 		goto out;
 
+	if (job->enable_firewall) {
+		err = copy_gathers(host->dev, job, dev);
+		if (err)
+			goto out;
+	}
+
 	/* patch gathers */
-	for (i = 0; i < job->num_gathers; i++) {
-		struct host1x_job_gather *g = &job->gathers[i];
+	for (i = 0; i < job->num_cmds; i++) {
+		struct host1x_job_gather *g;
+
+		if (job->cmds[i].is_wait)
+			continue;
+		g = &job->cmds[i].gather;
 
 		/* process each gather mem only once */
 		if (g->handled)
 			continue;
 
-		g->base = job->gather_addr_phys[i];
+		/* copy_gathers() sets gathers base if firewall is enabled */
+		if (!job->enable_firewall)
+			g->base = job->gather_addr_phys[i];
 
-		for (j = 0; j < job->num_gathers; j++)
-			if (job->gathers[j].bo == g->bo)
-				job->gathers[j].handled = true;
-
-		err = do_relocs(job, g->bo);
-		if (err)
-			break;
+		for (j = i + 1; j < job->num_cmds; j++) {
+			if (!job->cmds[j].is_wait &&
+			    job->cmds[j].gather.bo == g->bo) {
+				job->cmds[j].gather.handled = true;
+				job->cmds[j].gather.base = g->base;
+			}
+		}
 
-		err = do_waitchks(job, host, g->bo);
+		err = do_relocs(job, g);
 		if (err)
 			break;
 	}
 
-	if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !err) {
-		err = copy_gathers(job, dev);
-		if (err) {
-			host1x_job_unpin(job);
-			return err;
-		}
-	}
-
 out:
+	if (err)
+		host1x_job_unpin(job);
 	wmb();
 
 	return err;
 }
+EXPORT_SYMBOL(host1x_job_pin);
 
 void host1x_job_unpin(struct host1x_job *job)
 {
+	struct host1x *host = dev_get_drvdata(job->channel->dev->parent);
 	unsigned int i;
 
 	for (i = 0; i < job->num_unpins; i++) {
-		struct host1x_job_unpin_data *unpin = &job->unpins[i];
-		host1x_bo_unpin(unpin->bo, unpin->sgt);
-		host1x_bo_put(unpin->bo);
+		struct host1x_bo_mapping *map = job->unpins[i].map;
+		struct host1x_bo *bo = map->bo;
+
+		if (!job->enable_firewall && map->size && host->domain) {
+			iommu_unmap(host->domain, job->addr_phys[i], map->size);
+			free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i]));
+		}
+
+		host1x_bo_unpin(map);
+		host1x_bo_put(bo);
 	}
+
 	job->num_unpins = 0;
 
 	if (job->gather_copy_size)
-		dma_free_writecombine(job->channel->dev, job->gather_copy_size,
-				      job->gather_copy_mapped,
-				      job->gather_copy);
+		dma_free_wc(host->dev, job->gather_copy_size,
+			    job->gather_copy_mapped, job->gather_copy);
 }
+EXPORT_SYMBOL(host1x_job_unpin);
 
 /*
  * Debug routine used to dump job entries
  */
 void host1x_job_dump(struct device *dev, struct host1x_job *job)
 {
-	dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt_id);
+	dev_dbg(dev, "    SYNCPT_ID   %d\n", job->syncpt->id);
 	dev_dbg(dev, "    SYNCPT_VAL  %d\n", job->syncpt_end);
 	dev_dbg(dev, "    FIRST_GET   0x%x\n", job->first_get);
 	dev_dbg(dev, "    TIMEOUT     %d\n", job->timeout);
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index fba45f20458e..dad5a1946693 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -1,160 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Tegra host1x Job
  *
  * Copyright (c) 2011-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __HOST1X_JOB_H
 #define __HOST1X_JOB_H
 
+#include <linux/dma-direction.h>
+
 struct host1x_job_gather {
-	u32 words;
+	unsigned int words;
 	dma_addr_t base;
 	struct host1x_bo *bo;
-	int offset;
+	unsigned int offset;
 	bool handled;
 };
 
-struct host1x_cmdbuf {
-	u32 handle;
-	u32 offset;
-	u32 words;
-	u32 pad;
+struct host1x_job_wait {
+	u32 id;
+	u32 threshold;
+	u32 next_class;
+	bool relative;
 };
 
-struct host1x_reloc {
-	struct host1x_bo *cmdbuf;
-	u32 cmdbuf_offset;
-	struct host1x_bo *target;
-	u32 target_offset;
-	u32 shift;
-	u32 pad;
-};
+struct host1x_job_cmd {
+	bool is_wait;
 
-struct host1x_waitchk {
-	struct host1x_bo *bo;
-	u32 offset;
-	u32 syncpt_id;
-	u32 thresh;
+	union {
+		struct host1x_job_gather gather;
+		struct host1x_job_wait wait;
+	};
 };
 
 struct host1x_job_unpin_data {
-	struct host1x_bo *bo;
-	struct sg_table *sgt;
+	struct host1x_bo_mapping *map;
 };
 
 /*
- * Each submit is tracked as a host1x_job.
- */
-struct host1x_job {
-	/* When refcount goes to zero, job can be freed */
-	struct kref ref;
-
-	/* List entry */
-	struct list_head list;
-
-	/* Channel where job is submitted to */
-	struct host1x_channel *channel;
-
-	u32 client;
-
-	/* Gathers and their memory */
-	struct host1x_job_gather *gathers;
-	unsigned int num_gathers;
-
-	/* Wait checks to be processed at submit time */
-	struct host1x_waitchk *waitchk;
-	unsigned int num_waitchk;
-	u32 waitchk_mask;
-
-	/* Array of handles to be pinned & unpinned */
-	struct host1x_reloc *relocarray;
-	unsigned int num_relocs;
-	struct host1x_job_unpin_data *unpins;
-	unsigned int num_unpins;
-
-	dma_addr_t *addr_phys;
-	dma_addr_t *gather_addr_phys;
-	dma_addr_t *reloc_addr_phys;
-
-	/* Sync point id, number of increments and end related to the submit */
-	u32 syncpt_id;
-	u32 syncpt_incrs;
-	u32 syncpt_end;
-
-	/* Maximum time to wait for this job */
-	unsigned int timeout;
-
-	/* Index and number of slots used in the push buffer */
-	unsigned int first_get;
-	unsigned int num_slots;
-
-	/* Copy of gathers */
-	size_t gather_copy_size;
-	dma_addr_t gather_copy;
-	u8 *gather_copy_mapped;
-
-	/* Check if register is marked as an address reg */
-	int (*is_addr_reg)(struct device *dev, u32 reg, u32 class);
-
-	/* Request a SETCLASS to this class */
-	u32 class;
-
-	/* Add a channel wait for previous ops to complete */
-	bool serialize;
-};
-/*
- * Allocate memory for a job. Just enough memory will be allocated to
- * accomodate the submit.
- */
-struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
-				    u32 num_cmdbufs, u32 num_relocs,
-				    u32 num_waitchks);
-
-/*
- * Add a gather to a job.
- */
-void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *mem_id,
-			   u32 words, u32 offset);
-
-/*
- * Increment reference going to host1x_job.
- */
-struct host1x_job *host1x_job_get(struct host1x_job *job);
-
-/*
- * Decrement reference job, free if goes to zero.
- */
-void host1x_job_put(struct host1x_job *job);
-
-/*
- * Pin memory related to job. This handles relocation of addresses to the
- * host1x address space. Handles both the gather memory and any other memory
- * referred to from the gather buffers.
- *
- * Handles also patching out host waits that would wait for an expired sync
- * point value.
- */
-int host1x_job_pin(struct host1x_job *job, struct device *dev);
-
-/*
- * Unpin memory related to job.
- */
-void host1x_job_unpin(struct host1x_job *job);
-
-/*
  * Dump contents of job to debug output.
  */
 void host1x_job_dump(struct device *dev, struct host1x_job *job);
diff --git a/drivers/gpu/host1x/mipi.c b/drivers/gpu/host1x/mipi.c
new file mode 100644
index 000000000000..e51b43dd15a3
--- /dev/null
+++ b/drivers/gpu/host1x/mipi.c
@@ -0,0 +1,539 @@
+/*
+ * Copyright (C) 2013 NVIDIA Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission.  The copyright holders make no representations
+ * about the suitability of this software for any purpose.  It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
+
+#include <linux/clk.h>
+#include <linux/host1x.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "dev.h"
+
+#define MIPI_CAL_CTRL			0x00
+#define MIPI_CAL_CTRL_NOISE_FILTER(x)	(((x) & 0xf) << 26)
+#define MIPI_CAL_CTRL_PRESCALE(x)	(((x) & 0x3) << 24)
+#define MIPI_CAL_CTRL_CLKEN_OVR		(1 << 4)
+#define MIPI_CAL_CTRL_START		(1 << 0)
+
+#define MIPI_CAL_AUTOCAL_CTRL		0x01
+
+#define MIPI_CAL_STATUS			0x02
+#define MIPI_CAL_STATUS_DONE		(1 << 16)
+#define MIPI_CAL_STATUS_ACTIVE		(1 <<  0)
+
+#define MIPI_CAL_CONFIG_CSIA		0x05
+#define MIPI_CAL_CONFIG_CSIB		0x06
+#define MIPI_CAL_CONFIG_CSIC		0x07
+#define MIPI_CAL_CONFIG_CSID		0x08
+#define MIPI_CAL_CONFIG_CSIE		0x09
+#define MIPI_CAL_CONFIG_CSIF		0x0a
+#define MIPI_CAL_CONFIG_DSIA		0x0e
+#define MIPI_CAL_CONFIG_DSIB		0x0f
+#define MIPI_CAL_CONFIG_DSIC		0x10
+#define MIPI_CAL_CONFIG_DSID		0x11
+
+#define MIPI_CAL_CONFIG_DSIA_CLK	0x19
+#define MIPI_CAL_CONFIG_DSIB_CLK	0x1a
+#define MIPI_CAL_CONFIG_CSIAB_CLK	0x1b
+#define MIPI_CAL_CONFIG_DSIC_CLK	0x1c
+#define MIPI_CAL_CONFIG_CSICD_CLK	0x1c
+#define MIPI_CAL_CONFIG_DSID_CLK	0x1d
+#define MIPI_CAL_CONFIG_CSIE_CLK	0x1d
+
+/* for data and clock lanes */
+#define MIPI_CAL_CONFIG_SELECT		(1 << 21)
+
+/* for data lanes */
+#define MIPI_CAL_CONFIG_HSPDOS(x)	(((x) & 0x1f) << 16)
+#define MIPI_CAL_CONFIG_HSPUOS(x)	(((x) & 0x1f) <<  8)
+#define MIPI_CAL_CONFIG_TERMOS(x)	(((x) & 0x1f) <<  0)
+
+/* for clock lanes */
+#define MIPI_CAL_CONFIG_HSCLKPDOSD(x)	(((x) & 0x1f) <<  8)
+#define MIPI_CAL_CONFIG_HSCLKPUOSD(x)	(((x) & 0x1f) <<  0)
+
+#define MIPI_CAL_BIAS_PAD_CFG0		0x16
+#define MIPI_CAL_BIAS_PAD_PDVCLAMP	(1 << 1)
+#define MIPI_CAL_BIAS_PAD_E_VCLAMP_REF	(1 << 0)
+
+#define MIPI_CAL_BIAS_PAD_CFG1		0x17
+#define MIPI_CAL_BIAS_PAD_DRV_DN_REF(x) (((x) & 0x7) << 16)
+#define MIPI_CAL_BIAS_PAD_DRV_UP_REF(x) (((x) & 0x7) << 8)
+
+#define MIPI_CAL_BIAS_PAD_CFG2		0x18
+#define MIPI_CAL_BIAS_PAD_VCLAMP(x)	(((x) & 0x7) << 16)
+#define MIPI_CAL_BIAS_PAD_VAUXP(x)	(((x) & 0x7) << 4)
+#define MIPI_CAL_BIAS_PAD_PDVREG	(1 << 1)
+
+struct tegra_mipi_pad {
+	unsigned long data;
+	unsigned long clk;
+};
+
+struct tegra_mipi_soc {
+	bool has_clk_lane;
+	const struct tegra_mipi_pad *pads;
+	unsigned int num_pads;
+
+	bool clock_enable_override;
+	bool needs_vclamp_ref;
+
+	/* bias pad configuration settings */
+	u8 pad_drive_down_ref;
+	u8 pad_drive_up_ref;
+
+	u8 pad_vclamp_level;
+	u8 pad_vauxp_level;
+
+	/* calibration settings for data lanes */
+	u8 hspdos;
+	u8 hspuos;
+	u8 termos;
+
+	/* calibration settings for clock lanes */
+	u8 hsclkpdos;
+	u8 hsclkpuos;
+};
+
+struct tegra_mipi {
+	const struct tegra_mipi_soc *soc;
+	struct device *dev;
+	void __iomem *regs;
+	struct mutex lock;
+	struct clk *clk;
+
+	unsigned long usage_count;
+};
+
+struct tegra_mipi_device {
+	struct platform_device *pdev;
+	struct tegra_mipi *mipi;
+	struct device *device;
+	unsigned long pads;
+};
+
+static inline u32 tegra_mipi_readl(struct tegra_mipi *mipi,
+				   unsigned long offset)
+{
+	return readl(mipi->regs + (offset << 2));
+}
+
+static inline void tegra_mipi_writel(struct tegra_mipi *mipi, u32 value,
+				     unsigned long offset)
+{
+	writel(value, mipi->regs + (offset << 2));
+}
+
+static int tegra_mipi_power_up(struct tegra_mipi *mipi)
+{
+	u32 value;
+	int err;
+
+	err = clk_enable(mipi->clk);
+	if (err < 0)
+		return err;
+
+	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG0);
+	value &= ~MIPI_CAL_BIAS_PAD_PDVCLAMP;
+
+	if (mipi->soc->needs_vclamp_ref)
+		value |= MIPI_CAL_BIAS_PAD_E_VCLAMP_REF;
+
+	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG0);
+
+	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG2);
+	value &= ~MIPI_CAL_BIAS_PAD_PDVREG;
+	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG2);
+
+	clk_disable(mipi->clk);
+
+	return 0;
+}
+
+static int tegra_mipi_power_down(struct tegra_mipi *mipi)
+{
+	u32 value;
+	int err;
+
+	err = clk_enable(mipi->clk);
+	if (err < 0)
+		return err;
+
+	/*
+	 * The MIPI_CAL_BIAS_PAD_PDVREG controls a voltage regulator that
+	 * supplies the DSI pads. This must be kept enabled until none of the
+	 * DSI lanes are used anymore.
+	 */
+	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG2);
+	value |= MIPI_CAL_BIAS_PAD_PDVREG;
+	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG2);
+
+	/*
+	 * MIPI_CAL_BIAS_PAD_PDVCLAMP and MIPI_CAL_BIAS_PAD_E_VCLAMP_REF
+	 * control a regulator that supplies current to the pre-driver logic.
+	 * Powering down this regulator causes DSI to fail, so it must remain
+	 * powered on until none of the DSI lanes are used anymore.
+	 */
+	value = tegra_mipi_readl(mipi, MIPI_CAL_BIAS_PAD_CFG0);
+
+	if (mipi->soc->needs_vclamp_ref)
+		value &= ~MIPI_CAL_BIAS_PAD_E_VCLAMP_REF;
+
+	value |= MIPI_CAL_BIAS_PAD_PDVCLAMP;
+	tegra_mipi_writel(mipi, value, MIPI_CAL_BIAS_PAD_CFG0);
+
+	return 0;
+}
+
+struct tegra_mipi_device *tegra_mipi_request(struct device *device,
+					     struct device_node *np)
+{
+	struct tegra_mipi_device *dev;
+	struct of_phandle_args args;
+	int err;
+
+	err = of_parse_phandle_with_args(np, "nvidia,mipi-calibrate",
+					 "#nvidia,mipi-calibrate-cells", 0,
+					 &args);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (!dev) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	dev->pdev = of_find_device_by_node(args.np);
+	if (!dev->pdev) {
+		err = -ENODEV;
+		goto free;
+	}
+
+	dev->mipi = platform_get_drvdata(dev->pdev);
+	if (!dev->mipi) {
+		err = -EPROBE_DEFER;
+		goto put;
+	}
+
+	of_node_put(args.np);
+
+	dev->pads = args.args[0];
+	dev->device = device;
+
+	return dev;
+
+put:
+	platform_device_put(dev->pdev);
+free:
+	kfree(dev);
+out:
+	of_node_put(args.np);
+	return ERR_PTR(err);
+}
+EXPORT_SYMBOL(tegra_mipi_request);
+
+void tegra_mipi_free(struct tegra_mipi_device *device)
+{
+	platform_device_put(device->pdev);
+	kfree(device);
+}
+EXPORT_SYMBOL(tegra_mipi_free);
+
+int tegra_mipi_enable(struct tegra_mipi_device *dev)
+{
+	int err = 0;
+
+	mutex_lock(&dev->mipi->lock);
+
+	if (dev->mipi->usage_count++ == 0)
+		err = tegra_mipi_power_up(dev->mipi);
+
+	mutex_unlock(&dev->mipi->lock);
+
+	return err;
+
+}
+EXPORT_SYMBOL(tegra_mipi_enable);
+
+int tegra_mipi_disable(struct tegra_mipi_device *dev)
+{
+	int err = 0;
+
+	mutex_lock(&dev->mipi->lock);
+
+	if (--dev->mipi->usage_count == 0)
+		err = tegra_mipi_power_down(dev->mipi);
+
+	mutex_unlock(&dev->mipi->lock);
+
+	return err;
+
+}
+EXPORT_SYMBOL(tegra_mipi_disable);
+
+int tegra_mipi_finish_calibration(struct tegra_mipi_device *device)
+{
+	struct tegra_mipi *mipi = device->mipi;
+	void __iomem *status_reg = mipi->regs + (MIPI_CAL_STATUS << 2);
+	u32 value;
+	int err;
+
+	err = readl_relaxed_poll_timeout(status_reg, value,
+					 !(value & MIPI_CAL_STATUS_ACTIVE) &&
+					 (value & MIPI_CAL_STATUS_DONE), 50,
+					 250000);
+	mutex_unlock(&device->mipi->lock);
+	clk_disable(device->mipi->clk);
+
+	return err;
+}
+EXPORT_SYMBOL(tegra_mipi_finish_calibration);
+
+int tegra_mipi_start_calibration(struct tegra_mipi_device *device)
+{
+	const struct tegra_mipi_soc *soc = device->mipi->soc;
+	unsigned int i;
+	u32 value;
+	int err;
+
+	err = clk_enable(device->mipi->clk);
+	if (err < 0)
+		return err;
+
+	mutex_lock(&device->mipi->lock);
+
+	value = MIPI_CAL_BIAS_PAD_DRV_DN_REF(soc->pad_drive_down_ref) |
+		MIPI_CAL_BIAS_PAD_DRV_UP_REF(soc->pad_drive_up_ref);
+	tegra_mipi_writel(device->mipi, value, MIPI_CAL_BIAS_PAD_CFG1);
+
+	value = tegra_mipi_readl(device->mipi, MIPI_CAL_BIAS_PAD_CFG2);
+	value &= ~MIPI_CAL_BIAS_PAD_VCLAMP(0x7);
+	value &= ~MIPI_CAL_BIAS_PAD_VAUXP(0x7);
+	value |= MIPI_CAL_BIAS_PAD_VCLAMP(soc->pad_vclamp_level);
+	value |= MIPI_CAL_BIAS_PAD_VAUXP(soc->pad_vauxp_level);
+	tegra_mipi_writel(device->mipi, value, MIPI_CAL_BIAS_PAD_CFG2);
+
+	for (i = 0; i < soc->num_pads; i++) {
+		u32 clk = 0, data = 0;
+
+		if (device->pads & BIT(i)) {
+			data = MIPI_CAL_CONFIG_SELECT |
+			       MIPI_CAL_CONFIG_HSPDOS(soc->hspdos) |
+			       MIPI_CAL_CONFIG_HSPUOS(soc->hspuos) |
+			       MIPI_CAL_CONFIG_TERMOS(soc->termos);
+			clk = MIPI_CAL_CONFIG_SELECT |
+			      MIPI_CAL_CONFIG_HSCLKPDOSD(soc->hsclkpdos) |
+			      MIPI_CAL_CONFIG_HSCLKPUOSD(soc->hsclkpuos);
+		}
+
+		tegra_mipi_writel(device->mipi, data, soc->pads[i].data);
+
+		if (soc->has_clk_lane && soc->pads[i].clk != 0)
+			tegra_mipi_writel(device->mipi, clk, soc->pads[i].clk);
+	}
+
+	value = tegra_mipi_readl(device->mipi, MIPI_CAL_CTRL);
+	value &= ~MIPI_CAL_CTRL_NOISE_FILTER(0xf);
+	value &= ~MIPI_CAL_CTRL_PRESCALE(0x3);
+	value |= MIPI_CAL_CTRL_NOISE_FILTER(0xa);
+	value |= MIPI_CAL_CTRL_PRESCALE(0x2);
+
+	if (!soc->clock_enable_override)
+		value &= ~MIPI_CAL_CTRL_CLKEN_OVR;
+	else
+		value |= MIPI_CAL_CTRL_CLKEN_OVR;
+
+	tegra_mipi_writel(device->mipi, value, MIPI_CAL_CTRL);
+
+	/* clear any pending status bits */
+	value = tegra_mipi_readl(device->mipi, MIPI_CAL_STATUS);
+	tegra_mipi_writel(device->mipi, value, MIPI_CAL_STATUS);
+
+	value = tegra_mipi_readl(device->mipi, MIPI_CAL_CTRL);
+	value |= MIPI_CAL_CTRL_START;
+	tegra_mipi_writel(device->mipi, value, MIPI_CAL_CTRL);
+
+	/*
+	 * Wait for min 72uS to let calibration logic finish calibration
+	 * sequence codes before waiting for pads idle state to apply the
+	 * results.
+	 */
+	usleep_range(75, 80);
+
+	return 0;
+}
+EXPORT_SYMBOL(tegra_mipi_start_calibration);
+
+static const struct tegra_mipi_pad tegra114_mipi_pads[] = {
+	{ .data = MIPI_CAL_CONFIG_CSIA },
+	{ .data = MIPI_CAL_CONFIG_CSIB },
+	{ .data = MIPI_CAL_CONFIG_CSIC },
+	{ .data = MIPI_CAL_CONFIG_CSID },
+	{ .data = MIPI_CAL_CONFIG_CSIE },
+	{ .data = MIPI_CAL_CONFIG_DSIA },
+	{ .data = MIPI_CAL_CONFIG_DSIB },
+	{ .data = MIPI_CAL_CONFIG_DSIC },
+	{ .data = MIPI_CAL_CONFIG_DSID },
+};
+
+static const struct tegra_mipi_soc tegra114_mipi_soc = {
+	.has_clk_lane = false,
+	.pads = tegra114_mipi_pads,
+	.num_pads = ARRAY_SIZE(tegra114_mipi_pads),
+	.clock_enable_override = true,
+	.needs_vclamp_ref = true,
+	.pad_drive_down_ref = 0x2,
+	.pad_drive_up_ref = 0x0,
+	.pad_vclamp_level = 0x0,
+	.pad_vauxp_level = 0x0,
+	.hspdos = 0x0,
+	.hspuos = 0x4,
+	.termos = 0x5,
+	.hsclkpdos = 0x0,
+	.hsclkpuos = 0x4,
+};
+
+static const struct tegra_mipi_pad tegra124_mipi_pads[] = {
+	{ .data = MIPI_CAL_CONFIG_CSIA, .clk = MIPI_CAL_CONFIG_CSIAB_CLK },
+	{ .data = MIPI_CAL_CONFIG_CSIB, .clk = MIPI_CAL_CONFIG_CSIAB_CLK },
+	{ .data = MIPI_CAL_CONFIG_CSIC, .clk = MIPI_CAL_CONFIG_CSICD_CLK },
+	{ .data = MIPI_CAL_CONFIG_CSID, .clk = MIPI_CAL_CONFIG_CSICD_CLK },
+	{ .data = MIPI_CAL_CONFIG_CSIE, .clk = MIPI_CAL_CONFIG_CSIE_CLK  },
+	{ .data = MIPI_CAL_CONFIG_DSIA, .clk = MIPI_CAL_CONFIG_DSIA_CLK  },
+	{ .data = MIPI_CAL_CONFIG_DSIB, .clk = MIPI_CAL_CONFIG_DSIB_CLK  },
+};
+
+static const struct tegra_mipi_soc tegra124_mipi_soc = {
+	.has_clk_lane = true,
+	.pads = tegra124_mipi_pads,
+	.num_pads = ARRAY_SIZE(tegra124_mipi_pads),
+	.clock_enable_override = true,
+	.needs_vclamp_ref = true,
+	.pad_drive_down_ref = 0x2,
+	.pad_drive_up_ref = 0x0,
+	.pad_vclamp_level = 0x0,
+	.pad_vauxp_level = 0x0,
+	.hspdos = 0x0,
+	.hspuos = 0x0,
+	.termos = 0x0,
+	.hsclkpdos = 0x1,
+	.hsclkpuos = 0x2,
+};
+
+static const struct tegra_mipi_soc tegra132_mipi_soc = {
+	.has_clk_lane = true,
+	.pads = tegra124_mipi_pads,
+	.num_pads = ARRAY_SIZE(tegra124_mipi_pads),
+	.clock_enable_override = false,
+	.needs_vclamp_ref = false,
+	.pad_drive_down_ref = 0x0,
+	.pad_drive_up_ref = 0x3,
+	.pad_vclamp_level = 0x0,
+	.pad_vauxp_level = 0x0,
+	.hspdos = 0x0,
+	.hspuos = 0x0,
+	.termos = 0x0,
+	.hsclkpdos = 0x3,
+	.hsclkpuos = 0x2,
+};
+
+static const struct tegra_mipi_pad tegra210_mipi_pads[] = {
+	{ .data = MIPI_CAL_CONFIG_CSIA, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_CSIB, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_CSIC, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_CSID, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_CSIE, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_CSIF, .clk = 0 },
+	{ .data = MIPI_CAL_CONFIG_DSIA, .clk = MIPI_CAL_CONFIG_DSIA_CLK },
+	{ .data = MIPI_CAL_CONFIG_DSIB, .clk = MIPI_CAL_CONFIG_DSIB_CLK },
+	{ .data = MIPI_CAL_CONFIG_DSIC, .clk = MIPI_CAL_CONFIG_DSIC_CLK },
+	{ .data = MIPI_CAL_CONFIG_DSID, .clk = MIPI_CAL_CONFIG_DSID_CLK },
+};
+
+static const struct tegra_mipi_soc tegra210_mipi_soc = {
+	.has_clk_lane = true,
+	.pads = tegra210_mipi_pads,
+	.num_pads = ARRAY_SIZE(tegra210_mipi_pads),
+	.clock_enable_override = true,
+	.needs_vclamp_ref = false,
+	.pad_drive_down_ref = 0x0,
+	.pad_drive_up_ref = 0x3,
+	.pad_vclamp_level = 0x1,
+	.pad_vauxp_level = 0x1,
+	.hspdos = 0x0,
+	.hspuos = 0x2,
+	.termos = 0x0,
+	.hsclkpdos = 0x0,
+	.hsclkpuos = 0x2,
+};
+
+static const struct of_device_id tegra_mipi_of_match[] = {
+	{ .compatible = "nvidia,tegra114-mipi", .data = &tegra114_mipi_soc },
+	{ .compatible = "nvidia,tegra124-mipi", .data = &tegra124_mipi_soc },
+	{ .compatible = "nvidia,tegra132-mipi", .data = &tegra132_mipi_soc },
+	{ .compatible = "nvidia,tegra210-mipi", .data = &tegra210_mipi_soc },
+	{ },
+};
+
+static int tegra_mipi_probe(struct platform_device *pdev)
+{
+	const struct of_device_id *match;
+	struct tegra_mipi *mipi;
+
+	match = of_match_node(tegra_mipi_of_match, pdev->dev.of_node);
+	if (!match)
+		return -ENODEV;
+
+	mipi = devm_kzalloc(&pdev->dev, sizeof(*mipi), GFP_KERNEL);
+	if (!mipi)
+		return -ENOMEM;
+
+	mipi->soc = match->data;
+	mipi->dev = &pdev->dev;
+
+	mipi->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
+	if (IS_ERR(mipi->regs))
+		return PTR_ERR(mipi->regs);
+
+	mutex_init(&mipi->lock);
+
+	mipi->clk = devm_clk_get_prepared(&pdev->dev, NULL);
+	if (IS_ERR(mipi->clk)) {
+		dev_err(&pdev->dev, "failed to get clock\n");
+		return PTR_ERR(mipi->clk);
+	}
+
+	platform_set_drvdata(pdev, mipi);
+
+	return 0;
+}
+
+struct platform_driver tegra_mipi_driver = {
+	.driver = {
+		.name = "tegra-mipi",
+		.of_match_table = tegra_mipi_of_match,
+	},
+	.probe = tegra_mipi_probe,
+};
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index 409745b949db..acc7d82e0585 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -1,23 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
 /*
  * Tegra host1x Syncpoints
  *
- * Copyright (c) 2010-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ * Copyright (c) 2010-2015, NVIDIA Corporation.
  */
 
 #include <linux/module.h>
 #include <linux/device.h>
+#include <linux/dma-fence.h>
 #include <linux/slab.h>
 
 #include <trace/events/host1x.h>
@@ -30,44 +20,115 @@
 #define SYNCPT_CHECK_PERIOD (2 * HZ)
 #define MAX_STUCK_CHECK_COUNT 15
 
-static struct host1x_syncpt *_host1x_syncpt_alloc(struct host1x *host,
-						  struct device *dev,
-						  bool client_managed)
+static struct host1x_syncpt_base *
+host1x_syncpt_base_request(struct host1x *host)
 {
-	int i;
-	struct host1x_syncpt *sp = host->syncpt;
-	char *name;
+	struct host1x_syncpt_base *bases = host->bases;
+	unsigned int i;
 
-	for (i = 0; i < host->info->nb_pts && sp->name; i++, sp++)
-		;
+	for (i = 0; i < host->info->nb_bases; i++)
+		if (!bases[i].requested)
+			break;
 
-	if (i >= host->info->nb_pts)
+	if (i >= host->info->nb_bases)
 		return NULL;
 
-	name = kasprintf(GFP_KERNEL, "%02d-%s", sp->id,
-			dev ? dev_name(dev) : NULL);
+	bases[i].requested = true;
+	return &bases[i];
+}
+
+static void host1x_syncpt_base_free(struct host1x_syncpt_base *base)
+{
+	if (base)
+		base->requested = false;
+}
+
+/**
+ * host1x_syncpt_alloc() - allocate a syncpoint
+ * @host: host1x device data
+ * @flags: bitfield of HOST1X_SYNCPT_* flags
+ * @name: name for the syncpoint for use in debug prints
+ *
+ * Allocates a hardware syncpoint for the caller's use. The caller then has
+ * the sole authority to mutate the syncpoint's value until it is freed again.
+ *
+ * If no free syncpoints are available, or a NULL name was specified, returns
+ * NULL.
+ */
+struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host,
+					  unsigned long flags,
+					  const char *name)
+{
+	struct host1x_syncpt *sp = host->syncpt;
+	char *full_name;
+	unsigned int i;
+
 	if (!name)
 		return NULL;
 
-	sp->dev = dev;
-	sp->name = name;
-	sp->client_managed = client_managed;
+	mutex_lock(&host->syncpt_mutex);
+
+	for (i = 0; i < host->info->nb_pts && kref_read(&sp->ref); i++, sp++)
+		;
 
+	if (i >= host->info->nb_pts)
+		goto unlock;
+
+	if (flags & HOST1X_SYNCPT_HAS_BASE) {
+		sp->base = host1x_syncpt_base_request(host);
+		if (!sp->base)
+			goto unlock;
+	}
+
+	full_name = kasprintf(GFP_KERNEL, "%u-%s", sp->id, name);
+	if (!full_name)
+		goto free_base;
+
+	sp->name = full_name;
+
+	if (flags & HOST1X_SYNCPT_CLIENT_MANAGED)
+		sp->client_managed = true;
+	else
+		sp->client_managed = false;
+
+	kref_init(&sp->ref);
+
+	mutex_unlock(&host->syncpt_mutex);
 	return sp;
+
+free_base:
+	host1x_syncpt_base_free(sp->base);
+	sp->base = NULL;
+unlock:
+	mutex_unlock(&host->syncpt_mutex);
+	return NULL;
 }
+EXPORT_SYMBOL(host1x_syncpt_alloc);
 
+/**
+ * host1x_syncpt_id() - retrieve syncpoint ID
+ * @sp: host1x syncpoint
+ *
+ * Given a pointer to a struct host1x_syncpt, retrieves its ID. This ID is
+ * often used as a value to program into registers that control how hardware
+ * blocks interact with syncpoints.
+ */
 u32 host1x_syncpt_id(struct host1x_syncpt *sp)
 {
 	return sp->id;
 }
+EXPORT_SYMBOL(host1x_syncpt_id);
 
-/*
- * Updates the value sent to hardware.
+/**
+ * host1x_syncpt_incr_max() - update the value sent to hardware
+ * @sp: host1x syncpoint
+ * @incrs: number of increments
  */
 u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs)
 {
 	return (u32)atomic_add_return(incrs, &sp->max_val);
 }
+EXPORT_SYMBOL(host1x_syncpt_incr_max);
 
  /*
  * Write cached syncpoint and waitbase values to hardware.
@@ -75,12 +136,23 @@ u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs)
 void host1x_syncpt_restore(struct host1x *host)
 {
 	struct host1x_syncpt *sp_base = host->syncpt;
-	u32 i;
+	unsigned int i;
 
-	for (i = 0; i < host1x_syncpt_nb_pts(host); i++)
+	for (i = 0; i < host1x_syncpt_nb_pts(host); i++) {
+		/*
+		 * Unassign syncpt from channels for purposes of Tegra186
+		 * syncpoint protection. This prevents any channel from
+		 * accessing it until it is reassigned.
+		 */
+		host1x_hw_syncpt_assign_to_channel(host, sp_base + i, NULL);
 		host1x_hw_syncpt_restore(host, sp_base + i);
+	}
+
 	for (i = 0; i < host1x_syncpt_nb_bases(host); i++)
 		host1x_hw_syncpt_restore_wait_base(host, sp_base + i);
+
+	host1x_hw_syncpt_enable_protection(host);
+
 	wmb();
 }
 
@@ -91,7 +163,7 @@ void host1x_syncpt_restore(struct host1x *host)
 void host1x_syncpt_save(struct host1x *host)
 {
 	struct host1x_syncpt *sp_base = host->syncpt;
-	u32 i;
+	unsigned int i;
 
 	for (i = 0; i < host1x_syncpt_nb_pts(host); i++) {
 		if (host1x_syncpt_client_managed(sp_base + i))
@@ -111,6 +183,7 @@ void host1x_syncpt_save(struct host1x *host)
 u32 host1x_syncpt_load(struct host1x_syncpt *sp)
 {
 	u32 val;
+
 	val = host1x_hw_syncpt_load(sp->host, sp);
 	trace_host1x_syncpt_load_min(sp->id, val);
 
@@ -122,117 +195,71 @@ u32 host1x_syncpt_load(struct host1x_syncpt *sp)
  */
 u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp)
 {
-	u32 val;
 	host1x_hw_syncpt_load_wait_base(sp->host, sp);
-	val = sp->base_val;
-	return val;
+
+	return sp->base_val;
 }
 
-/*
- * Increment syncpoint value from cpu, updating cache
+/**
+ * host1x_syncpt_incr() - increment syncpoint value from CPU, updating cache
+ * @sp: host1x syncpoint
  */
 int host1x_syncpt_incr(struct host1x_syncpt *sp)
 {
 	return host1x_hw_syncpt_cpu_incr(sp->host, sp);
 }
-
-/*
- * Updated sync point form hardware, and returns true if syncpoint is expired,
- * false if we may need to wait
- */
-static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
-{
-	host1x_hw_syncpt_load(sp->host, sp);
-	return host1x_syncpt_is_expired(sp, thresh);
-}
-
-/*
- * Main entrypoint for syncpoint value waits.
+EXPORT_SYMBOL(host1x_syncpt_incr);
+
+/**
+ * host1x_syncpt_wait() - wait for a syncpoint to reach a given value
+ * @sp: host1x syncpoint
+ * @thresh: threshold
+ * @timeout: maximum time to wait for the syncpoint to reach the given value
+ * @value: return location for the syncpoint value
  */
 int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
-			u32 *value)
+		       u32 *value)
 {
-	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
-	void *ref;
-	struct host1x_waitlist *waiter;
-	int err = 0, check_count = 0;
-	u32 val;
+	struct dma_fence *fence;
+	long wait_err;
+
+	host1x_hw_syncpt_load(sp->host, sp);
 
 	if (value)
-		*value = 0;
+		*value = host1x_syncpt_load(sp);
 
-	/* first check cache */
-	if (host1x_syncpt_is_expired(sp, thresh)) {
-		if (value)
-			*value = host1x_syncpt_load(sp);
+	if (host1x_syncpt_is_expired(sp, thresh))
 		return 0;
-	}
 
-	/* try to read from register */
-	val = host1x_hw_syncpt_load(sp->host, sp);
-	if (host1x_syncpt_is_expired(sp, thresh)) {
-		if (value)
-			*value = val;
-		goto done;
-	}
-
-	if (!timeout) {
-		err = -EAGAIN;
-		goto done;
-	}
-
-	/* allocate a waiter */
-	waiter = kzalloc(sizeof(*waiter), GFP_KERNEL);
-	if (!waiter) {
-		err = -ENOMEM;
-		goto done;
-	}
-
-	/* schedule a wakeup when the syncpoint value is reached */
-	err = host1x_intr_add_action(sp->host, sp->id, thresh,
-				     HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
-				     &wq, waiter, &ref);
-	if (err)
-		goto done;
-
-	err = -EAGAIN;
-	/* Caller-specified timeout may be impractically low */
 	if (timeout < 0)
 		timeout = LONG_MAX;
+	else if (timeout == 0)
+		return -EAGAIN;
 
-	/* wait for the syncpoint, or timeout, or signal */
-	while (timeout) {
-		long check = min_t(long, SYNCPT_CHECK_PERIOD, timeout);
-		int remain = wait_event_interruptible_timeout(wq,
-				syncpt_load_min_is_expired(sp, thresh),
-				check);
-		if (remain > 0 || host1x_syncpt_is_expired(sp, thresh)) {
-			if (value)
-				*value = host1x_syncpt_load(sp);
-			err = 0;
-			break;
-		}
-		if (remain < 0) {
-			err = remain;
-			break;
-		}
-		timeout -= check;
-		if (timeout && check_count <= MAX_STUCK_CHECK_COUNT) {
-			dev_warn(sp->host->dev,
-				"%s: syncpoint id %d (%s) stuck waiting %d, timeout=%ld\n",
-				 current->comm, sp->id, sp->name,
-				 thresh, timeout);
-
-			host1x_debug_dump_syncpts(sp->host);
-			if (check_count == MAX_STUCK_CHECK_COUNT)
-				host1x_debug_dump(sp->host);
-			check_count++;
-		}
-	}
-	host1x_intr_put_ref(sp->host, sp->id, ref);
+	fence = host1x_fence_create(sp, thresh, false);
+	if (IS_ERR(fence))
+		return PTR_ERR(fence);
+
+	wait_err = dma_fence_wait_timeout(fence, true, timeout);
+	if (wait_err == 0)
+		host1x_fence_cancel(fence);
+	dma_fence_put(fence);
+
+	if (value)
+		*value = host1x_syncpt_load(sp);
 
-done:
-	return err;
+	/*
+	 * Don't rely on dma_fence_wait_timeout return value,
+	 * since it returns zero both on timeout and if the
+	 * wait completed with 0 jiffies left.
+	 */
+	host1x_hw_syncpt_load(sp->host, sp);
+	if (wait_err == 0 && !host1x_syncpt_is_expired(sp, thresh))
+		return -EAGAIN;
+	else if (wait_err < 0)
+		return wait_err;
+	else
+		return 0;
 }
 EXPORT_SYMBOL(host1x_syncpt_wait);
 
@@ -242,136 +269,262 @@ EXPORT_SYMBOL(host1x_syncpt_wait);
 bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh)
 {
 	u32 current_val;
-	u32 future_val;
+
 	smp_rmb();
+
 	current_val = (u32)atomic_read(&sp->min_val);
-	future_val = (u32)atomic_read(&sp->max_val);
-
-	/* Note the use of unsigned arithmetic here (mod 1<<32).
-	 *
-	 * c = current_val = min_val	= the current value of the syncpoint.
-	 * t = thresh			= the value we are checking
-	 * f = future_val  = max_val	= the value c will reach when all
-	 *				  outstanding increments have completed.
-	 *
-	 * Note that c always chases f until it reaches f.
-	 *
-	 * Dtf = (f - t)
-	 * Dtc = (c - t)
-	 *
-	 *  Consider all cases:
-	 *
-	 *	A) .....c..t..f.....	Dtf < Dtc	need to wait
-	 *	B) .....c.....f..t..	Dtf > Dtc	expired
-	 *	C) ..t..c.....f.....	Dtf > Dtc	expired	   (Dct very large)
-	 *
-	 *  Any case where f==c: always expired (for any t).	Dtf == Dcf
-	 *  Any case where t==c: always expired (for any f).	Dtf >= Dtc (because Dtc==0)
-	 *  Any case where t==f!=c: always wait.		Dtf <  Dtc (because Dtf==0,
-	 *							Dtc!=0)
-	 *
-	 *  Other cases:
-	 *
-	 *	A) .....t..f..c.....	Dtf < Dtc	need to wait
-	 *	A) .....f..c..t.....	Dtf < Dtc	need to wait
-	 *	A) .....f..t..c.....	Dtf > Dtc	expired
-	 *
-	 *   So:
-	 *	   Dtf >= Dtc implies EXPIRED	(return true)
-	 *	   Dtf <  Dtc implies WAIT	(return false)
-	 *
-	 * Note: If t is expired then we *cannot* wait on it. We would wait
-	 * forever (hang the system).
-	 *
-	 * Note: do NOT get clever and remove the -thresh from both sides. It
-	 * is NOT the same.
-	 *
-	 * If future valueis zero, we have a client managed sync point. In that
-	 * case we do a direct comparison.
-	 */
-	if (!host1x_syncpt_client_managed(sp))
-		return future_val - thresh >= current_val - thresh;
-	else
-		return (s32)(current_val - thresh) >= 0;
-}
 
-/* remove a wait pointed to by patch_addr */
-int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr)
-{
-	return host1x_hw_syncpt_patch_wait(sp->host, sp, patch_addr);
+	return ((current_val - thresh) & 0x80000000U) == 0U;
 }
 
 int host1x_syncpt_init(struct host1x *host)
 {
+	struct host1x_syncpt_base *bases;
 	struct host1x_syncpt *syncpt;
-	int i;
+	unsigned int i;
 
-	syncpt = devm_kzalloc(host->dev, sizeof(*syncpt) * host->info->nb_pts,
-		GFP_KERNEL);
+	syncpt = devm_kcalloc(host->dev, host->info->nb_pts, sizeof(*syncpt),
+			      GFP_KERNEL);
 	if (!syncpt)
 		return -ENOMEM;
 
-	for (i = 0; i < host->info->nb_pts; ++i) {
+	bases = devm_kcalloc(host->dev, host->info->nb_bases, sizeof(*bases),
+			     GFP_KERNEL);
+	if (!bases)
+		return -ENOMEM;
+
+	for (i = 0; i < host->info->nb_pts; i++) {
 		syncpt[i].id = i;
 		syncpt[i].host = host;
 	}
 
-	host->syncpt = syncpt;
+	for (i = 0; i < host->info->nb_bases; i++)
+		bases[i].id = i;
 
-	host1x_syncpt_restore(host);
+	mutex_init(&host->syncpt_mutex);
+	host->syncpt = syncpt;
+	host->bases = bases;
 
 	/* Allocate sync point to use for clearing waits for expired fences */
-	host->nop_sp = _host1x_syncpt_alloc(host, NULL, false);
+	host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop");
 	if (!host->nop_sp)
 		return -ENOMEM;
 
+	if (host->info->reserve_vblank_syncpts) {
+		kref_init(&host->syncpt[26].ref);
+		kref_init(&host->syncpt[27].ref);
+	}
+
 	return 0;
 }
 
-struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
-					    bool client_managed)
+/**
+ * host1x_syncpt_request() - request a syncpoint
+ * @client: client requesting the syncpoint
+ * @flags: flags
+ *
+ * host1x client drivers can use this function to allocate a syncpoint for
+ * subsequent use. A syncpoint returned by this function will be reserved for
+ * use by the client exclusively. When no longer using a syncpoint, a host1x
+ * client driver needs to release it using host1x_syncpt_put().
+ */
+struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client,
+					    unsigned long flags)
 {
-	struct host1x *host = dev_get_drvdata(dev->parent);
-	return _host1x_syncpt_alloc(host, dev, client_managed);
+	struct host1x *host = dev_get_drvdata(client->host->parent);
+
+	return host1x_syncpt_alloc(host, flags, dev_name(client->dev));
 }
+EXPORT_SYMBOL(host1x_syncpt_request);
 
-void host1x_syncpt_free(struct host1x_syncpt *sp)
+static void syncpt_release(struct kref *ref)
 {
-	if (!sp)
-		return;
+	struct host1x_syncpt *sp = container_of(ref, struct host1x_syncpt, ref);
+
+	atomic_set(&sp->max_val, host1x_syncpt_read(sp));
 
+	sp->locked = false;
+
+	host1x_syncpt_base_free(sp->base);
 	kfree(sp->name);
-	sp->dev = NULL;
+	sp->base = NULL;
 	sp->name = NULL;
 	sp->client_managed = false;
+
+	mutex_unlock(&sp->host->syncpt_mutex);
 }
 
+/**
+ * host1x_syncpt_put() - free a requested syncpoint
+ * @sp: host1x syncpoint
+ *
+ * Release a syncpoint previously allocated using host1x_syncpt_request(). A
+ * host1x client driver should call this when the syncpoint is no longer in
+ * use.
+ */
+void host1x_syncpt_put(struct host1x_syncpt *sp)
+{
+	if (!sp)
+		return;
+
+	kref_put_mutex(&sp->ref, syncpt_release, &sp->host->syncpt_mutex);
+}
+EXPORT_SYMBOL(host1x_syncpt_put);
+
 void host1x_syncpt_deinit(struct host1x *host)
 {
-	int i;
 	struct host1x_syncpt *sp = host->syncpt;
+	unsigned int i;
+
 	for (i = 0; i < host->info->nb_pts; i++, sp++)
 		kfree(sp->name);
 }
 
-int host1x_syncpt_nb_pts(struct host1x *host)
+/**
+ * host1x_syncpt_read_max() - read maximum syncpoint value
+ * @sp: host1x syncpoint
+ *
+ * The maximum syncpoint value indicates how many operations there are in
+ * queue, either in channel or in a software thread.
+ */
+u32 host1x_syncpt_read_max(struct host1x_syncpt *sp)
+{
+	smp_rmb();
+
+	return (u32)atomic_read(&sp->max_val);
+}
+EXPORT_SYMBOL(host1x_syncpt_read_max);
+
+/**
+ * host1x_syncpt_read_min() - read minimum syncpoint value
+ * @sp: host1x syncpoint
+ *
+ * The minimum syncpoint value is a shadow of the current sync point value in
+ * hardware.
+ */
+u32 host1x_syncpt_read_min(struct host1x_syncpt *sp)
+{
+	smp_rmb();
+
+	return (u32)atomic_read(&sp->min_val);
+}
+EXPORT_SYMBOL(host1x_syncpt_read_min);
+
+/**
+ * host1x_syncpt_read() - read the current syncpoint value
+ * @sp: host1x syncpoint
+ */
+u32 host1x_syncpt_read(struct host1x_syncpt *sp)
+{
+	return host1x_syncpt_load(sp);
+}
+EXPORT_SYMBOL(host1x_syncpt_read);
+
+unsigned int host1x_syncpt_nb_pts(struct host1x *host)
 {
 	return host->info->nb_pts;
 }
 
-int host1x_syncpt_nb_bases(struct host1x *host)
+unsigned int host1x_syncpt_nb_bases(struct host1x *host)
 {
 	return host->info->nb_bases;
 }
 
-int host1x_syncpt_nb_mlocks(struct host1x *host)
+unsigned int host1x_syncpt_nb_mlocks(struct host1x *host)
 {
 	return host->info->nb_mlocks;
 }
 
-struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, u32 id)
+/**
+ * host1x_syncpt_get_by_id() - obtain a syncpoint by ID
+ * @host: host1x controller
+ * @id: syncpoint ID
+ */
+struct host1x_syncpt *host1x_syncpt_get_by_id(struct host1x *host,
+					      unsigned int id)
+{
+	if (id >= host->info->nb_pts)
+		return NULL;
+
+	if (kref_get_unless_zero(&host->syncpt[id].ref))
+		return &host->syncpt[id];
+	else
+		return NULL;
+}
+EXPORT_SYMBOL(host1x_syncpt_get_by_id);
+
+/**
+ * host1x_syncpt_get_by_id_noref() - obtain a syncpoint by ID but don't
+ * 	increase the refcount.
+ * @host: host1x controller
+ * @id: syncpoint ID
+ */
+struct host1x_syncpt *host1x_syncpt_get_by_id_noref(struct host1x *host,
+						    unsigned int id)
 {
-	if (host->info->nb_pts < id)
+	if (id >= host->info->nb_pts)
 		return NULL;
-	return host->syncpt + id;
+
+	return &host->syncpt[id];
+}
+EXPORT_SYMBOL(host1x_syncpt_get_by_id_noref);
+
+/**
+ * host1x_syncpt_get() - increment syncpoint refcount
+ * @sp: syncpoint
+ */
+struct host1x_syncpt *host1x_syncpt_get(struct host1x_syncpt *sp)
+{
+	kref_get(&sp->ref);
+
+	return sp;
+}
+EXPORT_SYMBOL(host1x_syncpt_get);
+
+/**
+ * host1x_syncpt_get_base() - obtain the wait base associated with a syncpoint
+ * @sp: host1x syncpoint
+ */
+struct host1x_syncpt_base *host1x_syncpt_get_base(struct host1x_syncpt *sp)
+{
+	return sp ? sp->base : NULL;
+}
+EXPORT_SYMBOL(host1x_syncpt_get_base);
+
+/**
+ * host1x_syncpt_base_id() - retrieve the ID of a syncpoint wait base
+ * @base: host1x syncpoint wait base
+ */
+u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base)
+{
+	return base->id;
+}
+EXPORT_SYMBOL(host1x_syncpt_base_id);
+
+static void do_nothing(struct kref *ref)
+{
+}
+
+/**
+ * host1x_syncpt_release_vblank_reservation() - Make VBLANK syncpoint
+ *   available for allocation
+ *
+ * @client: host1x bus client
+ * @syncpt_id: syncpoint ID to make available
+ *
+ * Makes VBLANK<i> syncpoint available for allocatation if it was
+ * reserved at initialization time. This should be called by the display
+ * driver after it has ensured that any VBLANK increment programming configured
+ * by the boot chain has been disabled.
+ */
+void host1x_syncpt_release_vblank_reservation(struct host1x_client *client,
+					      u32 syncpt_id)
+{
+	struct host1x *host = dev_get_drvdata(client->host->parent);
+
+	if (!host->info->reserve_vblank_syncpts)
+		return;
+
+	kref_put(&host->syncpt[syncpt_id].ref, do_nothing);
 }
+EXPORT_SYMBOL(host1x_syncpt_release_vblank_reservation);
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index 267c0b9d3647..4c3f3b2f0e9c 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -1,28 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
 /*
  * Tegra host1x Syncpoints
  *
  * Copyright (c) 2010-2013, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
 #ifndef __HOST1X_SYNCPT_H
 #define __HOST1X_SYNCPT_H
 
 #include <linux/atomic.h>
+#include <linux/host1x.h>
 #include <linux/kernel.h>
+#include <linux/kref.h>
 #include <linux/sched.h>
 
+#include "fence.h"
 #include "intr.h"
 
 struct host1x;
@@ -30,18 +22,32 @@ struct host1x;
 /* Reserved for replacing an expired wait with a NOP */
 #define HOST1X_SYNCPT_RESERVED			0
 
+struct host1x_syncpt_base {
+	unsigned int id;
+	bool requested;
+};
+
 struct host1x_syncpt {
-	int id;
+	struct kref ref;
+
+	unsigned int id;
 	atomic_t min_val;
 	atomic_t max_val;
 	u32 base_val;
 	const char *name;
 	bool client_managed;
 	struct host1x *host;
-	struct device *dev;
+	struct host1x_syncpt_base *base;
 
 	/* interrupt data */
-	struct host1x_syncpt_intr intr;
+	struct host1x_fence_list fences;
+
+	/*
+	 * If a submission incrementing this syncpoint fails, lock it so that
+	 * further submission cannot be made until application has handled the
+	 * failure.
+	 */
+	bool locked;
 };
 
 /* Initialize sync point array  */
@@ -50,33 +56,14 @@ int host1x_syncpt_init(struct host1x *host);
 /*  Free sync point array */
 void host1x_syncpt_deinit(struct host1x *host);
 
-/*
- * Read max. It indicates how many operations there are in queue, either in
- * channel or in a software thread.
- * */
-static inline u32 host1x_syncpt_read_max(struct host1x_syncpt *sp)
-{
-	smp_rmb();
-	return (u32)atomic_read(&sp->max_val);
-}
-
-/*
- * Read min, which is a shadow of the current sync point value in hardware.
- */
-static inline u32 host1x_syncpt_read_min(struct host1x_syncpt *sp)
-{
-	smp_rmb();
-	return (u32)atomic_read(&sp->min_val);
-}
-
 /* Return number of sync point supported. */
-int host1x_syncpt_nb_pts(struct host1x *host);
+unsigned int host1x_syncpt_nb_pts(struct host1x *host);
 
 /* Return number of wait bases supported. */
-int host1x_syncpt_nb_bases(struct host1x *host);
+unsigned int host1x_syncpt_nb_bases(struct host1x *host);
 
 /* Return number of mlocks supported. */
-int host1x_syncpt_nb_mlocks(struct host1x *host);
+unsigned int host1x_syncpt_nb_mlocks(struct host1x *host);
 
 /*
  * Check sync point sanity. If max is larger than min, there have too many
@@ -112,9 +99,6 @@ static inline bool host1x_syncpt_idle(struct host1x_syncpt *sp)
 	return (min == max);
 }
 
-/* Return pointer to struct denoting sync point id. */
-struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, u32 id);
-
 /* Load current value from hardware to the shadow register. */
 u32 host1x_syncpt_load(struct host1x_syncpt *sp);
 
@@ -130,33 +114,18 @@ void host1x_syncpt_restore(struct host1x *host);
 /* Read current wait base value into shadow register and return it. */
 u32 host1x_syncpt_load_wait_base(struct host1x_syncpt *sp);
 
-/* Request incrementing a sync point. */
-int host1x_syncpt_incr(struct host1x_syncpt *sp);
-
 /* Indicate future operations by incrementing the sync point max. */
 u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs);
 
-/* Wait until sync point reaches a threshold value, or a timeout. */
-int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh,
-			long timeout, u32 *value);
-
 /* Check if sync point id is valid. */
 static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp)
 {
 	return sp->id < host1x_syncpt_nb_pts(sp->host);
 }
 
-/* Patch a wait by replacing it with a wait for syncpt 0 value 0 */
-int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr);
-
-/* Return id of the sync point */
-u32 host1x_syncpt_id(struct host1x_syncpt *sp);
-
-/* Allocate a sync point for a device. */
-struct host1x_syncpt *host1x_syncpt_request(struct device *dev,
-					    bool client_managed);
-
-/* Free a sync point. */
-void host1x_syncpt_free(struct host1x_syncpt *sp);
+static inline void host1x_syncpt_set_locked(struct host1x_syncpt *sp)
+{
+	sp->locked = true;
+}
 
 #endif