summaryrefslogtreecommitdiff
path: root/drivers/gpu/host1x
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/host1x')
-rw-r--r--drivers/gpu/host1x/Kconfig7
-rw-r--r--drivers/gpu/host1x/Makefile7
-rw-r--r--drivers/gpu/host1x/bus.c72
-rw-r--r--drivers/gpu/host1x/bus.h2
-rw-r--r--drivers/gpu/host1x/cdma.c67
-rw-r--r--drivers/gpu/host1x/cdma.h2
-rw-r--r--drivers/gpu/host1x/channel.c32
-rw-r--r--drivers/gpu/host1x/channel.h4
-rw-r--r--drivers/gpu/host1x/context.c177
-rw-r--r--drivers/gpu/host1x/context.h38
-rw-r--r--drivers/gpu/host1x/context_bus.c26
-rw-r--r--drivers/gpu/host1x/debug.c53
-rw-r--r--drivers/gpu/host1x/debug.h1
-rw-r--r--drivers/gpu/host1x/dev.c330
-rw-r--r--drivers/gpu/host1x/dev.h41
-rw-r--r--drivers/gpu/host1x/fence.c120
-rw-r--r--drivers/gpu/host1x/fence.h19
-rw-r--r--drivers/gpu/host1x/hw/cdma_hw.c46
-rw-r--r--drivers/gpu/host1x/hw/channel_hw.c229
-rw-r--r--drivers/gpu/host1x/hw/debug_hw.c15
-rw-r--r--drivers/gpu/host1x/hw/host1x01_hardware.h114
-rw-r--r--drivers/gpu/host1x/hw/host1x02_hardware.h113
-rw-r--r--drivers/gpu/host1x/hw/host1x04_hardware.h113
-rw-r--r--drivers/gpu/host1x/hw/host1x05_hardware.h113
-rw-r--r--drivers/gpu/host1x/hw/host1x06_hardware.h118
-rw-r--r--drivers/gpu/host1x/hw/host1x07_hardware.h118
-rw-r--r--drivers/gpu/host1x/hw/host1x08.c33
-rw-r--r--drivers/gpu/host1x/hw/host1x08.h15
-rw-r--r--drivers/gpu/host1x/hw/host1x08_hardware.h21
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x06_uclass.h2
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x07_uclass.h2
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_channel.h11
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_common.h11
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h9
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_uclass.h181
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_vm.h36
-rw-r--r--drivers/gpu/host1x/hw/intr_hw.c136
-rw-r--r--drivers/gpu/host1x/hw/opcodes.h150
-rw-r--r--drivers/gpu/host1x/hw/syncpt_hw.c3
-rw-r--r--drivers/gpu/host1x/intr.c347
-rw-r--r--drivers/gpu/host1x/intr.h84
-rw-r--r--drivers/gpu/host1x/job.c16
-rw-r--r--drivers/gpu/host1x/mipi.c21
-rw-r--r--drivers/gpu/host1x/syncpt.c143
-rw-r--r--drivers/gpu/host1x/syncpt.h3
45 files changed, 1604 insertions, 1597 deletions
diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig
index 6815b4db17c1..e6c78ae2003a 100644
--- a/drivers/gpu/host1x/Kconfig
+++ b/drivers/gpu/host1x/Kconfig
@@ -1,8 +1,13 @@
# SPDX-License-Identifier: GPL-2.0-only
+
+config TEGRA_HOST1X_CONTEXT_BUS
+ bool
+
config TEGRA_HOST1X
tristate "NVIDIA Tegra host1x driver"
- depends on ARCH_TEGRA || (ARM && COMPILE_TEST)
+ depends on ARCH_TEGRA || COMPILE_TEST
select DMA_SHARED_BUFFER
+ select TEGRA_HOST1X_CONTEXT_BUS
select IOMMU_IOVA
help
Driver for the NVIDIA Tegra host1x hardware.
diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index d2b6f7de0498..ee5286ffe08d 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -15,6 +15,11 @@ host1x-y = \
hw/host1x04.o \
hw/host1x05.o \
hw/host1x06.o \
- hw/host1x07.o
+ hw/host1x07.o \
+ hw/host1x08.o
+
+host1x-$(CONFIG_IOMMU_API) += \
+ context.o
obj-$(CONFIG_TEGRA_HOST1X) += host1x.o
+obj-$(CONFIG_TEGRA_HOST1X_CONTEXT_BUS) += context_bus.o
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index bdee16a0bb8e..723a80895cd4 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -41,7 +41,6 @@ static int host1x_subdev_add(struct host1x_device *device,
struct device_node *np)
{
struct host1x_subdev *subdev;
- struct device_node *child;
int err;
subdev = kzalloc(sizeof(*subdev), GFP_KERNEL);
@@ -56,13 +55,12 @@ static int host1x_subdev_add(struct host1x_device *device,
mutex_unlock(&device->subdevs_lock);
/* recursively add children */
- for_each_child_of_node(np, child) {
+ for_each_child_of_node_scoped(np, child) {
if (of_match_node(driver->subdevs, child) &&
of_device_is_available(child)) {
err = host1x_subdev_add(device, driver, child);
if (err < 0) {
/* XXX cleanup? */
- of_node_put(child);
return err;
}
}
@@ -90,17 +88,14 @@ static void host1x_subdev_del(struct host1x_subdev *subdev)
static int host1x_device_parse_dt(struct host1x_device *device,
struct host1x_driver *driver)
{
- struct device_node *np;
int err;
- for_each_child_of_node(device->dev.parent->of_node, np) {
+ for_each_child_of_node_scoped(device->dev.parent->of_node, np) {
if (of_match_node(driver->subdevs, np) &&
of_device_is_available(np)) {
err = host1x_subdev_add(device, driver, np);
- if (err < 0) {
- of_node_put(np);
+ if (err < 0)
return err;
- }
}
}
@@ -333,46 +328,24 @@ static int host1x_del_client(struct host1x *host1x,
return -ENODEV;
}
-static int host1x_device_match(struct device *dev, struct device_driver *drv)
+static int host1x_device_match(struct device *dev, const struct device_driver *drv)
{
return strcmp(dev_name(dev), drv->name) == 0;
}
-static int host1x_device_uevent(struct device *dev,
+/*
+ * Note that this is really only needed for backwards compatibility
+ * with libdrm, which parses this information from sysfs and will
+ * fail if it can't find the OF_FULLNAME, specifically.
+ */
+static int host1x_device_uevent(const struct device *dev,
struct kobj_uevent_env *env)
{
- struct device_node *np = dev->parent->of_node;
- unsigned int count = 0;
- struct property *p;
- const char *compat;
-
- /*
- * This duplicates most of of_device_uevent(), but the latter cannot
- * be called from modules and operates on dev->of_node, which is not
- * available in this case.
- *
- * Note that this is really only needed for backwards compatibility
- * with libdrm, which parses this information from sysfs and will
- * fail if it can't find the OF_FULLNAME, specifically.
- */
- add_uevent_var(env, "OF_NAME=%pOFn", np);
- add_uevent_var(env, "OF_FULLNAME=%pOF", np);
-
- of_property_for_each_string(np, "compatible", p, compat) {
- add_uevent_var(env, "OF_COMPATIBLE_%u=%s", count, compat);
- count++;
- }
-
- add_uevent_var(env, "OF_COMPATIBLE_N=%u", count);
+ of_device_uevent(dev->parent, env);
return 0;
}
-static int host1x_dma_configure(struct device *dev)
-{
- return of_dma_configure(dev, dev->of_node, true);
-}
-
static const struct dev_pm_ops host1x_device_pm_ops = {
.suspend = pm_generic_suspend,
.resume = pm_generic_resume,
@@ -382,11 +355,10 @@ static const struct dev_pm_ops host1x_device_pm_ops = {
.restore = pm_generic_restore,
};
-struct bus_type host1x_bus_type = {
+const struct bus_type host1x_bus_type = {
.name = "host1x",
.match = host1x_device_match,
.uevent = host1x_device_uevent,
- .dma_configure = host1x_dma_configure,
.pm = &host1x_device_pm_ops,
};
@@ -475,8 +447,6 @@ static int host1x_device_add(struct host1x *host1x,
device->dev.bus = &host1x_bus_type;
device->dev.parent = host1x->dev;
- of_dma_configure(&device->dev, host1x->dev->of_node, true);
-
device->dev.dma_parms = &device->dma_parms;
dma_set_max_seg_size(&device->dev, UINT_MAX);
@@ -501,6 +471,18 @@ static int host1x_device_add(struct host1x *host1x,
mutex_unlock(&clients_lock);
+ /*
+ * Add device even if there are no subdevs to ensure syncpoint functionality
+ * is available regardless of whether any engine subdevices are present
+ */
+ if (list_empty(&device->subdevs)) {
+ err = device_add(&device->dev);
+ if (err < 0)
+ dev_err(&device->dev, "failed to add device: %d\n", err);
+ else
+ device->registered = true;
+ }
+
return 0;
}
@@ -803,7 +785,7 @@ EXPORT_SYMBOL(__host1x_client_register);
* Removes a host1x client from its host1x controller instance. If a logical
* device has already been initialized, it will be torn down.
*/
-int host1x_client_unregister(struct host1x_client *client)
+void host1x_client_unregister(struct host1x_client *client)
{
struct host1x_client *c;
struct host1x *host1x;
@@ -815,7 +797,7 @@ int host1x_client_unregister(struct host1x_client *client)
err = host1x_del_client(host1x, client);
if (!err) {
mutex_unlock(&devices_lock);
- return 0;
+ return;
}
}
@@ -832,8 +814,6 @@ int host1x_client_unregister(struct host1x_client *client)
mutex_unlock(&clients_lock);
host1x_bo_cache_destroy(&client->cache);
-
- return 0;
}
EXPORT_SYMBOL(host1x_client_unregister);
diff --git a/drivers/gpu/host1x/bus.h b/drivers/gpu/host1x/bus.h
index a4adf9abc3b4..a80ceadfeb34 100644
--- a/drivers/gpu/host1x/bus.h
+++ b/drivers/gpu/host1x/bus.h
@@ -10,7 +10,7 @@
struct bus_type;
struct host1x;
-extern struct bus_type host1x_bus_type;
+extern const struct bus_type host1x_bus_type;
int host1x_register(struct host1x *host1x);
int host1x_unregister(struct host1x *host1x);
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 765e5aa64eb6..ba2e572567c0 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -105,7 +105,7 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
pb->dma = iova_dma_addr(&host1x->iova, alloc);
err = iommu_map(host1x->domain, pb->dma, pb->phys, size,
- IOMMU_READ);
+ IOMMU_READ, GFP_KERNEL);
if (err)
goto iommu_free_iova;
} else {
@@ -247,8 +247,6 @@ static int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x,
trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev),
CDMA_EVENT_PUSH_BUFFER_SPACE);
- host1x_hw_cdma_flush(host1x, cdma);
-
/* If somebody has managed to already start waiting, yield */
if (cdma->event != CDMA_EVENT_NONE) {
mutex_unlock(&cdma->lock);
@@ -457,9 +455,24 @@ syncpt_incr:
* to offset 0xbad. This does nothing but
* has a easily detected signature in debug
* traces.
+ *
+ * On systems with MLOCK enforcement enabled,
+ * the above 0 word writes would fall foul of
+ * the enforcement. As such, in the first slot
+ * put a RESTART_W opcode to the beginning
+ * of the next job. We don't use this for older
+ * chips since those only support the RESTART
+ * opcode with inconvenient alignment requirements.
*/
- mapped[2*slot+0] = 0x1bad0000;
- mapped[2*slot+1] = 0x1bad0000;
+ if (i == 0 && host1x->info->has_wide_gather) {
+ unsigned int next_job = (job->first_get/8 + job->num_slots)
+ % HOST1X_PUSHBUFFER_SLOTS;
+ mapped[2*slot+0] = (0xd << 28) | (next_job * 2);
+ mapped[2*slot+1] = 0x0;
+ } else {
+ mapped[2*slot+0] = 0x1bad0000;
+ mapped[2*slot+1] = 0x1bad0000;
+ }
}
job->cancelled = true;
@@ -475,6 +488,15 @@ resume:
host1x_hw_cdma_resume(host1x, cdma, restart_addr);
}
+static void cdma_update_work(struct work_struct *work)
+{
+ struct host1x_cdma *cdma = container_of(work, struct host1x_cdma, update_work);
+
+ mutex_lock(&cdma->lock);
+ update_cdma_locked(cdma);
+ mutex_unlock(&cdma->lock);
+}
+
/*
* Create a cdma
*/
@@ -484,6 +506,7 @@ int host1x_cdma_init(struct host1x_cdma *cdma)
mutex_init(&cdma->lock);
init_completion(&cdma->complete);
+ INIT_WORK(&cdma->update_work, cdma_update_work);
INIT_LIST_HEAD(&cdma->sync_queue);
@@ -566,7 +589,6 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)
*/
void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2)
{
- struct host1x *host1x = cdma_to_host1x(cdma);
struct push_buffer *pb = &cdma->push_buffer;
u32 slots_free = cdma->slots_free;
@@ -574,11 +596,9 @@ void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2)
trace_host1x_cdma_push(dev_name(cdma_to_channel(cdma)->dev),
op1, op2);
- if (slots_free == 0) {
- host1x_hw_cdma_flush(host1x, cdma);
+ if (slots_free == 0)
slots_free = host1x_cdma_wait_locked(cdma,
CDMA_EVENT_PUSH_BUFFER_SPACE);
- }
cdma->slots_free = slots_free - 1;
cdma->slots_used++;
@@ -600,8 +620,7 @@ void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2,
struct host1x_channel *channel = cdma_to_channel(cdma);
struct host1x *host1x = cdma_to_host1x(cdma);
struct push_buffer *pb = &cdma->push_buffer;
- unsigned int needed = 2, extra = 0, i;
- unsigned int space = cdma->slots_free;
+ unsigned int space, needed = 2, extra = 0;
if (host1x_debug_trace_cmdbuf)
trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2,
@@ -619,20 +638,14 @@ void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2,
cdma->slots_free = space - needed;
cdma->slots_used += needed;
- /*
- * Note that we rely on the fact that this is only used to submit wide
- * gather opcodes, which consist of 3 words, and they are padded with
- * a NOP to avoid having to deal with fractional slots (a slot always
- * represents 2 words). The fourth opcode passed to this function will
- * therefore always be a NOP.
- *
- * This works around a slight ambiguity when it comes to opcodes. For
- * all current host1x incarnations the NOP opcode uses the exact same
- * encoding (0x20000000), so we could hard-code the value here, but a
- * new incarnation may change it and break that assumption.
- */
- for (i = 0; i < extra; i++)
- host1x_pushbuffer_push(pb, op4, op4);
+ if (extra > 0) {
+ /*
+ * If there isn't enough space at the tail of the pushbuffer,
+ * insert a RESTART(0) here to go back to the beginning.
+ * The code above adjusted the indexes appropriately.
+ */
+ host1x_pushbuffer_push(pb, (0x5 << 28), 0xdead0000);
+ }
host1x_pushbuffer_push(pb, op1, op2);
host1x_pushbuffer_push(pb, op3, op4);
@@ -670,7 +683,5 @@ void host1x_cdma_end(struct host1x_cdma *cdma,
*/
void host1x_cdma_update(struct host1x_cdma *cdma)
{
- mutex_lock(&cdma->lock);
- update_cdma_locked(cdma);
- mutex_unlock(&cdma->lock);
+ schedule_work(&cdma->update_work);
}
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index 12c4327c4df0..7fd8168af4f9 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -11,6 +11,7 @@
#include <linux/sched.h>
#include <linux/completion.h>
#include <linux/list.h>
+#include <linux/workqueue.h>
struct host1x_syncpt;
struct host1x_userctx_timeout;
@@ -69,6 +70,7 @@ struct host1x_cdma {
struct buffer_timeout timeout; /* channel's timeout state/wq */
bool running;
bool torndown;
+ struct work_struct update_work;
};
#define cdma_to_channel(cdma) container_of(cdma, struct host1x_channel, cdma)
diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 2a9a3a8d5931..08077afe4cde 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -21,22 +21,20 @@ int host1x_channel_list_init(struct host1x_channel_list *chlist,
if (!chlist->channels)
return -ENOMEM;
- chlist->allocated_channels =
- kcalloc(BITS_TO_LONGS(num_channels), sizeof(unsigned long),
- GFP_KERNEL);
+ chlist->allocated_channels = bitmap_zalloc(num_channels, GFP_KERNEL);
if (!chlist->allocated_channels) {
kfree(chlist->channels);
return -ENOMEM;
}
- bitmap_zero(chlist->allocated_channels, num_channels);
+ mutex_init(&chlist->lock);
return 0;
}
void host1x_channel_list_free(struct host1x_channel_list *chlist)
{
- kfree(chlist->allocated_channels);
+ bitmap_free(chlist->allocated_channels);
kfree(chlist->channels);
}
@@ -83,6 +81,25 @@ void host1x_channel_stop(struct host1x_channel *channel)
}
EXPORT_SYMBOL(host1x_channel_stop);
+/**
+ * host1x_channel_stop_all() - disable CDMA on allocated channels
+ * @host: host1x instance
+ *
+ * Stop CDMA on allocated channels
+ */
+void host1x_channel_stop_all(struct host1x *host)
+{
+ struct host1x_channel_list *chlist = &host->channel_list;
+ int bit;
+
+ mutex_lock(&chlist->lock);
+
+ for_each_set_bit(bit, chlist->allocated_channels, host->info->nb_channels)
+ host1x_channel_stop(&chlist->channels[bit]);
+
+ mutex_unlock(&chlist->lock);
+}
+
static void release_channel(struct kref *kref)
{
struct host1x_channel *channel =
@@ -108,8 +125,11 @@ static struct host1x_channel *acquire_unused_channel(struct host1x *host)
unsigned int max_channels = host->info->nb_channels;
unsigned int index;
+ mutex_lock(&chlist->lock);
+
index = find_first_zero_bit(chlist->allocated_channels, max_channels);
if (index >= max_channels) {
+ mutex_unlock(&chlist->lock);
dev_err(host->dev, "failed to find free channel\n");
return NULL;
}
@@ -118,6 +138,8 @@ static struct host1x_channel *acquire_unused_channel(struct host1x *host)
set_bit(index, chlist->allocated_channels);
+ mutex_unlock(&chlist->lock);
+
return &chlist->channels[index];
}
diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h
index 39044ff6c3aa..d7aede204d83 100644
--- a/drivers/gpu/host1x/channel.h
+++ b/drivers/gpu/host1x/channel.h
@@ -10,6 +10,7 @@
#include <linux/io.h>
#include <linux/kref.h>
+#include <linux/mutex.h>
#include "cdma.h"
@@ -18,6 +19,8 @@ struct host1x_channel;
struct host1x_channel_list {
struct host1x_channel *channels;
+
+ struct mutex lock;
unsigned long *allocated_channels;
};
@@ -37,5 +40,6 @@ int host1x_channel_list_init(struct host1x_channel_list *chlist,
void host1x_channel_list_free(struct host1x_channel_list *chlist);
struct host1x_channel *host1x_channel_get_index(struct host1x *host,
unsigned int index);
+void host1x_channel_stop_all(struct host1x *host);
#endif
diff --git a/drivers/gpu/host1x/context.c b/drivers/gpu/host1x/context.c
new file mode 100644
index 000000000000..a6f6779662a3
--- /dev/null
+++ b/drivers/gpu/host1x/context.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA Corporation.
+ */
+
+#include <linux/device.h>
+#include <linux/kref.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/pid.h>
+#include <linux/slab.h>
+
+#include "context.h"
+#include "dev.h"
+
+static void host1x_memory_context_release(struct device *dev)
+{
+ /* context device is freed in host1x_memory_context_list_free() */
+}
+
+int host1x_memory_context_list_init(struct host1x *host1x)
+{
+ struct host1x_memory_context_list *cdl = &host1x->context_list;
+ struct device_node *node = host1x->dev->of_node;
+ struct host1x_memory_context *ctx;
+ unsigned int i;
+ int err;
+
+ cdl->devs = NULL;
+ cdl->len = 0;
+ mutex_init(&cdl->lock);
+
+ err = of_property_count_u32_elems(node, "iommu-map");
+ if (err < 0)
+ return 0;
+
+ cdl->len = err / 4;
+ cdl->devs = kcalloc(cdl->len, sizeof(*cdl->devs), GFP_KERNEL);
+ if (!cdl->devs)
+ return -ENOMEM;
+
+ for (i = 0; i < cdl->len; i++) {
+ ctx = &cdl->devs[i];
+
+ ctx->host = host1x;
+
+ device_initialize(&ctx->dev);
+
+ /*
+ * Due to an issue with T194 NVENC, only 38 bits can be used.
+ * Anyway, 256GiB of IOVA ought to be enough for anyone.
+ */
+ ctx->dma_mask = DMA_BIT_MASK(38);
+ ctx->dev.dma_mask = &ctx->dma_mask;
+ ctx->dev.coherent_dma_mask = ctx->dma_mask;
+ dev_set_name(&ctx->dev, "host1x-ctx.%d", i);
+ ctx->dev.bus = &host1x_context_device_bus_type;
+ ctx->dev.parent = host1x->dev;
+ ctx->dev.release = host1x_memory_context_release;
+
+ ctx->dev.dma_parms = &ctx->dma_parms;
+ dma_set_max_seg_size(&ctx->dev, UINT_MAX);
+
+ err = device_add(&ctx->dev);
+ if (err) {
+ dev_err(host1x->dev, "could not add context device %d: %d\n", i, err);
+ put_device(&ctx->dev);
+ goto unreg_devices;
+ }
+
+ err = of_dma_configure_id(&ctx->dev, node, true, &i);
+ if (err) {
+ dev_err(host1x->dev, "IOMMU configuration failed for context device %d: %d\n",
+ i, err);
+ device_unregister(&ctx->dev);
+ goto unreg_devices;
+ }
+
+ if (!tegra_dev_iommu_get_stream_id(&ctx->dev, &ctx->stream_id) ||
+ !device_iommu_mapped(&ctx->dev)) {
+ dev_err(host1x->dev, "Context device %d has no IOMMU!\n", i);
+ device_unregister(&ctx->dev);
+
+ /*
+ * This means that if IOMMU is disabled but context devices
+ * are defined in the device tree, Host1x will fail to probe.
+ * That's probably OK in this time and age.
+ */
+ err = -EINVAL;
+
+ goto unreg_devices;
+ }
+ }
+
+ return 0;
+
+unreg_devices:
+ while (i--)
+ device_unregister(&cdl->devs[i].dev);
+
+ kfree(cdl->devs);
+ cdl->devs = NULL;
+ cdl->len = 0;
+
+ return err;
+}
+
+void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl)
+{
+ unsigned int i;
+
+ for (i = 0; i < cdl->len; i++)
+ device_unregister(&cdl->devs[i].dev);
+
+ kfree(cdl->devs);
+ cdl->len = 0;
+}
+
+struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x,
+ struct device *dev,
+ struct pid *pid)
+{
+ struct host1x_memory_context_list *cdl = &host1x->context_list;
+ struct host1x_memory_context *free = NULL;
+ int i;
+
+ if (!cdl->len)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&cdl->lock);
+
+ for (i = 0; i < cdl->len; i++) {
+ struct host1x_memory_context *cd = &cdl->devs[i];
+
+ if (cd->dev.iommu->iommu_dev != dev->iommu->iommu_dev)
+ continue;
+
+ if (cd->owner == pid) {
+ refcount_inc(&cd->ref);
+ mutex_unlock(&cdl->lock);
+ return cd;
+ } else if (!cd->owner && !free) {
+ free = cd;
+ }
+ }
+
+ if (!free) {
+ mutex_unlock(&cdl->lock);
+ return ERR_PTR(-EBUSY);
+ }
+
+ refcount_set(&free->ref, 1);
+ free->owner = get_pid(pid);
+
+ mutex_unlock(&cdl->lock);
+
+ return free;
+}
+EXPORT_SYMBOL_GPL(host1x_memory_context_alloc);
+
+void host1x_memory_context_get(struct host1x_memory_context *cd)
+{
+ refcount_inc(&cd->ref);
+}
+EXPORT_SYMBOL_GPL(host1x_memory_context_get);
+
+void host1x_memory_context_put(struct host1x_memory_context *cd)
+{
+ struct host1x_memory_context_list *cdl = &cd->host->context_list;
+
+ if (refcount_dec_and_mutex_lock(&cd->ref, &cdl->lock)) {
+ put_pid(cd->owner);
+ cd->owner = NULL;
+ mutex_unlock(&cdl->lock);
+ }
+}
+EXPORT_SYMBOL_GPL(host1x_memory_context_put);
diff --git a/drivers/gpu/host1x/context.h b/drivers/gpu/host1x/context.h
new file mode 100644
index 000000000000..3e03bc1d3bac
--- /dev/null
+++ b/drivers/gpu/host1x/context.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x context devices
+ *
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_CONTEXT_H
+#define __HOST1X_CONTEXT_H
+
+#include <linux/mutex.h>
+#include <linux/refcount.h>
+
+struct host1x;
+
+extern struct bus_type host1x_context_device_bus_type;
+
+struct host1x_memory_context_list {
+ struct mutex lock;
+ struct host1x_memory_context *devs;
+ unsigned int len;
+};
+
+#ifdef CONFIG_IOMMU_API
+int host1x_memory_context_list_init(struct host1x *host1x);
+void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl);
+#else
+static inline int host1x_memory_context_list_init(struct host1x *host1x)
+{
+ return 0;
+}
+
+static inline void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl)
+{
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/host1x/context_bus.c b/drivers/gpu/host1x/context_bus.c
new file mode 100644
index 000000000000..7cd0e1a5edd1
--- /dev/null
+++ b/drivers/gpu/host1x/context_bus.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA Corporation.
+ */
+
+#include <linux/device.h>
+#include <linux/of.h>
+
+const struct bus_type host1x_context_device_bus_type = {
+ .name = "host1x-context",
+};
+EXPORT_SYMBOL_GPL(host1x_context_device_bus_type);
+
+static int __init host1x_context_device_bus_init(void)
+{
+ int err;
+
+ err = bus_register(&host1x_context_device_bus_type);
+ if (err < 0) {
+ pr_err("bus type registration failed: %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+postcore_initcall(host1x_context_device_bus_init);
diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index 18d9c8d206e3..6433c00d5d7e 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -75,8 +75,9 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
return 0;
}
-static void show_syncpts(struct host1x *m, struct output *o)
+static void show_syncpts(struct host1x *m, struct output *o, bool show_all)
{
+ unsigned long irqflags;
struct list_head *pos;
unsigned int i;
int err;
@@ -92,12 +93,15 @@ static void show_syncpts(struct host1x *m, struct output *o)
u32 min = host1x_syncpt_load(m->syncpt + i);
unsigned int waiters = 0;
- spin_lock(&m->syncpt[i].intr.lock);
- list_for_each(pos, &m->syncpt[i].intr.wait_head)
+ spin_lock_irqsave(&m->syncpt[i].fences.lock, irqflags);
+ list_for_each(pos, &m->syncpt[i].fences.list)
waiters++;
- spin_unlock(&m->syncpt[i].intr.lock);
+ spin_unlock_irqrestore(&m->syncpt[i].fences.lock, irqflags);
- if (!min && !max && !waiters)
+ if (!kref_read(&m->syncpt[i].ref))
+ continue;
+
+ if (!show_all && !min && !max && !waiters)
continue;
host1x_debug_output(o,
@@ -124,7 +128,7 @@ static void show_all(struct host1x *m, struct output *o, bool show_fifo)
unsigned int i;
host1x_hw_show_mlocks(m, o);
- show_syncpts(m, o);
+ show_syncpts(m, o, true);
host1x_debug_output(o, "---- channels ----\n");
for (i = 0; i < m->info->nb_channels; ++i) {
@@ -137,7 +141,7 @@ static void show_all(struct host1x *m, struct output *o, bool show_fifo)
}
}
-static int host1x_debug_show_all(struct seq_file *s, void *unused)
+static int host1x_debug_all_show(struct seq_file *s, void *unused)
{
struct output o = {
.fn = write_to_seqfile,
@@ -148,6 +152,7 @@ static int host1x_debug_show_all(struct seq_file *s, void *unused)
return 0;
}
+DEFINE_SHOW_ATTRIBUTE(host1x_debug_all);
static int host1x_debug_show(struct seq_file *s, void *unused)
{
@@ -160,30 +165,7 @@ static int host1x_debug_show(struct seq_file *s, void *unused)
return 0;
}
-
-static int host1x_debug_open_all(struct inode *inode, struct file *file)
-{
- return single_open(file, host1x_debug_show_all, inode->i_private);
-}
-
-static const struct file_operations host1x_debug_all_fops = {
- .open = host1x_debug_open_all,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int host1x_debug_open(struct inode *inode, struct file *file)
-{
- return single_open(file, host1x_debug_show, inode->i_private);
-}
-
-static const struct file_operations host1x_debug_fops = {
- .open = host1x_debug_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(host1x_debug);
static void host1x_debugfs_init(struct host1x *host1x)
{
@@ -234,12 +216,3 @@ void host1x_debug_dump(struct host1x *host1x)
show_all(host1x, &o, true);
}
-
-void host1x_debug_dump_syncpts(struct host1x *host1x)
-{
- struct output o = {
- .fn = write_to_printk
- };
-
- show_syncpts(host1x, &o);
-}
diff --git a/drivers/gpu/host1x/debug.h b/drivers/gpu/host1x/debug.h
index 62bd8a091fa7..c43c61d876a9 100644
--- a/drivers/gpu/host1x/debug.h
+++ b/drivers/gpu/host1x/debug.h
@@ -41,6 +41,5 @@ extern unsigned int host1x_debug_trace_cmdbuf;
void host1x_debug_init(struct host1x *host1x);
void host1x_debug_deinit(struct host1x *host1x);
void host1x_debug_dump(struct host1x *host1x);
-void host1x_debug_dump_syncpts(struct host1x *host1x);
#endif
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index 6994f8c0e02e..3f475f0e6545 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -11,8 +11,9 @@
#include <linux/io.h>
#include <linux/list.h>
#include <linux/module.h>
-#include <linux/of_device.h>
#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
@@ -28,6 +29,7 @@
#include "bus.h"
#include "channel.h"
+#include "context.h"
#include "debug.h"
#include "dev.h"
#include "intr.h"
@@ -38,6 +40,12 @@
#include "hw/host1x05.h"
#include "hw/host1x06.h"
#include "hw/host1x07.h"
+#include "hw/host1x08.h"
+
+void host1x_common_writel(struct host1x *host1x, u32 v, u32 r)
+{
+ writel(v, host1x->common_regs + r);
+}
void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r)
{
@@ -63,6 +71,15 @@ u32 host1x_sync_readl(struct host1x *host1x, u32 r)
return readl(sync_regs + r);
}
+#ifdef CONFIG_64BIT
+u64 host1x_sync_readq(struct host1x *host1x, u32 r)
+{
+ void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;
+
+ return readq(sync_regs + r);
+}
+#endif
+
void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r)
{
writel(v, ch->regs + r);
@@ -134,18 +151,29 @@ static const struct host1x_info host1x05_info = {
};
static const struct host1x_sid_entry tegra186_sid_table[] = {
- {
- /* VIC */
- .base = 0x1af0,
- .offset = 0x30,
- .limit = 0x34
- },
- {
- /* NVDEC */
- .base = 0x1b00,
- .offset = 0x30,
- .limit = 0x34
- },
+ { /* SE1 */ .base = 0x1ac8, .offset = 0x90, .limit = 0x90 },
+ { /* SE2 */ .base = 0x1ad0, .offset = 0x90, .limit = 0x90 },
+ { /* SE3 */ .base = 0x1ad8, .offset = 0x90, .limit = 0x90 },
+ { /* SE4 */ .base = 0x1ae0, .offset = 0x90, .limit = 0x90 },
+ { /* ISP */ .base = 0x1ae8, .offset = 0x50, .limit = 0x50 },
+ { /* VIC */ .base = 0x1af0, .offset = 0x30, .limit = 0x34 },
+ { /* NVENC */ .base = 0x1af8, .offset = 0x30, .limit = 0x34 },
+ { /* NVDEC */ .base = 0x1b00, .offset = 0x30, .limit = 0x34 },
+ { /* NVJPG */ .base = 0x1b08, .offset = 0x30, .limit = 0x34 },
+ { /* TSEC */ .base = 0x1b10, .offset = 0x30, .limit = 0x34 },
+ { /* TSECB */ .base = 0x1b18, .offset = 0x30, .limit = 0x34 },
+ { /* VI 0 */ .base = 0x1b80, .offset = 0x10000, .limit = 0x10000 },
+ { /* VI 1 */ .base = 0x1b88, .offset = 0x20000, .limit = 0x20000 },
+ { /* VI 2 */ .base = 0x1b90, .offset = 0x30000, .limit = 0x30000 },
+ { /* VI 3 */ .base = 0x1b98, .offset = 0x40000, .limit = 0x40000 },
+ { /* VI 4 */ .base = 0x1ba0, .offset = 0x50000, .limit = 0x50000 },
+ { /* VI 5 */ .base = 0x1ba8, .offset = 0x60000, .limit = 0x60000 },
+ { /* VI 6 */ .base = 0x1bb0, .offset = 0x70000, .limit = 0x70000 },
+ { /* VI 7 */ .base = 0x1bb8, .offset = 0x80000, .limit = 0x80000 },
+ { /* VI 8 */ .base = 0x1bc0, .offset = 0x90000, .limit = 0x90000 },
+ { /* VI 9 */ .base = 0x1bc8, .offset = 0xa0000, .limit = 0xa0000 },
+ { /* VI 10 */ .base = 0x1bd0, .offset = 0xb0000, .limit = 0xb0000 },
+ { /* VI 11 */ .base = 0x1bd8, .offset = 0xc0000, .limit = 0xc0000 },
};
static const struct host1x_info host1x06_info = {
@@ -161,27 +189,30 @@ static const struct host1x_info host1x06_info = {
.num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
.sid_table = tegra186_sid_table,
.reserve_vblank_syncpts = false,
+ .skip_reset_assert = true,
};
static const struct host1x_sid_entry tegra194_sid_table[] = {
- {
- /* VIC */
- .base = 0x1af0,
- .offset = 0x30,
- .limit = 0x34
- },
- {
- /* NVDEC */
- .base = 0x1b00,
- .offset = 0x30,
- .limit = 0x34
- },
- {
- /* NVDEC1 */
- .base = 0x1bc0,
- .offset = 0x30,
- .limit = 0x34
- },
+ { /* SE1 */ .base = 0x1ac8, .offset = 0x90, .limit = 0x90 },
+ { /* SE2 */ .base = 0x1ad0, .offset = 0x90, .limit = 0x90 },
+ { /* SE3 */ .base = 0x1ad8, .offset = 0x90, .limit = 0x90 },
+ { /* SE4 */ .base = 0x1ae0, .offset = 0x90, .limit = 0x90 },
+ { /* ISP */ .base = 0x1ae8, .offset = 0x800, .limit = 0x800 },
+ { /* VIC */ .base = 0x1af0, .offset = 0x30, .limit = 0x34 },
+ { /* NVENC */ .base = 0x1af8, .offset = 0x30, .limit = 0x34 },
+ { /* NVDEC */ .base = 0x1b00, .offset = 0x30, .limit = 0x34 },
+ { /* NVJPG */ .base = 0x1b08, .offset = 0x30, .limit = 0x34 },
+ { /* TSEC */ .base = 0x1b10, .offset = 0x30, .limit = 0x34 },
+ { /* TSECB */ .base = 0x1b18, .offset = 0x30, .limit = 0x34 },
+ { /* VI */ .base = 0x1b80, .offset = 0x800, .limit = 0x800 },
+ { /* VI_THI */ .base = 0x1b88, .offset = 0x30, .limit = 0x34 },
+ { /* ISP_THI */ .base = 0x1b90, .offset = 0x30, .limit = 0x34 },
+ { /* PVA0_CLUSTER */ .base = 0x1b98, .offset = 0x0, .limit = 0x0 },
+ { /* PVA0_CLUSTER */ .base = 0x1ba0, .offset = 0x0, .limit = 0x0 },
+ { /* NVDLA0 */ .base = 0x1ba8, .offset = 0x30, .limit = 0x34 },
+ { /* NVDLA1 */ .base = 0x1bb0, .offset = 0x30, .limit = 0x34 },
+ { /* NVENC1 */ .base = 0x1bb8, .offset = 0x30, .limit = 0x34 },
+ { /* NVDEC1 */ .base = 0x1bc0, .offset = 0x30, .limit = 0x34 },
};
static const struct host1x_info host1x07_info = {
@@ -199,7 +230,65 @@ static const struct host1x_info host1x07_info = {
.reserve_vblank_syncpts = false,
};
+/*
+ * Tegra234 has two stream ID protection tables, one for setting stream IDs
+ * through the channel path via SETSTREAMID, and one for setting them via
+ * MMIO. We program each engine's data stream ID in the channel path table
+ * and firmware stream ID in the MMIO path table.
+ */
+static const struct host1x_sid_entry tegra234_sid_table[] = {
+ { /* SE1 MMIO */ .base = 0x1650, .offset = 0x90, .limit = 0x90 },
+ { /* SE1 ch */ .base = 0x1730, .offset = 0x90, .limit = 0x90 },
+ { /* SE2 MMIO */ .base = 0x1658, .offset = 0x90, .limit = 0x90 },
+ { /* SE2 ch */ .base = 0x1738, .offset = 0x90, .limit = 0x90 },
+ { /* SE4 MMIO */ .base = 0x1660, .offset = 0x90, .limit = 0x90 },
+ { /* SE4 ch */ .base = 0x1740, .offset = 0x90, .limit = 0x90 },
+ { /* ISP MMIO */ .base = 0x1680, .offset = 0x800, .limit = 0x800 },
+ { /* VIC MMIO */ .base = 0x1688, .offset = 0x34, .limit = 0x34 },
+ { /* VIC ch */ .base = 0x17b8, .offset = 0x30, .limit = 0x30 },
+ { /* NVENC MMIO */ .base = 0x1690, .offset = 0x34, .limit = 0x34 },
+ { /* NVENC ch */ .base = 0x17c0, .offset = 0x30, .limit = 0x30 },
+ { /* NVDEC MMIO */ .base = 0x1698, .offset = 0x34, .limit = 0x34 },
+ { /* NVDEC ch */ .base = 0x17c8, .offset = 0x30, .limit = 0x30 },
+ { /* NVJPG MMIO */ .base = 0x16a0, .offset = 0x34, .limit = 0x34 },
+ { /* NVJPG ch */ .base = 0x17d0, .offset = 0x30, .limit = 0x30 },
+ { /* TSEC MMIO */ .base = 0x16a8, .offset = 0x30, .limit = 0x34 },
+ { /* NVJPG1 MMIO */ .base = 0x16b0, .offset = 0x34, .limit = 0x34 },
+ { /* NVJPG1 ch */ .base = 0x17a8, .offset = 0x30, .limit = 0x30 },
+ { /* VI MMIO */ .base = 0x16b8, .offset = 0x800, .limit = 0x800 },
+ { /* VI_THI MMIO */ .base = 0x16c0, .offset = 0x30, .limit = 0x34 },
+ { /* ISP_THI MMIO */ .base = 0x16c8, .offset = 0x30, .limit = 0x34 },
+ { /* NVDLA MMIO */ .base = 0x16d8, .offset = 0x30, .limit = 0x34 },
+ { /* NVDLA ch */ .base = 0x17e0, .offset = 0x30, .limit = 0x34 },
+ { /* NVDLA1 MMIO */ .base = 0x16e0, .offset = 0x30, .limit = 0x34 },
+ { /* NVDLA1 ch */ .base = 0x17e8, .offset = 0x30, .limit = 0x34 },
+ { /* OFA MMIO */ .base = 0x16e8, .offset = 0x34, .limit = 0x34 },
+ { /* OFA ch */ .base = 0x1768, .offset = 0x30, .limit = 0x30 },
+ { /* VI2 MMIO */ .base = 0x16f0, .offset = 0x800, .limit = 0x800 },
+ { /* VI2_THI MMIO */ .base = 0x16f8, .offset = 0x30, .limit = 0x34 },
+};
+
+static const struct host1x_info host1x08_info = {
+ .nb_channels = 63,
+ .nb_pts = 1024,
+ .nb_mlocks = 24,
+ .nb_bases = 0,
+ .init = host1x08_init,
+ .sync_offset = 0x0,
+ .dma_mask = DMA_BIT_MASK(40),
+ .has_wide_gather = true,
+ .has_hypervisor = true,
+ .has_common = true,
+ .num_sid_entries = ARRAY_SIZE(tegra234_sid_table),
+ .sid_table = tegra234_sid_table,
+ .streamid_vm_table = { 0x1004, 128 },
+ .classid_vm_table = { 0x1404, 25 },
+ .mmio_vm_table = { 0x1504, 25 },
+ .reserve_vblank_syncpts = false,
+};
+
static const struct of_device_id host1x_of_match[] = {
+ { .compatible = "nvidia,tegra234-host1x", .data = &host1x08_info, },
{ .compatible = "nvidia,tegra194-host1x", .data = &host1x07_info, },
{ .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, },
{ .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, },
@@ -211,7 +300,7 @@ static const struct of_device_id host1x_of_match[] = {
};
MODULE_DEVICE_TABLE(of, host1x_of_match);
-static void host1x_setup_sid_table(struct host1x *host)
+static void host1x_setup_virtualization_tables(struct host1x *host)
{
const struct host1x_info *info = host->info;
unsigned int i;
@@ -225,10 +314,29 @@ static void host1x_setup_sid_table(struct host1x *host)
host1x_hypervisor_writel(host, entry->offset, entry->base);
host1x_hypervisor_writel(host, entry->limit, entry->base + 4);
}
+
+ for (i = 0; i < info->streamid_vm_table.count; i++) {
+ /* Allow access to all stream IDs to all VMs. */
+ host1x_hypervisor_writel(host, 0xff, info->streamid_vm_table.base + 4 * i);
+ }
+
+ for (i = 0; i < info->classid_vm_table.count; i++) {
+ /* Allow access to all classes to all VMs. */
+ host1x_hypervisor_writel(host, 0xff, info->classid_vm_table.base + 4 * i);
+ }
+
+ for (i = 0; i < info->mmio_vm_table.count; i++) {
+ /* Use VM1 (that's us) as originator VMID for engine MMIO accesses. */
+ host1x_hypervisor_writel(host, 0x1, info->mmio_vm_table.base + 4 * i);
+ }
}
static bool host1x_wants_iommu(struct host1x *host1x)
{
+ /* Our IOMMU usage policy doesn't currently play well with GART */
+ if (of_machine_is_compatible("nvidia,tegra20"))
+ return false;
+
/*
* If we support addressing a maximum of 32 bits of physical memory
* and if the host1x firewall is enabled, there's no need to enable
@@ -262,6 +370,10 @@ static bool host1x_wants_iommu(struct host1x *host1x)
return true;
}
+/*
+ * Returns ERR_PTR on failure, NULL if the translation is IDENTITY, otherwise a
+ * valid paging domain.
+ */
static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
@@ -286,6 +398,8 @@ static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
* Similarly, if host1x is already attached to an IOMMU (via the DMA
* API), don't try to attach again.
*/
+ if (domain && domain->type == IOMMU_DOMAIN_IDENTITY)
+ domain = NULL;
if (!host1x_wants_iommu(host) || domain)
return domain;
@@ -299,9 +413,10 @@ static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
if (err < 0)
goto put_group;
- host->domain = iommu_domain_alloc(&platform_bus_type);
- if (!host->domain) {
- err = -ENOMEM;
+ host->domain = iommu_paging_domain_alloc(host->dev);
+ if (IS_ERR(host->domain)) {
+ err = PTR_ERR(host->domain);
+ host->domain = NULL;
goto put_cache;
}
@@ -402,18 +517,13 @@ static int host1x_get_resets(struct host1x *host)
return err;
}
- if (WARN_ON(!host->resets[1].rstc))
- return -ENOENT;
-
return 0;
}
static int host1x_probe(struct platform_device *pdev)
{
struct host1x *host;
- struct resource *regs, *hv_regs = NULL;
- int syncpt_irq;
- int err;
+ int err, i;
host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
if (!host)
@@ -422,32 +532,50 @@ static int host1x_probe(struct platform_device *pdev)
host->info = of_device_get_match_data(&pdev->dev);
if (host->info->has_hypervisor) {
- regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vm");
- if (!regs) {
- dev_err(&pdev->dev, "failed to get vm registers\n");
- return -ENXIO;
- }
+ host->regs = devm_platform_ioremap_resource_byname(pdev, "vm");
+ if (IS_ERR(host->regs))
+ return PTR_ERR(host->regs);
- hv_regs = platform_get_resource_byname(pdev, IORESOURCE_MEM,
- "hypervisor");
- if (!hv_regs) {
- dev_err(&pdev->dev,
- "failed to get hypervisor registers\n");
- return -ENXIO;
+ host->hv_regs = devm_platform_ioremap_resource_byname(pdev, "hypervisor");
+ if (IS_ERR(host->hv_regs))
+ return PTR_ERR(host->hv_regs);
+
+ if (host->info->has_common) {
+ host->common_regs = devm_platform_ioremap_resource_byname(pdev, "common");
+ if (IS_ERR(host->common_regs))
+ return PTR_ERR(host->common_regs);
}
} else {
- regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!regs) {
- dev_err(&pdev->dev, "failed to get registers\n");
- return -ENXIO;
- }
+ host->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(host->regs))
+ return PTR_ERR(host->regs);
}
- syncpt_irq = platform_get_irq(pdev, 0);
- if (syncpt_irq < 0)
- return syncpt_irq;
+ for (i = 0; i < ARRAY_SIZE(host->syncpt_irqs); i++) {
+ char irq_name[] = "syncptX";
+
+ sprintf(irq_name, "syncpt%d", i);
+
+ err = platform_get_irq_byname_optional(pdev, irq_name);
+ if (err == -ENXIO)
+ break;
+ if (err < 0)
+ return err;
+
+ host->syncpt_irqs[i] = err;
+ }
+
+ host->num_syncpt_irqs = i;
+
+ /* Device tree without irq names */
+ if (i == 0) {
+ host->syncpt_irqs[0] = platform_get_irq(pdev, 0);
+ if (host->syncpt_irqs[0] < 0)
+ return host->syncpt_irqs[0];
+
+ host->num_syncpt_irqs = 1;
+ }
- host1x_bo_cache_init(&host->cache);
mutex_init(&host->devices_lock);
INIT_LIST_HEAD(&host->devices);
INIT_LIST_HEAD(&host->list);
@@ -456,16 +584,6 @@ static int host1x_probe(struct platform_device *pdev)
/* set common host1x device data */
platform_set_drvdata(pdev, host);
- host->regs = devm_ioremap_resource(&pdev->dev, regs);
- if (IS_ERR(host->regs))
- return PTR_ERR(host->regs);
-
- if (host->info->has_hypervisor) {
- host->hv_regs = devm_ioremap_resource(&pdev->dev, hv_regs);
- if (IS_ERR(host->hv_regs))
- return PTR_ERR(host->hv_regs);
- }
-
host->dev->dma_parms = &host->dma_parms;
dma_set_max_seg_size(host->dev, UINT_MAX);
@@ -476,23 +594,19 @@ static int host1x_probe(struct platform_device *pdev)
}
host->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(host->clk)) {
- err = PTR_ERR(host->clk);
-
- if (err != -EPROBE_DEFER)
- dev_err(&pdev->dev, "failed to get clock: %d\n", err);
-
- return err;
- }
+ if (IS_ERR(host->clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(host->clk), "failed to get clock\n");
err = host1x_get_resets(host);
if (err)
return err;
+ host1x_bo_cache_init(&host->cache);
+
err = host1x_iommu_init(host);
if (err < 0) {
dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err);
- return err;
+ goto destroy_cache;
}
err = host1x_channel_list_init(&host->channel_list,
@@ -502,18 +616,20 @@ static int host1x_probe(struct platform_device *pdev)
goto iommu_exit;
}
- err = host1x_syncpt_init(host);
+ err = host1x_memory_context_list_init(host);
if (err) {
- dev_err(&pdev->dev, "failed to initialize syncpts\n");
+ dev_err(&pdev->dev, "failed to initialize context list\n");
goto free_channels;
}
- err = host1x_intr_init(host, syncpt_irq);
+ err = host1x_syncpt_init(host);
if (err) {
- dev_err(&pdev->dev, "failed to initialize interrupts\n");
- goto deinit_syncpt;
+ dev_err(&pdev->dev, "failed to initialize syncpts\n");
+ goto free_contexts;
}
+ mutex_init(&host->intr_mutex);
+
pm_runtime_enable(&pdev->dev);
err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev);
@@ -525,6 +641,12 @@ static int host1x_probe(struct platform_device *pdev)
if (err)
goto pm_disable;
+ err = host1x_intr_init(host);
+ if (err) {
+ dev_err(&pdev->dev, "failed to initialize interrupts\n");
+ goto pm_put;
+ }
+
host1x_debug_init(host);
err = host1x_register(host);
@@ -541,23 +663,25 @@ unregister:
host1x_unregister(host);
deinit_debugfs:
host1x_debug_deinit(host);
-
+ host1x_intr_deinit(host);
+pm_put:
pm_runtime_put_sync_suspend(&pdev->dev);
pm_disable:
pm_runtime_disable(&pdev->dev);
-
- host1x_intr_deinit(host);
-deinit_syncpt:
host1x_syncpt_deinit(host);
+free_contexts:
+ host1x_memory_context_list_free(&host->context_list);
free_channels:
host1x_channel_list_free(&host->channel_list);
iommu_exit:
host1x_iommu_exit(host);
+destroy_cache:
+ host1x_bo_cache_destroy(&host->cache);
return err;
}
-static int host1x_remove(struct platform_device *pdev)
+static void host1x_remove(struct platform_device *pdev)
{
struct host1x *host = platform_get_drvdata(pdev);
@@ -568,10 +692,10 @@ static int host1x_remove(struct platform_device *pdev)
host1x_intr_deinit(host);
host1x_syncpt_deinit(host);
+ host1x_memory_context_list_free(&host->context_list);
+ host1x_channel_list_free(&host->channel_list);
host1x_iommu_exit(host);
host1x_bo_cache_destroy(&host->cache);
-
- return 0;
}
static int __maybe_unused host1x_runtime_suspend(struct device *dev)
@@ -579,16 +703,19 @@ static int __maybe_unused host1x_runtime_suspend(struct device *dev)
struct host1x *host = dev_get_drvdata(dev);
int err;
+ host1x_channel_stop_all(host);
host1x_intr_stop(host);
host1x_syncpt_save(host);
- err = reset_control_bulk_assert(host->nresets, host->resets);
- if (err) {
- dev_err(dev, "failed to assert reset: %d\n", err);
- goto resume_host1x;
- }
+ if (!host->info->skip_reset_assert) {
+ err = reset_control_bulk_assert(host->nresets, host->resets);
+ if (err) {
+ dev_err(dev, "failed to assert reset: %d\n", err);
+ goto resume_host1x;
+ }
- usleep_range(1000, 2000);
+ usleep_range(1000, 2000);
+ }
clk_disable_unprepare(host->clk);
reset_control_bulk_release(host->nresets, host->resets);
@@ -596,7 +723,7 @@ static int __maybe_unused host1x_runtime_suspend(struct device *dev)
return 0;
resume_host1x:
- host1x_setup_sid_table(host);
+ host1x_setup_virtualization_tables(host);
host1x_syncpt_restore(host);
host1x_intr_start(host);
@@ -626,7 +753,7 @@ static int __maybe_unused host1x_runtime_resume(struct device *dev)
goto disable_clk;
}
- host1x_setup_sid_table(host);
+ host1x_setup_virtualization_tables(host);
host1x_syncpt_restore(host);
host1x_intr_start(host);
@@ -643,7 +770,7 @@ release_reset:
static const struct dev_pm_ops host1x_pm_ops = {
SET_RUNTIME_PM_OPS(host1x_runtime_suspend, host1x_runtime_resume,
NULL)
- /* TODO: add system suspend-resume once driver will be ready for that */
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
};
static struct platform_driver tegra_host1x_driver = {
@@ -697,6 +824,7 @@ u64 host1x_get_dma_mask(struct host1x *host1x)
}
EXPORT_SYMBOL(host1x_get_dma_mask);
+MODULE_SOFTDEP("post: tegra-drm");
MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>");
MODULE_DESCRIPTION("Host1x driver for Tegra products");
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index ca4b082f0cd4..ef44618ed88a 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -9,11 +9,13 @@
#include <linux/device.h>
#include <linux/iommu.h>
#include <linux/iova.h>
+#include <linux/irqreturn.h>
#include <linux/platform_device.h>
#include <linux/reset.h>
#include "cdma.h"
#include "channel.h"
+#include "context.h"
#include "intr.h"
#include "job.h"
#include "syncpt.h"
@@ -73,14 +75,14 @@ struct host1x_syncpt_ops {
};
struct host1x_intr_ops {
- int (*init_host_sync)(struct host1x *host, u32 cpm,
- void (*syncpt_thresh_work)(struct work_struct *work));
+ int (*init_host_sync)(struct host1x *host, u32 cpm);
void (*set_syncpt_threshold)(
struct host1x *host, unsigned int id, u32 thresh);
void (*enable_syncpt_intr)(struct host1x *host, unsigned int id);
void (*disable_syncpt_intr)(struct host1x *host, unsigned int id);
void (*disable_all_syncpt_intrs)(struct host1x *host);
int (*free_syncpt_irq)(struct host1x *host);
+ irqreturn_t (*isr)(int irq, void *dev_id);
};
struct host1x_sid_entry {
@@ -89,6 +91,11 @@ struct host1x_sid_entry {
unsigned int limit;
};
+struct host1x_table_desc {
+ unsigned int base;
+ unsigned int count;
+};
+
struct host1x_info {
unsigned int nb_channels; /* host1x: number of channels supported */
unsigned int nb_pts; /* host1x: number of syncpoints supported */
@@ -99,14 +106,24 @@ struct host1x_info {
u64 dma_mask; /* mask of addressable memory */
bool has_wide_gather; /* supports GATHER_W opcode */
bool has_hypervisor; /* has hypervisor registers */
+ bool has_common; /* has common registers separate from hypervisor */
unsigned int num_sid_entries;
const struct host1x_sid_entry *sid_table;
+ struct host1x_table_desc streamid_vm_table;
+ struct host1x_table_desc classid_vm_table;
+ struct host1x_table_desc mmio_vm_table;
/*
* On T20-T148, the boot chain may setup DC to increment syncpoints
* 26/27 on VBLANK. As such we cannot use these syncpoints until
* the display driver disables VBLANK increments.
*/
bool reserve_vblank_syncpts;
+ /*
+ * On Tegra186, secure world applications may require access to
+ * host1x during suspend/resume. To allow this, we need to leave
+ * host1x not in reset.
+ */
+ bool skip_reset_assert;
};
struct host1x {
@@ -114,6 +131,9 @@ struct host1x {
void __iomem *regs;
void __iomem *hv_regs; /* hypervisor region */
+ void __iomem *common_regs;
+ int syncpt_irqs[8];
+ int num_syncpt_irqs;
struct host1x_syncpt *syncpt;
struct host1x_syncpt_base *bases;
struct device *dev;
@@ -127,7 +147,6 @@ struct host1x {
dma_addr_t iova_end;
struct mutex intr_mutex;
- int intr_syncpt_irq;
const struct host1x_syncpt_ops *syncpt_op;
const struct host1x_intr_ops *intr_op;
@@ -141,6 +160,7 @@ struct host1x {
struct mutex syncpt_mutex;
struct host1x_channel_list channel_list;
+ struct host1x_memory_context_list context_list;
struct dentry *debugfs;
@@ -154,11 +174,15 @@ struct host1x {
struct host1x_bo_cache cache;
};
-void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v);
+void host1x_common_writel(struct host1x *host1x, u32 v, u32 r);
+void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r);
u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r);
-void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v);
+void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r);
u32 host1x_sync_readl(struct host1x *host1x, u32 r);
-void host1x_ch_writel(struct host1x_channel *ch, u32 r, u32 v);
+#ifdef CONFIG_64BIT
+u64 host1x_sync_readq(struct host1x *host1x, u32 r);
+#endif
+void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r);
u32 host1x_ch_readl(struct host1x_channel *ch, u32 r);
static inline void host1x_hw_syncpt_restore(struct host1x *host,
@@ -203,10 +227,9 @@ static inline void host1x_hw_syncpt_enable_protection(struct host1x *host)
return host->syncpt_op->enable_protection(host);
}
-static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
- void (*syncpt_thresh_work)(struct work_struct *))
+static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm)
{
- return host->intr_op->init_host_sync(host, cpm, syncpt_thresh_work);
+ return host->intr_op->init_host_sync(host, cpm);
}
static inline void host1x_hw_intr_set_syncpt_threshold(struct host1x *host,
diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
index ecab72882192..139ad1afd935 100644
--- a/drivers/gpu/host1x/fence.c
+++ b/drivers/gpu/host1x/fence.c
@@ -15,22 +15,6 @@
#include "intr.h"
#include "syncpt.h"
-static DEFINE_SPINLOCK(lock);
-
-struct host1x_syncpt_fence {
- struct dma_fence base;
-
- atomic_t signaling;
-
- struct host1x_syncpt *sp;
- u32 threshold;
-
- struct host1x_waitlist *waiter;
- void *waiter_ref;
-
- struct delayed_work timeout_work;
-};
-
static const char *host1x_syncpt_fence_get_driver_name(struct dma_fence *f)
{
return "host1x";
@@ -49,11 +33,11 @@ static struct host1x_syncpt_fence *to_host1x_fence(struct dma_fence *f)
static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f)
{
struct host1x_syncpt_fence *sf = to_host1x_fence(f);
- int err;
if (host1x_syncpt_is_expired(sf->sp, sf->threshold))
return false;
+ /* Reference for interrupt path. */
dma_fence_get(f);
/*
@@ -61,24 +45,17 @@ static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f)
* reference to any fences for which 'enable_signaling' has been
* called (and that have not been signalled).
*
- * We provide a userspace API to create arbitrary syncpoint fences,
- * so we cannot normally guarantee that all fences get signalled.
- * As such, setup a timeout, so that long-lasting fences will get
- * reaped eventually.
+ * We cannot currently always guarantee that all fences get signalled
+ * or cancelled. As such, for such situations, set up a timeout, so
+ * that long-lasting fences will get reaped eventually.
*/
- schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000));
-
- err = host1x_intr_add_action(sf->sp->host, sf->sp, sf->threshold,
- HOST1X_INTR_ACTION_SIGNAL_FENCE, f,
- sf->waiter, &sf->waiter_ref);
- if (err) {
- cancel_delayed_work_sync(&sf->timeout_work);
- dma_fence_put(f);
- return false;
+ if (sf->timeout) {
+ /* Reference for timeout path. */
+ dma_fence_get(f);
+ schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000));
}
- /* intr framework takes ownership of waiter */
- sf->waiter = NULL;
+ host1x_intr_add_fence_locked(sf->sp->host, sf);
/*
* The fence may get signalled at any time after the above call,
@@ -89,37 +66,32 @@ static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f)
return true;
}
-static void host1x_syncpt_fence_release(struct dma_fence *f)
-{
- struct host1x_syncpt_fence *sf = to_host1x_fence(f);
-
- if (sf->waiter)
- kfree(sf->waiter);
-
- dma_fence_free(f);
-}
-
-const struct dma_fence_ops host1x_syncpt_fence_ops = {
+static const struct dma_fence_ops host1x_syncpt_fence_ops = {
.get_driver_name = host1x_syncpt_fence_get_driver_name,
.get_timeline_name = host1x_syncpt_fence_get_timeline_name,
.enable_signaling = host1x_syncpt_fence_enable_signaling,
- .release = host1x_syncpt_fence_release,
};
void host1x_fence_signal(struct host1x_syncpt_fence *f)
{
- if (atomic_xchg(&f->signaling, 1))
+ if (atomic_xchg(&f->signaling, 1)) {
+ /*
+ * Already on timeout path, but we removed the fence before
+ * timeout path could, so drop interrupt path reference.
+ */
+ dma_fence_put(&f->base);
return;
+ }
- /*
- * Cancel pending timeout work - if it races, it will
- * not get 'f->signaling' and return.
- */
- cancel_delayed_work_sync(&f->timeout_work);
-
- host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, false);
+ if (f->timeout && cancel_delayed_work(&f->timeout_work)) {
+ /*
+ * We know that the timeout path will not be entered.
+ * Safe to drop the timeout path's reference now.
+ */
+ dma_fence_put(&f->base);
+ }
- dma_fence_signal(&f->base);
+ dma_fence_signal_locked(&f->base);
dma_fence_put(&f->base);
}
@@ -129,21 +101,29 @@ static void do_fence_timeout(struct work_struct *work)
struct host1x_syncpt_fence *f =
container_of(dwork, struct host1x_syncpt_fence, timeout_work);
- if (atomic_xchg(&f->signaling, 1))
+ if (atomic_xchg(&f->signaling, 1)) {
+ /* Already on interrupt path, drop timeout path reference if any. */
+ if (f->timeout)
+ dma_fence_put(&f->base);
return;
+ }
- /*
- * Cancel pending timeout work - if it races, it will
- * not get 'f->signaling' and return.
- */
- host1x_intr_put_ref(f->sp->host, f->sp->id, f->waiter_ref, true);
+ if (host1x_intr_remove_fence(f->sp->host, f)) {
+ /*
+ * Managed to remove fence from queue, so it's safe to drop
+ * the interrupt path's reference.
+ */
+ dma_fence_put(&f->base);
+ }
dma_fence_set_error(&f->base, -ETIMEDOUT);
dma_fence_signal(&f->base);
- dma_fence_put(&f->base);
+ if (f->timeout)
+ dma_fence_put(&f->base);
}
-struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold)
+struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold,
+ bool timeout)
{
struct host1x_syncpt_fence *fence;
@@ -151,16 +131,11 @@ struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold)
if (!fence)
return ERR_PTR(-ENOMEM);
- fence->waiter = kzalloc(sizeof(*fence->waiter), GFP_KERNEL);
- if (!fence->waiter) {
- kfree(fence);
- return ERR_PTR(-ENOMEM);
- }
-
fence->sp = sp;
fence->threshold = threshold;
+ fence->timeout = timeout;
- dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &lock,
+ dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &sp->fences.lock,
dma_fence_context_alloc(1), 0);
INIT_DELAYED_WORK(&fence->timeout_work, do_fence_timeout);
@@ -168,3 +143,12 @@ struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold)
return &fence->base;
}
EXPORT_SYMBOL(host1x_fence_create);
+
+void host1x_fence_cancel(struct dma_fence *f)
+{
+ struct host1x_syncpt_fence *sf = to_host1x_fence(f);
+
+ schedule_delayed_work(&sf->timeout_work, 0);
+ flush_delayed_work(&sf->timeout_work);
+}
+EXPORT_SYMBOL(host1x_fence_cancel);
diff --git a/drivers/gpu/host1x/fence.h b/drivers/gpu/host1x/fence.h
index 70c91de82f14..f3c644c73cad 100644
--- a/drivers/gpu/host1x/fence.h
+++ b/drivers/gpu/host1x/fence.h
@@ -6,7 +6,24 @@
#ifndef HOST1X_FENCE_H
#define HOST1X_FENCE_H
-struct host1x_syncpt_fence;
+struct host1x_syncpt_fence {
+ struct dma_fence base;
+
+ atomic_t signaling;
+
+ struct host1x_syncpt *sp;
+ u32 threshold;
+ bool timeout;
+
+ struct delayed_work timeout_work;
+
+ struct list_head list;
+};
+
+struct host1x_fence_list {
+ spinlock_t lock;
+ struct list_head list;
+};
void host1x_fence_signal(struct host1x_syncpt_fence *fence);
diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index e49cd5b8f735..3f3f0018eee0 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -238,6 +238,49 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 getptr)
cdma_timeout_restart(cdma, getptr);
}
+static void timeout_release_mlock(struct host1x_cdma *cdma)
+{
+#if HOST1X_HW >= 8
+ /* Tegra186 and Tegra194 require a more complicated MLOCK release
+ * sequence. Furthermore, those chips by default don't enforce MLOCKs,
+ * so it turns out that if we don't /actually/ need MLOCKs, we can just
+ * ignore them.
+ *
+ * As such, for now just implement this on Tegra234 where things are
+ * stricter but also easy to implement.
+ */
+ struct host1x_channel *ch = cdma_to_channel(cdma);
+ struct host1x *host1x = cdma_to_host1x(cdma);
+ u32 offset;
+
+ switch (ch->client->class) {
+ case HOST1X_CLASS_NVJPG1:
+ offset = HOST1X_COMMON_NVJPG1_MLOCK;
+ break;
+ case HOST1X_CLASS_NVENC:
+ offset = HOST1X_COMMON_NVENC_MLOCK;
+ break;
+ case HOST1X_CLASS_VIC:
+ offset = HOST1X_COMMON_VIC_MLOCK;
+ break;
+ case HOST1X_CLASS_NVJPG:
+ offset = HOST1X_COMMON_NVJPG_MLOCK;
+ break;
+ case HOST1X_CLASS_NVDEC:
+ offset = HOST1X_COMMON_NVDEC_MLOCK;
+ break;
+ case HOST1X_CLASS_OFA:
+ offset = HOST1X_COMMON_OFA_MLOCK;
+ break;
+ default:
+ WARN(1, "%s was not updated for class %u", __func__, ch->client->class);
+ return;
+ }
+
+ host1x_common_writel(host1x, 0x0, offset);
+#endif
+}
+
/*
* If this timeout fires, it indicates the current sync_queue entry has
* exceeded its TTL and the userctx should be timed out and remaining
@@ -288,6 +331,9 @@ static void cdma_timeout_handler(struct work_struct *work)
/* stop HW, resetting channel/module */
host1x_hw_cdma_freeze(host1x, cdma);
+ /* release any held MLOCK */
+ timeout_release_mlock(cdma);
+
host1x_cdma_update_sync_queue(cdma, ch->dev);
mutex_unlock(&cdma->lock);
}
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 6b40e9af1e88..2df6a16d484e 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -47,9 +47,10 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
}
}
-static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold,
- u32 next_class)
+static void submit_wait(struct host1x_job *job, u32 id, u32 threshold)
{
+ struct host1x_cdma *cdma = &job->channel->cdma;
+
#if HOST1X_HW >= 2
host1x_cdma_push_wide(cdma,
host1x_opcode_setclass(
@@ -60,7 +61,7 @@ static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold,
),
threshold,
id,
- host1x_opcode_setclass(next_class, 0, 0)
+ HOST1X_OPCODE_NOP
);
#else
/* TODO add waitchk or use waitbases or other mitigation */
@@ -72,6 +73,32 @@ static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold,
),
host1x_class_host_wait_syncpt(id, threshold)
);
+#endif
+}
+
+static void submit_setclass(struct host1x_job *job, u32 next_class)
+{
+ struct host1x_cdma *cdma = &job->channel->cdma;
+
+#if HOST1X_HW >= 6
+ u32 stream_id;
+
+ /*
+ * If a memory context has been set, use it. Otherwise
+ * (if context isolation is disabled) use the engine's
+ * firmware stream ID.
+ */
+ if (job->memory_context)
+ stream_id = job->memory_context->stream_id;
+ else
+ stream_id = job->engine_fallback_streamid;
+
+ host1x_cdma_push_wide(cdma,
+ host1x_opcode_setclass(next_class, 0, 0),
+ host1x_opcode_setpayload(stream_id),
+ host1x_opcode_setstreamid(job->engine_streamid_offset / 4),
+ HOST1X_OPCODE_NOP);
+#else
host1x_cdma_push(cdma,
host1x_opcode_setclass(next_class, 0, 0),
HOST1X_OPCODE_NOP
@@ -79,7 +106,8 @@ static void submit_wait(struct host1x_cdma *cdma, u32 id, u32 threshold,
#endif
}
-static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base)
+static void submit_gathers(struct host1x_job *job, struct host1x_job_cmd *cmds, u32 num_cmds,
+ u32 job_syncpt_base)
{
struct host1x_cdma *cdma = &job->channel->cdma;
#if HOST1X_HW < 6
@@ -88,8 +116,8 @@ static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base)
unsigned int i;
u32 threshold;
- for (i = 0; i < job->num_cmds; i++) {
- struct host1x_job_cmd *cmd = &job->cmds[i];
+ for (i = 0; i < num_cmds; i++) {
+ struct host1x_job_cmd *cmd = &cmds[i];
if (cmd->is_wait) {
if (cmd->wait.relative)
@@ -97,7 +125,8 @@ static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base)
else
threshold = cmd->wait.threshold;
- submit_wait(cdma, cmd->wait.id, threshold, cmd->wait.next_class);
+ submit_wait(job, cmd->wait.id, threshold);
+ submit_setclass(job, cmd->wait.next_class);
} else {
struct host1x_job_gather *g = &cmd->gather;
@@ -148,14 +177,12 @@ static inline void synchronize_syncpt_base(struct host1x_job *job)
static void host1x_channel_set_streamid(struct host1x_channel *channel)
{
#if HOST1X_HW >= 6
- u32 sid = 0x7f;
-#ifdef CONFIG_IOMMU_API
- struct iommu_fwspec *spec = dev_iommu_fwspec_get(channel->dev->parent);
- if (spec)
- sid = spec->ids[0] & 0xffff;
-#endif
+ u32 stream_id;
+
+ if (!tegra_dev_iommu_get_stream_id(channel->dev->parent, &stream_id))
+ stream_id = TEGRA_STREAM_ID_BYPASS;
- host1x_ch_writel(channel, sid, HOST1X_CHANNEL_SMMU_STREAMID);
+ host1x_ch_writel(channel, stream_id, HOST1X_CHANNEL_SMMU_STREAMID);
#endif
}
@@ -180,52 +207,79 @@ static void host1x_enable_gather_filter(struct host1x_channel *ch)
#endif
}
-static int channel_submit(struct host1x_job *job)
+static void channel_program_cdma(struct host1x_job *job)
{
- struct host1x_channel *ch = job->channel;
+ struct host1x_cdma *cdma = &job->channel->cdma;
struct host1x_syncpt *sp = job->syncpt;
- u32 user_syncpt_incrs = job->syncpt_incrs;
- u32 prev_max = 0;
- u32 syncval;
- int err;
- struct host1x_waitlist *completed_waiter = NULL;
- struct host1x *host = dev_get_drvdata(ch->dev->parent);
- trace_host1x_channel_submit(dev_name(ch->dev),
- job->num_cmds, job->num_relocs,
- job->syncpt->id, job->syncpt_incrs);
+#if HOST1X_HW >= 6
+ u32 fence;
+ int i = 0;
- /* before error checks, return current max */
- prev_max = job->syncpt_end = host1x_syncpt_read_max(sp);
+ if (job->num_cmds == 0)
+ goto prefences_done;
+ if (!job->cmds[0].is_wait || job->cmds[0].wait.relative)
+ goto prefences_done;
- /* get submit lock */
- err = mutex_lock_interruptible(&ch->submitlock);
- if (err)
- goto error;
+ /* Enter host1x class with invalid stream ID for prefence waits. */
+ host1x_cdma_push_wide(cdma,
+ host1x_opcode_acquire_mlock(1),
+ host1x_opcode_setclass(1, 0, 0),
+ host1x_opcode_setpayload(0),
+ host1x_opcode_setstreamid(0x1fffff));
- completed_waiter = kzalloc(sizeof(*completed_waiter), GFP_KERNEL);
- if (!completed_waiter) {
- mutex_unlock(&ch->submitlock);
- err = -ENOMEM;
- goto error;
- }
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_cmd *cmd = &job->cmds[i];
- host1x_channel_set_streamid(ch);
- host1x_enable_gather_filter(ch);
+ if (!cmd->is_wait || cmd->wait.relative)
+ break;
- /* begin a CDMA submit */
- err = host1x_cdma_begin(&ch->cdma, job);
- if (err) {
- mutex_unlock(&ch->submitlock);
- goto error;
+ submit_wait(job, cmd->wait.id, cmd->wait.threshold);
}
+ host1x_cdma_push(cdma,
+ HOST1X_OPCODE_NOP,
+ host1x_opcode_release_mlock(1));
+
+prefences_done:
+ /* Enter engine class with invalid stream ID. */
+ host1x_cdma_push_wide(cdma,
+ host1x_opcode_acquire_mlock(job->class),
+ host1x_opcode_setclass(job->class, 0, 0),
+ host1x_opcode_setpayload(0),
+ host1x_opcode_setstreamid(job->engine_streamid_offset / 4));
+
+ /* Before switching stream ID to real stream ID, ensure engine is idle. */
+ fence = host1x_syncpt_incr_max(sp, 1);
+ host1x_cdma_push(&job->channel->cdma,
+ host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
+ HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
+ HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
+ submit_wait(job, job->syncpt->id, fence);
+ submit_setclass(job, job->class);
+
+ /* Submit work. */
+ job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs);
+ submit_gathers(job, job->cmds + i, job->num_cmds - i, job->syncpt_end - job->syncpt_incrs);
+
+ /* Before releasing MLOCK, ensure engine is idle again. */
+ fence = host1x_syncpt_incr_max(sp, 1);
+ host1x_cdma_push(&job->channel->cdma,
+ host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
+ HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
+ HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
+ submit_wait(job, job->syncpt->id, fence);
+
+ /* Release MLOCK. */
+ host1x_cdma_push(cdma,
+ HOST1X_OPCODE_NOP, host1x_opcode_release_mlock(job->class));
+#else
if (job->serialize) {
/*
* Force serialization by inserting a host wait for the
* previous job to finish before this one can commence.
*/
- host1x_cdma_push(&ch->cdma,
+ host1x_cdma_push(cdma,
host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
host1x_uclass_wait_syncpt_r(), 1),
host1x_class_host_wait_syncpt(job->syncpt->id,
@@ -236,39 +290,86 @@ static int channel_submit(struct host1x_job *job)
if (sp->base)
synchronize_syncpt_base(job);
- syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
-
- host1x_hw_syncpt_assign_to_channel(host, sp, ch);
-
- job->syncpt_end = syncval;
-
/* add a setclass for modules that require it */
if (job->class)
- host1x_cdma_push(&ch->cdma,
+ host1x_cdma_push(cdma,
host1x_opcode_setclass(job->class, 0, 0),
HOST1X_OPCODE_NOP);
- submit_gathers(job, syncval - user_syncpt_incrs);
+ job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs);
+
+ submit_gathers(job, job->cmds, job->num_cmds, job->syncpt_end - job->syncpt_incrs);
+#endif
+}
+
+static void job_complete_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+ struct host1x_job *job = container_of(cb, struct host1x_job, fence_cb);
+
+ /* Schedules CDMA update. */
+ host1x_cdma_update(&job->channel->cdma);
+}
+
+static int channel_submit(struct host1x_job *job)
+{
+ struct host1x_channel *ch = job->channel;
+ struct host1x_syncpt *sp = job->syncpt;
+ u32 prev_max = 0;
+ u32 syncval;
+ int err;
+ struct host1x *host = dev_get_drvdata(ch->dev->parent);
+
+ trace_host1x_channel_submit(dev_name(ch->dev),
+ job->num_cmds, job->num_relocs,
+ job->syncpt->id, job->syncpt_incrs);
+
+ /* before error checks, return current max */
+ prev_max = job->syncpt_end = host1x_syncpt_read_max(sp);
+
+ /* get submit lock */
+ err = mutex_lock_interruptible(&ch->submitlock);
+ if (err)
+ return err;
+
+ host1x_channel_set_streamid(ch);
+ host1x_enable_gather_filter(ch);
+ host1x_hw_syncpt_assign_to_channel(host, sp, ch);
+
+ /* begin a CDMA submit */
+ err = host1x_cdma_begin(&ch->cdma, job);
+ if (err) {
+ mutex_unlock(&ch->submitlock);
+ return err;
+ }
+
+ channel_program_cdma(job);
+ syncval = host1x_syncpt_read_max(sp);
+
+ /*
+ * Create fence before submitting job to HW to avoid job completing
+ * before the fence is set up.
+ */
+ job->fence = host1x_fence_create(sp, syncval, true);
+ if (WARN(IS_ERR(job->fence), "Failed to create submit complete fence")) {
+ job->fence = NULL;
+ } else {
+ err = dma_fence_add_callback(job->fence, &job->fence_cb,
+ job_complete_callback);
+ }
/* end CDMA submit & stash pinned hMems into sync queue */
host1x_cdma_end(&ch->cdma, job);
trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval);
- /* schedule a submit complete interrupt */
- err = host1x_intr_add_action(host, sp, syncval,
- HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch,
- completed_waiter, &job->waiter);
- completed_waiter = NULL;
- WARN(err, "Failed to set submit complete interrupt");
-
mutex_unlock(&ch->submitlock);
- return 0;
+ if (err == -ENOENT)
+ host1x_cdma_update(&ch->cdma);
+ else
+ WARN(err, "Failed to set submit complete interrupt");
-error:
- kfree(completed_waiter);
- return err;
+ return 0;
}
static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index 54e31d81517b..4c32aa1b95e8 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -177,7 +177,16 @@ static void show_gather(struct output *o, dma_addr_t phys_addr,
for (i = 0; i < words; i++) {
dma_addr_t addr = phys_addr + i * 4;
- u32 val = *(map_addr + offset / 4 + i);
+ u32 voffset = offset + i * 4;
+ u32 val;
+
+ /* If we reach the RESTART opcode, continue at the beginning of pushbuffer */
+ if (cdma && voffset >= cdma->push_buffer.size) {
+ addr -= cdma->push_buffer.size;
+ voffset -= cdma->push_buffer.size;
+ }
+
+ val = *(map_addr + voffset / 4);
if (!data_count) {
host1x_debug_output(o, " %pad: %08x: ", &addr, val);
@@ -203,7 +212,7 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
job->num_slots, job->num_unpins);
show_gather(o, pb->dma + job->first_get, job->num_slots * 2, cdma,
- pb->dma + job->first_get, pb->mapped + job->first_get);
+ pb->dma, pb->mapped);
for (i = 0; i < job->num_cmds; i++) {
struct host1x_job_gather *g;
@@ -227,7 +236,7 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
host1x_debug_output(o, " GATHER at %pad+%#x, %d words\n",
&g->base, g->offset, g->words);
- show_gather(o, g->base + g->offset, g->words, cdma,
+ show_gather(o, g->base + g->offset, g->words, NULL,
g->base, mapped);
if (!job->gather_copy_mapped)
diff --git a/drivers/gpu/host1x/hw/host1x01_hardware.h b/drivers/gpu/host1x/hw/host1x01_hardware.h
index fe59df1d3dc3..cb93d7c1808c 100644
--- a/drivers/gpu/host1x/hw/host1x01_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x01_hardware.h
@@ -15,118 +15,6 @@
#include "hw_host1x01_sync.h"
#include "hw_host1x01_uclass.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x02_hardware.h b/drivers/gpu/host1x/hw/host1x02_hardware.h
index af60d7fb016d..2d1282b9bc33 100644
--- a/drivers/gpu/host1x/hw/host1x02_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x02_hardware.h
@@ -15,117 +15,6 @@
#include "hw_host1x02_sync.h"
#include "hw_host1x02_uclass.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x04_hardware.h b/drivers/gpu/host1x/hw/host1x04_hardware.h
index 4f9bcddf27e3..84d244e8af30 100644
--- a/drivers/gpu/host1x/hw/host1x04_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x04_hardware.h
@@ -15,117 +15,6 @@
#include "hw_host1x04_sync.h"
#include "hw_host1x04_uclass.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x05_hardware.h b/drivers/gpu/host1x/hw/host1x05_hardware.h
index af3ab4b7f010..1dcde6ec7909 100644
--- a/drivers/gpu/host1x/hw/host1x05_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x05_hardware.h
@@ -15,117 +15,6 @@
#include "hw_host1x05_sync.h"
#include "hw_host1x05_uclass.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x06_hardware.h b/drivers/gpu/host1x/hw/host1x06_hardware.h
index 01a142a09800..c05cfa7e3090 100644
--- a/drivers/gpu/host1x/hw/host1x06_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x06_hardware.h
@@ -16,122 +16,6 @@
#include "hw_host1x06_vm.h"
#include "hw_host1x06_hypervisor.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-static inline u32 host1x_opcode_gather_wide(unsigned count)
-{
- return (12 << 28) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x07_hardware.h b/drivers/gpu/host1x/hw/host1x07_hardware.h
index e6582172ebfd..d67364e03956 100644
--- a/drivers/gpu/host1x/hw/host1x07_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x07_hardware.h
@@ -16,122 +16,6 @@
#include "hw_host1x07_vm.h"
#include "hw_host1x07_hypervisor.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-static inline u32 host1x_opcode_gather_wide(unsigned count)
-{
- return (12 << 28) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x08.c b/drivers/gpu/host1x/hw/host1x08.c
new file mode 100644
index 000000000000..754890c34c74
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for Tegra234 SoCs
+ *
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x08.h"
+#include "host1x08_hardware.h"
+
+/* include code */
+#define HOST1X_HW 8
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x08_init(struct host1x *host)
+{
+ host->channel_op = &host1x_channel_ops;
+ host->cdma_op = &host1x_cdma_ops;
+ host->cdma_pb_op = &host1x_pushbuffer_ops;
+ host->syncpt_op = &host1x_syncpt_ops;
+ host->intr_op = &host1x_intr_ops;
+ host->debug_op = &host1x_debug_ops;
+
+ return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x08.h b/drivers/gpu/host1x/hw/host1x08.h
new file mode 100644
index 000000000000..a6bad56e44cf
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x init for Tegra234 SoCs
+ *
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HOST1X08_H
+#define HOST1X_HOST1X08_H
+
+struct host1x;
+
+int host1x08_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x08_hardware.h b/drivers/gpu/host1x/hw/host1x08_hardware.h
new file mode 100644
index 000000000000..936243060bff
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08_hardware.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Register Offsets for Tegra234
+ *
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_HOST1X08_HARDWARE_H
+#define __HOST1X_HOST1X08_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x08_uclass.h"
+#include "hw_host1x08_vm.h"
+#include "hw_host1x08_hypervisor.h"
+#include "hw_host1x08_common.h"
+
+#include "opcodes.h"
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
index 5f831438d19b..50c32de452fb 100644
--- a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
@@ -53,7 +53,7 @@ static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
host1x_uclass_incr_syncpt_cond_f(v)
static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
{
- return (v & 0xff) << 0;
+ return (v & 0x3ff) << 0;
}
#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
host1x_uclass_incr_syncpt_indx_f(v)
diff --git a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
index 8cd2ef087d5d..887b878f92f7 100644
--- a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
@@ -53,7 +53,7 @@ static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
host1x_uclass_incr_syncpt_cond_f(v)
static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
{
- return (v & 0xff) << 0;
+ return (v & 0x3ff) << 0;
}
#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
host1x_uclass_incr_syncpt_indx_f(v)
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_channel.h b/drivers/gpu/host1x/hw/hw_host1x08_channel.h
new file mode 100644
index 000000000000..c9272d2ab14a
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_channel.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HW_HOST1X08_CHANNEL_H
+#define HOST1X_HW_HOST1X08_CHANNEL_H
+
+#define HOST1X_CHANNEL_SMMU_STREAMID 0x084
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_common.h b/drivers/gpu/host1x/hw/hw_host1x08_common.h
new file mode 100644
index 000000000000..8e0c99150ec2
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_common.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#define HOST1X_COMMON_OFA_MLOCK 0x4050
+#define HOST1X_COMMON_NVJPG1_MLOCK 0x4070
+#define HOST1X_COMMON_VIC_MLOCK 0x4078
+#define HOST1X_COMMON_NVENC_MLOCK 0x407c
+#define HOST1X_COMMON_NVDEC_MLOCK 0x4080
+#define HOST1X_COMMON_NVJPG_MLOCK 0x4084
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h
new file mode 100644
index 000000000000..22964324c914
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#define HOST1X_HV_SYNCPT_PROT_EN 0x1724
+#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN BIT(1)
+#define HOST1X_HV_CH_MLOCK_EN(x) (0x1700 + (x * 4))
+#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x) (0x1710 + (x * 4))
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_uclass.h b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h
new file mode 100644
index 000000000000..4fb1d090edae
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+ /*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+
+#ifndef HOST1X_HW_HOST1X08_UCLASS_H
+#define HOST1X_HW_HOST1X08_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+ return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+ host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+ return (v & 0xff) << 10;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+ host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+ return (v & 0x3ff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+ host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+ return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+ host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+ host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+ return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+ host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+ return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+ host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+ host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+ return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+ host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+ return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+ host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+ return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+ host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+ host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+ return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+ host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+ host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+ return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+ host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+ return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+ host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+ return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+ host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+ return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+ host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+ return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+ host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+ return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+ host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+ return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+ host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+ return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+ host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+ return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+ host1x_uclass_wait_syncpt_32_r()
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_vm.h b/drivers/gpu/host1x/hw/hw_host1x08_vm.h
new file mode 100644
index 000000000000..1455a4670bf8
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_vm.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#define HOST1X_CHANNEL_DMASTART 0x0000
+#define HOST1X_CHANNEL_DMASTART_HI 0x0004
+#define HOST1X_CHANNEL_DMAPUT 0x0008
+#define HOST1X_CHANNEL_DMAPUT_HI 0x000c
+#define HOST1X_CHANNEL_DMAGET 0x0010
+#define HOST1X_CHANNEL_DMAGET_HI 0x0014
+#define HOST1X_CHANNEL_DMAEND 0x0018
+#define HOST1X_CHANNEL_DMAEND_HI 0x001c
+#define HOST1X_CHANNEL_DMACTRL 0x0020
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP BIT(0)
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST BIT(1)
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET BIT(2)
+#define HOST1X_CHANNEL_CMDFIFO_STAT 0x0024
+#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY BIT(13)
+#define HOST1X_CHANNEL_CMDFIFO_RDATA 0x0028
+#define HOST1X_CHANNEL_CMDP_OFFSET 0x0030
+#define HOST1X_CHANNEL_CMDP_CLASS 0x0034
+#define HOST1X_CHANNEL_CHANNELSTAT 0x0038
+#define HOST1X_CHANNEL_CMDPROC_STOP 0x0048
+#define HOST1X_CHANNEL_TEARDOWN 0x004c
+#define HOST1X_CHANNEL_SMMU_STREAMID 0x0084
+
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(x) (0x6400 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x) (0x6600 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_INTR_DEST(x) (0x6684 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x) (0x770c + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x) (0x7790 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT(x) (0x8080 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(x) (0xa088 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_CH_APP(x) (0xb090 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v) (((v) & 0x3f) << 8)
diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
index f56375ee6e71..bd5b5ef62f35 100644
--- a/drivers/gpu/host1x/hw/intr_hw.c
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -6,50 +6,74 @@
* Copyright (c) 2010-2013, NVIDIA Corporation.
*/
-#include <linux/interrupt.h>
-#include <linux/irq.h>
#include <linux/io.h>
#include "../intr.h"
#include "../dev.h"
-/*
- * Sync point threshold interrupt service function
- * Handles sync point threshold triggers, in interrupt context
- */
-static void host1x_intr_syncpt_handle(struct host1x_syncpt *syncpt)
+static void process_32_syncpts(struct host1x *host, unsigned long val, u32 reg_offset)
{
- unsigned int id = syncpt->id;
- struct host1x *host = syncpt->host;
+ unsigned int id;
- host1x_sync_writel(host, BIT(id % 32),
- HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id / 32));
- host1x_sync_writel(host, BIT(id % 32),
- HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id / 32));
+ if (!val)
+ return;
+
+ host1x_sync_writel(host, val, HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(reg_offset));
+ host1x_sync_writel(host, val, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(reg_offset));
- schedule_work(&syncpt->intr.work);
+ for_each_set_bit(id, &val, 32)
+ host1x_intr_handle_interrupt(host, reg_offset * 32 + id);
}
static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
{
- struct host1x *host = dev_id;
+ struct host1x_intr_irq_data *irq_data = dev_id;
+ struct host1x *host = irq_data->host;
unsigned long reg;
- unsigned int i, id;
+ unsigned int i;
- for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) {
+#if !defined(CONFIG_64BIT)
+ for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 32);
+ i += host->num_syncpt_irqs) {
reg = host1x_sync_readl(host,
HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
- for_each_set_bit(id, &reg, 32) {
- struct host1x_syncpt *syncpt =
- host->syncpt + (i * 32 + id);
- host1x_intr_syncpt_handle(syncpt);
- }
+
+ process_32_syncpts(host, reg, i);
+ }
+#elif HOST1X_HW == 6 || HOST1X_HW == 7
+ /*
+ * Tegra186 and Tegra194 have the first INT_STATUS register not 64-bit aligned,
+ * and only have one interrupt line.
+ */
+ reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(0));
+ process_32_syncpts(host, reg, 0);
+
+ for (i = 1; i < (host->info->nb_pts / 32) - 1; i += 2) {
+ reg = host1x_sync_readq(host,
+ HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
+
+ process_32_syncpts(host, lower_32_bits(reg), i);
+ process_32_syncpts(host, upper_32_bits(reg), i + 1);
}
+ reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
+ process_32_syncpts(host, reg, i);
+#else
+ /* All 64-bit capable SoCs have number of syncpoints divisible by 64 */
+ for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 64);
+ i += host->num_syncpt_irqs) {
+ reg = host1x_sync_readq(host,
+ HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i * 2));
+
+ process_32_syncpts(host, lower_32_bits(reg), i * 2 + 0);
+ process_32_syncpts(host, upper_32_bits(reg), i * 2 + 1);
+ }
+#endif
+
return IRQ_HANDLED;
}
-static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
+static void host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
{
unsigned int i;
@@ -61,7 +85,8 @@ static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
}
}
-static void intr_hw_init(struct host1x *host, u32 cpm)
+static int
+host1x_intr_init_host_sync(struct host1x *host, u32 cpm)
{
#if HOST1X_HW < 6
/* disable the ip_busy_timeout. this prevents write drops */
@@ -76,48 +101,41 @@ static void intr_hw_init(struct host1x *host, u32 cpm)
/* update host clocks per usec */
host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK);
#endif
-}
-
-static int
-_host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
- void (*syncpt_thresh_work)(struct work_struct *))
-{
- unsigned int i;
- int err;
+#if HOST1X_HW >= 8
+ u32 id;
- host1x_hw_intr_disable_all_syncpt_intrs(host);
-
- for (i = 0; i < host->info->nb_pts; i++)
- INIT_WORK(&host->syncpt[i].intr.work, syncpt_thresh_work);
+ /*
+ * Program threshold interrupt destination among 8 lines per VM,
+ * per syncpoint. For each group of 64 syncpoints (corresponding to two
+ * interrupt status registers), direct to one interrupt line, going
+ * around in a round robin fashion.
+ */
+ for (id = 0; id < host->info->nb_pts; id++) {
+ u32 reg_offset = id / 64;
+ u32 irq_index = reg_offset % host->num_syncpt_irqs;
- err = devm_request_irq(host->dev, host->intr_syncpt_irq,
- syncpt_thresh_isr, IRQF_SHARED,
- "host1x_syncpt", host);
- if (err < 0) {
- WARN_ON(1);
- return err;
+ host1x_sync_writel(host, irq_index, HOST1X_SYNC_SYNCPT_INTR_DEST(id));
}
-
- intr_hw_init(host, cpm);
+#endif
return 0;
}
-static void _host1x_intr_set_syncpt_threshold(struct host1x *host,
+static void host1x_intr_set_syncpt_threshold(struct host1x *host,
unsigned int id,
u32 thresh)
{
host1x_sync_writel(host, thresh, HOST1X_SYNC_SYNCPT_INT_THRESH(id));
}
-static void _host1x_intr_enable_syncpt_intr(struct host1x *host,
+static void host1x_intr_enable_syncpt_intr(struct host1x *host,
unsigned int id)
{
host1x_sync_writel(host, BIT(id % 32),
HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id / 32));
}
-static void _host1x_intr_disable_syncpt_intr(struct host1x *host,
+static void host1x_intr_disable_syncpt_intr(struct host1x *host,
unsigned int id)
{
host1x_sync_writel(host, BIT(id % 32),
@@ -126,23 +144,11 @@ static void _host1x_intr_disable_syncpt_intr(struct host1x *host,
HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id / 32));
}
-static int _host1x_free_syncpt_irq(struct host1x *host)
-{
- unsigned int i;
-
- devm_free_irq(host->dev, host->intr_syncpt_irq, host);
-
- for (i = 0; i < host->info->nb_pts; i++)
- cancel_work_sync(&host->syncpt[i].intr.work);
-
- return 0;
-}
-
static const struct host1x_intr_ops host1x_intr_ops = {
- .init_host_sync = _host1x_intr_init_host_sync,
- .set_syncpt_threshold = _host1x_intr_set_syncpt_threshold,
- .enable_syncpt_intr = _host1x_intr_enable_syncpt_intr,
- .disable_syncpt_intr = _host1x_intr_disable_syncpt_intr,
- .disable_all_syncpt_intrs = _host1x_intr_disable_all_syncpt_intrs,
- .free_syncpt_irq = _host1x_free_syncpt_irq,
+ .init_host_sync = host1x_intr_init_host_sync,
+ .set_syncpt_threshold = host1x_intr_set_syncpt_threshold,
+ .enable_syncpt_intr = host1x_intr_enable_syncpt_intr,
+ .disable_syncpt_intr = host1x_intr_disable_syncpt_intr,
+ .disable_all_syncpt_intrs = host1x_intr_disable_all_syncpt_intrs,
+ .isr = syncpt_thresh_isr,
};
diff --git a/drivers/gpu/host1x/hw/opcodes.h b/drivers/gpu/host1x/hw/opcodes.h
new file mode 100644
index 000000000000..649614499b04
--- /dev/null
+++ b/drivers/gpu/host1x/hw/opcodes.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x opcodes
+ *
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_OPCODES_H
+#define __HOST1X_OPCODES_H
+
+#include <linux/types.h>
+
+static inline u32 host1x_class_host_wait_syncpt(
+ unsigned indx, unsigned threshold)
+{
+ return host1x_uclass_wait_syncpt_indx_f(indx)
+ | host1x_uclass_wait_syncpt_thresh_f(threshold);
+}
+
+static inline u32 host1x_class_host_load_syncpt_base(
+ unsigned indx, unsigned threshold)
+{
+ return host1x_uclass_load_syncpt_base_base_indx_f(indx)
+ | host1x_uclass_load_syncpt_base_value_f(threshold);
+}
+
+static inline u32 host1x_class_host_wait_syncpt_base(
+ unsigned indx, unsigned base_indx, unsigned offset)
+{
+ return host1x_uclass_wait_syncpt_base_indx_f(indx)
+ | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
+ | host1x_uclass_wait_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt_base(
+ unsigned base_indx, unsigned offset)
+{
+ return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
+ | host1x_uclass_incr_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt(
+ unsigned cond, unsigned indx)
+{
+ return host1x_uclass_incr_syncpt_cond_f(cond)
+ | host1x_uclass_incr_syncpt_indx_f(indx);
+}
+
+static inline u32 host1x_class_host_indoff_reg_write(
+ unsigned mod_id, unsigned offset, bool auto_inc)
+{
+ u32 v = host1x_uclass_indoff_indbe_f(0xf)
+ | host1x_uclass_indoff_indmodid_f(mod_id)
+ | host1x_uclass_indoff_indroffset_f(offset);
+ if (auto_inc)
+ v |= host1x_uclass_indoff_autoinc_f(1);
+ return v;
+}
+
+static inline u32 host1x_class_host_indoff_reg_read(
+ unsigned mod_id, unsigned offset, bool auto_inc)
+{
+ u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
+ | host1x_uclass_indoff_indroffset_f(offset)
+ | host1x_uclass_indoff_rwn_read_v();
+ if (auto_inc)
+ v |= host1x_uclass_indoff_autoinc_f(1);
+ return v;
+}
+
+static inline u32 host1x_opcode_setclass(
+ unsigned class_id, unsigned offset, unsigned mask)
+{
+ return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
+}
+
+static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
+{
+ return (1 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
+{
+ return (2 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
+{
+ return (3 << 28) | (offset << 16) | mask;
+}
+
+static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
+{
+ return (4 << 28) | (offset << 16) | value;
+}
+
+static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
+{
+ return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
+ host1x_class_host_incr_syncpt(cond, indx));
+}
+
+static inline u32 host1x_opcode_restart(unsigned address)
+{
+ return (5 << 28) | (address >> 4);
+}
+
+static inline u32 host1x_opcode_gather(unsigned count)
+{
+ return (6 << 28) | count;
+}
+
+static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
+{
+ return (6 << 28) | (offset << 16) | BIT(15) | count;
+}
+
+static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
+{
+ return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
+}
+
+static inline u32 host1x_opcode_setstreamid(unsigned streamid)
+{
+ return (7 << 28) | streamid;
+}
+
+static inline u32 host1x_opcode_setpayload(unsigned payload)
+{
+ return (9 << 28) | payload;
+}
+
+static inline u32 host1x_opcode_gather_wide(unsigned count)
+{
+ return (12 << 28) | count;
+}
+
+static inline u32 host1x_opcode_acquire_mlock(unsigned mlock)
+{
+ return (14 << 28) | (0 << 24) | mlock;
+}
+
+static inline u32 host1x_opcode_release_mlock(unsigned mlock)
+{
+ return (14 << 28) | (1 << 24) | mlock;
+}
+
+#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
index dd39d67ccec3..8cf35b2eff3d 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -106,9 +106,6 @@ static void syncpt_assign_to_channel(struct host1x_syncpt *sp,
#if HOST1X_HW >= 6
struct host1x *host = sp->host;
- if (!host->hv_regs)
- return;
-
host1x_sync_writel(host,
HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 965ba21818b1..f77a678949e9 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -2,299 +2,130 @@
/*
* Tegra host1x Interrupt Management
*
- * Copyright (c) 2010-2013, NVIDIA Corporation.
+ * Copyright (c) 2010-2021, NVIDIA Corporation.
*/
#include <linux/clk.h>
#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/irq.h>
-
-#include <trace/events/host1x.h>
-#include "channel.h"
#include "dev.h"
#include "fence.h"
#include "intr.h"
-/* Wait list management */
-
-enum waitlist_state {
- WLS_PENDING,
- WLS_REMOVED,
- WLS_CANCELLED,
- WLS_HANDLED
-};
-
-static void waiter_release(struct kref *kref)
-{
- kfree(container_of(kref, struct host1x_waitlist, refcount));
-}
-
-/*
- * add a waiter to a waiter queue, sorted by threshold
- * returns true if it was added at the head of the queue
- */
-static bool add_waiter_to_queue(struct host1x_waitlist *waiter,
- struct list_head *queue)
-{
- struct host1x_waitlist *pos;
- u32 thresh = waiter->thresh;
-
- list_for_each_entry_reverse(pos, queue, list)
- if ((s32)(pos->thresh - thresh) <= 0) {
- list_add(&waiter->list, &pos->list);
- return false;
- }
-
- list_add(&waiter->list, queue);
- return true;
-}
-
-/*
- * run through a waiter queue for a single sync point ID
- * and gather all completed waiters into lists by actions
- */
-static void remove_completed_waiters(struct list_head *head, u32 sync,
- struct list_head completed[HOST1X_INTR_ACTION_COUNT])
+static void host1x_intr_add_fence_to_list(struct host1x_fence_list *list,
+ struct host1x_syncpt_fence *fence)
{
- struct list_head *dest;
- struct host1x_waitlist *waiter, *next, *prev;
+ struct host1x_syncpt_fence *fence_in_list;
- list_for_each_entry_safe(waiter, next, head, list) {
- if ((s32)(waiter->thresh - sync) > 0)
- break;
-
- dest = completed + waiter->action;
-
- /* consolidate submit cleanups */
- if (waiter->action == HOST1X_INTR_ACTION_SUBMIT_COMPLETE &&
- !list_empty(dest)) {
- prev = list_entry(dest->prev,
- struct host1x_waitlist, list);
- if (prev->data == waiter->data) {
- prev->count++;
- dest = NULL;
- }
+ list_for_each_entry_reverse(fence_in_list, &list->list, list) {
+ if ((s32)(fence_in_list->threshold - fence->threshold) <= 0) {
+ /* Fence in list is before us, we can insert here */
+ list_add(&fence->list, &fence_in_list->list);
+ return;
}
-
- /* PENDING->REMOVED or CANCELLED->HANDLED */
- if (atomic_inc_return(&waiter->state) == WLS_HANDLED || !dest) {
- list_del(&waiter->list);
- kref_put(&waiter->refcount, waiter_release);
- } else
- list_move_tail(&waiter->list, dest);
}
-}
-static void reset_threshold_interrupt(struct host1x *host,
- struct list_head *head,
- unsigned int id)
-{
- u32 thresh =
- list_first_entry(head, struct host1x_waitlist, list)->thresh;
-
- host1x_hw_intr_set_syncpt_threshold(host, id, thresh);
- host1x_hw_intr_enable_syncpt_intr(host, id);
+ /* Add as first in list */
+ list_add(&fence->list, &list->list);
}
-static void action_submit_complete(struct host1x_waitlist *waiter)
+static void host1x_intr_update_hw_state(struct host1x *host, struct host1x_syncpt *sp)
{
- struct host1x_channel *channel = waiter->data;
+ struct host1x_syncpt_fence *fence;
- host1x_cdma_update(&channel->cdma);
+ if (!list_empty(&sp->fences.list)) {
+ fence = list_first_entry(&sp->fences.list, struct host1x_syncpt_fence, list);
- /* Add nr_completed to trace */
- trace_host1x_channel_submit_complete(dev_name(channel->dev),
- waiter->count, waiter->thresh);
+ host1x_hw_intr_set_syncpt_threshold(host, sp->id, fence->threshold);
+ host1x_hw_intr_enable_syncpt_intr(host, sp->id);
+ } else {
+ host1x_hw_intr_disable_syncpt_intr(host, sp->id);
+ }
}
-static void action_wakeup(struct host1x_waitlist *waiter)
+void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence)
{
- wait_queue_head_t *wq = waiter->data;
+ struct host1x_fence_list *fence_list = &fence->sp->fences;
- wake_up(wq);
-}
-
-static void action_wakeup_interruptible(struct host1x_waitlist *waiter)
-{
- wait_queue_head_t *wq = waiter->data;
+ INIT_LIST_HEAD(&fence->list);
- wake_up_interruptible(wq);
+ host1x_intr_add_fence_to_list(fence_list, fence);
+ host1x_intr_update_hw_state(host, fence->sp);
}
-static void action_signal_fence(struct host1x_waitlist *waiter)
+bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *fence)
{
- struct host1x_syncpt_fence *f = waiter->data;
-
- host1x_fence_signal(f);
-}
+ struct host1x_fence_list *fence_list = &fence->sp->fences;
+ unsigned long irqflags;
-typedef void (*action_handler)(struct host1x_waitlist *waiter);
+ spin_lock_irqsave(&fence_list->lock, irqflags);
-static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = {
- action_submit_complete,
- action_wakeup,
- action_wakeup_interruptible,
- action_signal_fence,
-};
-
-static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT])
-{
- struct list_head *head = completed;
- unsigned int i;
-
- for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i, ++head) {
- action_handler handler = action_handlers[i];
- struct host1x_waitlist *waiter, *next;
-
- list_for_each_entry_safe(waiter, next, head, list) {
- list_del(&waiter->list);
- handler(waiter);
- WARN_ON(atomic_xchg(&waiter->state, WLS_HANDLED) !=
- WLS_REMOVED);
- kref_put(&waiter->refcount, waiter_release);
- }
+ if (list_empty(&fence->list)) {
+ spin_unlock_irqrestore(&fence_list->lock, irqflags);
+ return false;
}
-}
-
-/*
- * Remove & handle all waiters that have completed for the given syncpt
- */
-static int process_wait_list(struct host1x *host,
- struct host1x_syncpt *syncpt,
- u32 threshold)
-{
- struct list_head completed[HOST1X_INTR_ACTION_COUNT];
- unsigned int i;
- int empty;
-
- for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i)
- INIT_LIST_HEAD(completed + i);
-
- spin_lock(&syncpt->intr.lock);
- remove_completed_waiters(&syncpt->intr.wait_head, threshold,
- completed);
+ list_del_init(&fence->list);
+ host1x_intr_update_hw_state(host, fence->sp);
- empty = list_empty(&syncpt->intr.wait_head);
- if (empty)
- host1x_hw_intr_disable_syncpt_intr(host, syncpt->id);
- else
- reset_threshold_interrupt(host, &syncpt->intr.wait_head,
- syncpt->id);
+ spin_unlock_irqrestore(&fence_list->lock, irqflags);
- spin_unlock(&syncpt->intr.lock);
-
- run_handlers(completed);
-
- return empty;
-}
-
-/*
- * Sync point threshold interrupt service thread function
- * Handles sync point threshold triggers, in thread context
- */
-
-static void syncpt_thresh_work(struct work_struct *work)
-{
- struct host1x_syncpt_intr *syncpt_intr =
- container_of(work, struct host1x_syncpt_intr, work);
- struct host1x_syncpt *syncpt =
- container_of(syncpt_intr, struct host1x_syncpt, intr);
- unsigned int id = syncpt->id;
- struct host1x *host = syncpt->host;
-
- (void)process_wait_list(host, syncpt,
- host1x_syncpt_load(host->syncpt + id));
+ return true;
}
-int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
- u32 thresh, enum host1x_intr_action action,
- void *data, struct host1x_waitlist *waiter,
- void **ref)
+void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id)
{
- int queue_was_empty;
-
- if (waiter == NULL) {
- pr_warn("%s: NULL waiter\n", __func__);
- return -EINVAL;
- }
-
- /* initialize a new waiter */
- INIT_LIST_HEAD(&waiter->list);
- kref_init(&waiter->refcount);
- if (ref)
- kref_get(&waiter->refcount);
- waiter->thresh = thresh;
- waiter->action = action;
- atomic_set(&waiter->state, WLS_PENDING);
- waiter->data = data;
- waiter->count = 1;
+ struct host1x_syncpt *sp = &host->syncpt[id];
+ struct host1x_syncpt_fence *fence, *tmp;
+ unsigned int value;
- spin_lock(&syncpt->intr.lock);
+ value = host1x_syncpt_load(sp);
- queue_was_empty = list_empty(&syncpt->intr.wait_head);
+ spin_lock(&sp->fences.lock);
- if (add_waiter_to_queue(waiter, &syncpt->intr.wait_head)) {
- /* added at head of list - new threshold value */
- host1x_hw_intr_set_syncpt_threshold(host, syncpt->id, thresh);
+ list_for_each_entry_safe(fence, tmp, &sp->fences.list, list) {
+ if (((value - fence->threshold) & 0x80000000U) != 0U) {
+ /* Fence is not yet expired, we are done */
+ break;
+ }
- /* added as first waiter - enable interrupt */
- if (queue_was_empty)
- host1x_hw_intr_enable_syncpt_intr(host, syncpt->id);
+ list_del_init(&fence->list);
+ host1x_fence_signal(fence);
}
- if (ref)
- *ref = waiter;
-
- spin_unlock(&syncpt->intr.lock);
+ /* Re-enable interrupt if necessary */
+ host1x_intr_update_hw_state(host, sp);
- return 0;
+ spin_unlock(&sp->fences.lock);
}
-void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref,
- bool flush)
+int host1x_intr_init(struct host1x *host)
{
- struct host1x_waitlist *waiter = ref;
- struct host1x_syncpt *syncpt;
-
- atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED);
+ struct host1x_intr_irq_data *irq_data;
+ unsigned int id;
+ int i, err;
- syncpt = host->syncpt + id;
+ for (id = 0; id < host1x_syncpt_nb_pts(host); ++id) {
+ struct host1x_syncpt *syncpt = &host->syncpt[id];
- spin_lock(&syncpt->intr.lock);
- if (atomic_cmpxchg(&waiter->state, WLS_CANCELLED, WLS_HANDLED) ==
- WLS_CANCELLED) {
- list_del(&waiter->list);
- kref_put(&waiter->refcount, waiter_release);
+ spin_lock_init(&syncpt->fences.lock);
+ INIT_LIST_HEAD(&syncpt->fences.list);
}
- spin_unlock(&syncpt->intr.lock);
- if (flush) {
- /* Wait until any concurrently executing handler has finished. */
- while (atomic_read(&waiter->state) != WLS_HANDLED)
- schedule();
- }
+ irq_data = devm_kcalloc(host->dev, host->num_syncpt_irqs, sizeof(irq_data[0]), GFP_KERNEL);
+ if (!irq_data)
+ return -ENOMEM;
- kref_put(&waiter->refcount, waiter_release);
-}
-
-int host1x_intr_init(struct host1x *host, unsigned int irq_sync)
-{
- unsigned int id;
- u32 nb_pts = host1x_syncpt_nb_pts(host);
-
- mutex_init(&host->intr_mutex);
- host->intr_syncpt_irq = irq_sync;
+ host1x_hw_intr_disable_all_syncpt_intrs(host);
- for (id = 0; id < nb_pts; ++id) {
- struct host1x_syncpt *syncpt = host->syncpt + id;
+ for (i = 0; i < host->num_syncpt_irqs; i++) {
+ irq_data[i].host = host;
+ irq_data[i].offset = i;
- spin_lock_init(&syncpt->intr.lock);
- INIT_LIST_HEAD(&syncpt->intr.wait_head);
- snprintf(syncpt->intr.thresh_irq_name,
- sizeof(syncpt->intr.thresh_irq_name),
- "host1x_sp_%02u", id);
+ err = devm_request_irq(host->dev, host->syncpt_irqs[i],
+ host->intr_op->isr, IRQF_SHARED,
+ "host1x_syncpt", &irq_data[i]);
+ if (err < 0)
+ return err;
}
return 0;
@@ -310,8 +141,7 @@ void host1x_intr_start(struct host1x *host)
int err;
mutex_lock(&host->intr_mutex);
- err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000),
- syncpt_thresh_work);
+ err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000));
if (err) {
mutex_unlock(&host->intr_mutex);
return;
@@ -321,36 +151,5 @@ void host1x_intr_start(struct host1x *host)
void host1x_intr_stop(struct host1x *host)
{
- unsigned int id;
- struct host1x_syncpt *syncpt = host->syncpt;
- u32 nb_pts = host1x_syncpt_nb_pts(host);
-
- mutex_lock(&host->intr_mutex);
-
host1x_hw_intr_disable_all_syncpt_intrs(host);
-
- for (id = 0; id < nb_pts; ++id) {
- struct host1x_waitlist *waiter, *next;
-
- list_for_each_entry_safe(waiter, next,
- &syncpt[id].intr.wait_head, list) {
- if (atomic_cmpxchg(&waiter->state,
- WLS_CANCELLED, WLS_HANDLED) == WLS_CANCELLED) {
- list_del(&waiter->list);
- kref_put(&waiter->refcount, waiter_release);
- }
- }
-
- if (!list_empty(&syncpt[id].intr.wait_head)) {
- /* output diagnostics */
- mutex_unlock(&host->intr_mutex);
- pr_warn("%s cannot stop syncpt intr id=%u\n",
- __func__, id);
- return;
- }
- }
-
- host1x_hw_intr_free_syncpt_irq(host);
-
- mutex_unlock(&host->intr_mutex);
}
diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
index e4c346099273..11cdf13e32fe 100644
--- a/drivers/gpu/host1x/intr.h
+++ b/drivers/gpu/host1x/intr.h
@@ -2,87 +2,22 @@
/*
* Tegra host1x Interrupt Management
*
- * Copyright (c) 2010-2013, NVIDIA Corporation.
+ * Copyright (c) 2010-2021, NVIDIA Corporation.
*/
#ifndef __HOST1X_INTR_H
#define __HOST1X_INTR_H
-#include <linux/interrupt.h>
-#include <linux/workqueue.h>
-
-struct host1x_syncpt;
struct host1x;
+struct host1x_syncpt_fence;
-enum host1x_intr_action {
- /*
- * Perform cleanup after a submit has completed.
- * 'data' points to a channel
- */
- HOST1X_INTR_ACTION_SUBMIT_COMPLETE = 0,
-
- /*
- * Wake up a task.
- * 'data' points to a wait_queue_head_t
- */
- HOST1X_INTR_ACTION_WAKEUP,
-
- /*
- * Wake up a interruptible task.
- * 'data' points to a wait_queue_head_t
- */
- HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
-
- HOST1X_INTR_ACTION_SIGNAL_FENCE,
-
- HOST1X_INTR_ACTION_COUNT
+struct host1x_intr_irq_data {
+ struct host1x *host;
+ u32 offset;
};
-struct host1x_syncpt_intr {
- spinlock_t lock;
- struct list_head wait_head;
- char thresh_irq_name[12];
- struct work_struct work;
-};
-
-struct host1x_waitlist {
- struct list_head list;
- struct kref refcount;
- u32 thresh;
- enum host1x_intr_action action;
- atomic_t state;
- void *data;
- int count;
-};
-
-/*
- * Schedule an action to be taken when a sync point reaches the given threshold.
- *
- * @id the sync point
- * @thresh the threshold
- * @action the action to take
- * @data a pointer to extra data depending on action, see above
- * @waiter waiter structure - assumes ownership
- * @ref must be passed if cancellation is possible, else NULL
- *
- * This is a non-blocking api.
- */
-int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
- u32 thresh, enum host1x_intr_action action,
- void *data, struct host1x_waitlist *waiter,
- void **ref);
-
-/*
- * Unreference an action submitted to host1x_intr_add_action().
- * You must call this if you passed non-NULL as ref.
- * @ref the ref returned from host1x_intr_add_action()
- * @flush wait until any pending handlers have completed before returning.
- */
-void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref,
- bool flush);
-
/* Initialize host1x sync point interrupt */
-int host1x_intr_init(struct host1x *host, unsigned int irq_sync);
+int host1x_intr_init(struct host1x *host);
/* Deinitialize host1x sync point interrupt */
void host1x_intr_deinit(struct host1x *host);
@@ -93,5 +28,10 @@ void host1x_intr_start(struct host1x *host);
/* Disable host1x sync point interrupt */
void host1x_intr_stop(struct host1x *host);
-irqreturn_t host1x_syncpt_thresh_fn(void *dev_id);
+void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id);
+
+void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence);
+
+bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *fence);
+
#endif
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 5e8c183167b7..3ed49e1fd933 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -88,9 +88,15 @@ static void job_free(struct kref *ref)
if (job->release)
job->release(job);
- if (job->waiter)
- host1x_intr_put_ref(job->syncpt->host, job->syncpt->id,
- job->waiter, false);
+ if (job->fence) {
+ /*
+ * remove_callback is atomic w.r.t. fence signaling, so
+ * after the call returns, we know that the callback is not
+ * in execution, and the fence can be safely freed.
+ */
+ dma_fence_remove_callback(job->fence, &job->fence_cb);
+ dma_fence_put(job->fence);
+ }
if (job->syncpt)
host1x_syncpt_put(job->syncpt);
@@ -175,7 +181,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
goto unpin;
}
- map = host1x_bo_pin(dev, bo, direction, &client->cache);
+ map = host1x_bo_pin(dev, bo, direction, NULL);
if (IS_ERR(map)) {
err = PTR_ERR(map);
goto unpin;
@@ -222,7 +228,7 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
goto unpin;
}
- map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, &host->cache);
+ map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, NULL);
if (IS_ERR(map)) {
err = PTR_ERR(map);
goto unpin;
diff --git a/drivers/gpu/host1x/mipi.c b/drivers/gpu/host1x/mipi.c
index 2efe12dde8bc..e51b43dd15a3 100644
--- a/drivers/gpu/host1x/mipi.c
+++ b/drivers/gpu/host1x/mipi.c
@@ -501,8 +501,6 @@ static int tegra_mipi_probe(struct platform_device *pdev)
{
const struct of_device_id *match;
struct tegra_mipi *mipi;
- struct resource *res;
- int err;
match = of_match_node(tegra_mipi_of_match, pdev->dev.of_node);
if (!match)
@@ -515,42 +513,27 @@ static int tegra_mipi_probe(struct platform_device *pdev)
mipi->soc = match->data;
mipi->dev = &pdev->dev;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- mipi->regs = devm_ioremap_resource(&pdev->dev, res);
+ mipi->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
if (IS_ERR(mipi->regs))
return PTR_ERR(mipi->regs);
mutex_init(&mipi->lock);
- mipi->clk = devm_clk_get(&pdev->dev, NULL);
+ mipi->clk = devm_clk_get_prepared(&pdev->dev, NULL);
if (IS_ERR(mipi->clk)) {
dev_err(&pdev->dev, "failed to get clock\n");
return PTR_ERR(mipi->clk);
}
- err = clk_prepare(mipi->clk);
- if (err < 0)
- return err;
-
platform_set_drvdata(pdev, mipi);
return 0;
}
-static int tegra_mipi_remove(struct platform_device *pdev)
-{
- struct tegra_mipi *mipi = platform_get_drvdata(pdev);
-
- clk_unprepare(mipi->clk);
-
- return 0;
-}
-
struct platform_driver tegra_mipi_driver = {
.driver = {
.name = "tegra-mipi",
.of_match_table = tegra_mipi_of_match,
},
.probe = tegra_mipi_probe,
- .remove = tegra_mipi_remove,
};
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index e08e331e46ae..acc7d82e0585 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/device.h>
+#include <linux/dma-fence.h>
#include <linux/slab.h>
#include <trace/events/host1x.h>
@@ -137,8 +138,15 @@ void host1x_syncpt_restore(struct host1x *host)
struct host1x_syncpt *sp_base = host->syncpt;
unsigned int i;
- for (i = 0; i < host1x_syncpt_nb_pts(host); i++)
+ for (i = 0; i < host1x_syncpt_nb_pts(host); i++) {
+ /*
+ * Unassign syncpt from channels for purposes of Tegra186
+ * syncpoint protection. This prevents any channel from
+ * accessing it until it is reassigned.
+ */
+ host1x_hw_syncpt_assign_to_channel(host, sp_base + i, NULL);
host1x_hw_syncpt_restore(host, sp_base + i);
+ }
for (i = 0; i < host1x_syncpt_nb_bases(host); i++)
host1x_hw_syncpt_restore_wait_base(host, sp_base + i);
@@ -202,17 +210,6 @@ int host1x_syncpt_incr(struct host1x_syncpt *sp)
}
EXPORT_SYMBOL(host1x_syncpt_incr);
-/*
- * Updated sync point form hardware, and returns true if syncpoint is expired,
- * false if we may need to wait
- */
-static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
-{
- host1x_hw_syncpt_load(sp->host, sp);
-
- return host1x_syncpt_is_expired(sp, thresh);
-}
-
/**
* host1x_syncpt_wait() - wait for a syncpoint to reach a given value
* @sp: host1x syncpoint
@@ -223,99 +220,46 @@ static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
u32 *value)
{
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
- void *ref;
- struct host1x_waitlist *waiter;
- int err = 0, check_count = 0;
- u32 val;
+ struct dma_fence *fence;
+ long wait_err;
- if (value)
- *value = 0;
+ host1x_hw_syncpt_load(sp->host, sp);
- /* first check cache */
- if (host1x_syncpt_is_expired(sp, thresh)) {
- if (value)
- *value = host1x_syncpt_load(sp);
+ if (value)
+ *value = host1x_syncpt_load(sp);
+ if (host1x_syncpt_is_expired(sp, thresh))
return 0;
- }
-
- /* try to read from register */
- val = host1x_hw_syncpt_load(sp->host, sp);
- if (host1x_syncpt_is_expired(sp, thresh)) {
- if (value)
- *value = val;
-
- goto done;
- }
-
- if (!timeout) {
- err = -EAGAIN;
- goto done;
- }
- /* allocate a waiter */
- waiter = kzalloc(sizeof(*waiter), GFP_KERNEL);
- if (!waiter) {
- err = -ENOMEM;
- goto done;
- }
-
- /* schedule a wakeup when the syncpoint value is reached */
- err = host1x_intr_add_action(sp->host, sp, thresh,
- HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
- &wq, waiter, &ref);
- if (err)
- goto done;
-
- err = -EAGAIN;
- /* Caller-specified timeout may be impractically low */
if (timeout < 0)
timeout = LONG_MAX;
+ else if (timeout == 0)
+ return -EAGAIN;
- /* wait for the syncpoint, or timeout, or signal */
- while (timeout) {
- long check = min_t(long, SYNCPT_CHECK_PERIOD, timeout);
- int remain;
-
- remain = wait_event_interruptible_timeout(wq,
- syncpt_load_min_is_expired(sp, thresh),
- check);
- if (remain > 0 || host1x_syncpt_is_expired(sp, thresh)) {
- if (value)
- *value = host1x_syncpt_load(sp);
+ fence = host1x_fence_create(sp, thresh, false);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
- err = 0;
+ wait_err = dma_fence_wait_timeout(fence, true, timeout);
+ if (wait_err == 0)
+ host1x_fence_cancel(fence);
+ dma_fence_put(fence);
- break;
- }
-
- if (remain < 0) {
- err = remain;
- break;
- }
-
- timeout -= check;
-
- if (timeout && check_count <= MAX_STUCK_CHECK_COUNT) {
- dev_warn(sp->host->dev,
- "%s: syncpoint id %u (%s) stuck waiting %d, timeout=%ld\n",
- current->comm, sp->id, sp->name,
- thresh, timeout);
-
- host1x_debug_dump_syncpts(sp->host);
-
- if (check_count == MAX_STUCK_CHECK_COUNT)
- host1x_debug_dump(sp->host);
-
- check_count++;
- }
- }
-
- host1x_intr_put_ref(sp->host, sp->id, ref, true);
+ if (value)
+ *value = host1x_syncpt_load(sp);
-done:
- return err;
+ /*
+ * Don't rely on dma_fence_wait_timeout return value,
+ * since it returns zero both on timeout and if the
+ * wait completed with 0 jiffies left.
+ */
+ host1x_hw_syncpt_load(sp->host, sp);
+ if (wait_err == 0 && !host1x_syncpt_is_expired(sp, thresh))
+ return -EAGAIN;
+ else if (wait_err < 0)
+ return wait_err;
+ else
+ return 0;
}
EXPORT_SYMBOL(host1x_syncpt_wait);
@@ -352,13 +296,6 @@ int host1x_syncpt_init(struct host1x *host)
for (i = 0; i < host->info->nb_pts; i++) {
syncpt[i].id = i;
syncpt[i].host = host;
-
- /*
- * Unassign syncpt from channels for purposes of Tegra186
- * syncpoint protection. This prevents any channel from
- * accessing it until it is reassigned.
- */
- host1x_hw_syncpt_assign_to_channel(host, &syncpt[i], NULL);
}
for (i = 0; i < host->info->nb_bases; i++)
@@ -408,8 +345,6 @@ static void syncpt_release(struct kref *ref)
sp->locked = false;
- mutex_lock(&sp->host->syncpt_mutex);
-
host1x_syncpt_base_free(sp->base);
kfree(sp->name);
sp->base = NULL;
@@ -432,7 +367,7 @@ void host1x_syncpt_put(struct host1x_syncpt *sp)
if (!sp)
return;
- kref_put(&sp->ref, syncpt_release);
+ kref_put_mutex(&sp->ref, syncpt_release, &sp->host->syncpt_mutex);
}
EXPORT_SYMBOL(host1x_syncpt_put);
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index 95cd29b79d6d..4c3f3b2f0e9c 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -14,6 +14,7 @@
#include <linux/kref.h>
#include <linux/sched.h>
+#include "fence.h"
#include "intr.h"
struct host1x;
@@ -39,7 +40,7 @@ struct host1x_syncpt {
struct host1x_syncpt_base *base;
/* interrupt data */
- struct host1x_syncpt_intr intr;
+ struct host1x_fence_list fences;
/*
* If a submission incrementing this syncpoint fails, lock it so that