summaryrefslogtreecommitdiff
path: root/drivers/gpu/host1x
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/host1x')
-rw-r--r--drivers/gpu/host1x/Kconfig8
-rw-r--r--drivers/gpu/host1x/Makefile8
-rw-r--r--drivers/gpu/host1x/bus.c150
-rw-r--r--drivers/gpu/host1x/bus.h2
-rw-r--r--drivers/gpu/host1x/cdma.c121
-rw-r--r--drivers/gpu/host1x/cdma.h2
-rw-r--r--drivers/gpu/host1x/channel.c40
-rw-r--r--drivers/gpu/host1x/channel.h4
-rw-r--r--drivers/gpu/host1x/context.c177
-rw-r--r--drivers/gpu/host1x/context.h38
-rw-r--r--drivers/gpu/host1x/context_bus.c26
-rw-r--r--drivers/gpu/host1x/debug.c68
-rw-r--r--drivers/gpu/host1x/debug.h1
-rw-r--r--drivers/gpu/host1x/dev.c429
-rw-r--r--drivers/gpu/host1x/dev.h46
-rw-r--r--drivers/gpu/host1x/fence.c154
-rw-r--r--drivers/gpu/host1x/fence.h30
-rw-r--r--drivers/gpu/host1x/hw/cdma_hw.c46
-rw-r--r--drivers/gpu/host1x/hw/channel_hw.c336
-rw-r--r--drivers/gpu/host1x/hw/debug_hw.c45
-rw-r--r--drivers/gpu/host1x/hw/debug_hw_1x01.c8
-rw-r--r--drivers/gpu/host1x/hw/debug_hw_1x06.c16
-rw-r--r--drivers/gpu/host1x/hw/host1x01_hardware.h114
-rw-r--r--drivers/gpu/host1x/hw/host1x02_hardware.h113
-rw-r--r--drivers/gpu/host1x/hw/host1x04_hardware.h113
-rw-r--r--drivers/gpu/host1x/hw/host1x05_hardware.h113
-rw-r--r--drivers/gpu/host1x/hw/host1x06_hardware.h118
-rw-r--r--drivers/gpu/host1x/hw/host1x07_hardware.h118
-rw-r--r--drivers/gpu/host1x/hw/host1x08.c33
-rw-r--r--drivers/gpu/host1x/hw/host1x08.h15
-rw-r--r--drivers/gpu/host1x/hw/host1x08_hardware.h21
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x02_uclass.h12
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x04_uclass.h12
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x05_uclass.h12
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x06_uclass.h14
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x07_uclass.h14
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_channel.h11
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_common.h11
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h9
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_uclass.h181
-rw-r--r--drivers/gpu/host1x/hw/hw_host1x08_vm.h36
-rw-r--r--drivers/gpu/host1x/hw/intr_hw.c136
-rw-r--r--drivers/gpu/host1x/hw/opcodes.h150
-rw-r--r--drivers/gpu/host1x/hw/syncpt_hw.c3
-rw-r--r--drivers/gpu/host1x/intr.c343
-rw-r--r--drivers/gpu/host1x/intr.h82
-rw-r--r--drivers/gpu/host1x/job.c260
-rw-r--r--drivers/gpu/host1x/job.h22
-rw-r--r--drivers/gpu/host1x/mipi.c21
-rw-r--r--drivers/gpu/host1x/syncpt.c148
-rw-r--r--drivers/gpu/host1x/syncpt.h15
51 files changed, 2317 insertions, 1688 deletions
diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig
index 6dab94adf25e..e6c78ae2003a 100644
--- a/drivers/gpu/host1x/Kconfig
+++ b/drivers/gpu/host1x/Kconfig
@@ -1,7 +1,13 @@
# SPDX-License-Identifier: GPL-2.0-only
+
+config TEGRA_HOST1X_CONTEXT_BUS
+ bool
+
config TEGRA_HOST1X
tristate "NVIDIA Tegra host1x driver"
- depends on ARCH_TEGRA || (ARM && COMPILE_TEST)
+ depends on ARCH_TEGRA || COMPILE_TEST
+ select DMA_SHARED_BUFFER
+ select TEGRA_HOST1X_CONTEXT_BUS
select IOMMU_IOVA
help
Driver for the NVIDIA Tegra host1x hardware.
diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile
index 096017b8789d..ee5286ffe08d 100644
--- a/drivers/gpu/host1x/Makefile
+++ b/drivers/gpu/host1x/Makefile
@@ -9,11 +9,17 @@ host1x-y = \
job.o \
debug.o \
mipi.o \
+ fence.o \
hw/host1x01.o \
hw/host1x02.o \
hw/host1x04.o \
hw/host1x05.o \
hw/host1x06.o \
- hw/host1x07.o
+ hw/host1x07.o \
+ hw/host1x08.o
+
+host1x-$(CONFIG_IOMMU_API) += \
+ context.o
obj-$(CONFIG_TEGRA_HOST1X) += host1x.o
+obj-$(CONFIG_TEGRA_HOST1X_CONTEXT_BUS) += context_bus.o
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index 218e3718fd68..723a80895cd4 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -5,6 +5,7 @@
*/
#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
#include <linux/host1x.h>
#include <linux/of.h>
#include <linux/seq_file.h>
@@ -40,7 +41,6 @@ static int host1x_subdev_add(struct host1x_device *device,
struct device_node *np)
{
struct host1x_subdev *subdev;
- struct device_node *child;
int err;
subdev = kzalloc(sizeof(*subdev), GFP_KERNEL);
@@ -55,13 +55,12 @@ static int host1x_subdev_add(struct host1x_device *device,
mutex_unlock(&device->subdevs_lock);
/* recursively add children */
- for_each_child_of_node(np, child) {
+ for_each_child_of_node_scoped(np, child) {
if (of_match_node(driver->subdevs, child) &&
of_device_is_available(child)) {
err = host1x_subdev_add(device, driver, child);
if (err < 0) {
/* XXX cleanup? */
- of_node_put(child);
return err;
}
}
@@ -89,17 +88,14 @@ static void host1x_subdev_del(struct host1x_subdev *subdev)
static int host1x_device_parse_dt(struct host1x_device *device,
struct host1x_driver *driver)
{
- struct device_node *np;
int err;
- for_each_child_of_node(device->dev.parent->of_node, np) {
+ for_each_child_of_node_scoped(device->dev.parent->of_node, np) {
if (of_match_node(driver->subdevs, np) &&
of_device_is_available(np)) {
err = host1x_subdev_add(device, driver, np);
- if (err < 0) {
- of_node_put(np);
+ if (err < 0)
return err;
- }
}
}
@@ -332,46 +328,24 @@ static int host1x_del_client(struct host1x *host1x,
return -ENODEV;
}
-static int host1x_device_match(struct device *dev, struct device_driver *drv)
+static int host1x_device_match(struct device *dev, const struct device_driver *drv)
{
return strcmp(dev_name(dev), drv->name) == 0;
}
-static int host1x_device_uevent(struct device *dev,
+/*
+ * Note that this is really only needed for backwards compatibility
+ * with libdrm, which parses this information from sysfs and will
+ * fail if it can't find the OF_FULLNAME, specifically.
+ */
+static int host1x_device_uevent(const struct device *dev,
struct kobj_uevent_env *env)
{
- struct device_node *np = dev->parent->of_node;
- unsigned int count = 0;
- struct property *p;
- const char *compat;
-
- /*
- * This duplicates most of of_device_uevent(), but the latter cannot
- * be called from modules and operates on dev->of_node, which is not
- * available in this case.
- *
- * Note that this is really only needed for backwards compatibility
- * with libdrm, which parses this information from sysfs and will
- * fail if it can't find the OF_FULLNAME, specifically.
- */
- add_uevent_var(env, "OF_NAME=%pOFn", np);
- add_uevent_var(env, "OF_FULLNAME=%pOF", np);
-
- of_property_for_each_string(np, "compatible", p, compat) {
- add_uevent_var(env, "OF_COMPATIBLE_%u=%s", count, compat);
- count++;
- }
-
- add_uevent_var(env, "OF_COMPATIBLE_N=%u", count);
+ of_device_uevent(dev->parent, env);
return 0;
}
-static int host1x_dma_configure(struct device *dev)
-{
- return of_dma_configure(dev, dev->of_node, true);
-}
-
static const struct dev_pm_ops host1x_device_pm_ops = {
.suspend = pm_generic_suspend,
.resume = pm_generic_resume,
@@ -381,11 +355,10 @@ static const struct dev_pm_ops host1x_device_pm_ops = {
.restore = pm_generic_restore,
};
-struct bus_type host1x_bus_type = {
+const struct bus_type host1x_bus_type = {
.name = "host1x",
.match = host1x_device_match,
.uevent = host1x_device_uevent,
- .dma_configure = host1x_dma_configure,
.pm = &host1x_device_pm_ops,
};
@@ -474,8 +447,6 @@ static int host1x_device_add(struct host1x *host1x,
device->dev.bus = &host1x_bus_type;
device->dev.parent = host1x->dev;
- of_dma_configure(&device->dev, host1x->dev->of_node, true);
-
device->dev.dma_parms = &device->dma_parms;
dma_set_max_seg_size(&device->dev, UINT_MAX);
@@ -500,6 +471,18 @@ static int host1x_device_add(struct host1x *host1x,
mutex_unlock(&clients_lock);
+ /*
+ * Add device even if there are no subdevs to ensure syncpoint functionality
+ * is available regardless of whether any engine subdevices are present
+ */
+ if (list_empty(&device->subdevs)) {
+ err = device_add(&device->dev);
+ if (err < 0)
+ dev_err(&device->dev, "failed to add device: %d\n", err);
+ else
+ device->registered = true;
+ }
+
return 0;
}
@@ -742,6 +725,7 @@ EXPORT_SYMBOL(host1x_driver_unregister);
*/
void __host1x_client_init(struct host1x_client *client, struct lock_class_key *key)
{
+ host1x_bo_cache_init(&client->cache);
INIT_LIST_HEAD(&client->list);
__mutex_init(&client->lock, "host1x client lock", key);
client->usecount = 0;
@@ -761,7 +745,6 @@ EXPORT_SYMBOL(host1x_client_exit);
/**
* __host1x_client_register() - register a host1x client
* @client: host1x client
- * @key: lock class key for the client-specific mutex
*
* Registers a host1x client with each host1x controller instance. Note that
* each client will only match their parent host1x controller and will only be
@@ -802,7 +785,7 @@ EXPORT_SYMBOL(__host1x_client_register);
* Removes a host1x client from its host1x controller instance. If a logical
* device has already been initialized, it will be torn down.
*/
-int host1x_client_unregister(struct host1x_client *client)
+void host1x_client_unregister(struct host1x_client *client)
{
struct host1x_client *c;
struct host1x *host1x;
@@ -814,7 +797,7 @@ int host1x_client_unregister(struct host1x_client *client)
err = host1x_del_client(host1x, client);
if (!err) {
mutex_unlock(&devices_lock);
- return 0;
+ return;
}
}
@@ -830,7 +813,7 @@ int host1x_client_unregister(struct host1x_client *client)
mutex_unlock(&clients_lock);
- return 0;
+ host1x_bo_cache_destroy(&client->cache);
}
EXPORT_SYMBOL(host1x_client_unregister);
@@ -904,3 +887,78 @@ unlock:
return err;
}
EXPORT_SYMBOL(host1x_client_resume);
+
+struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo,
+ enum dma_data_direction dir,
+ struct host1x_bo_cache *cache)
+{
+ struct host1x_bo_mapping *mapping;
+
+ if (cache) {
+ mutex_lock(&cache->lock);
+
+ list_for_each_entry(mapping, &cache->mappings, entry) {
+ if (mapping->bo == bo && mapping->direction == dir) {
+ kref_get(&mapping->ref);
+ goto unlock;
+ }
+ }
+ }
+
+ mapping = bo->ops->pin(dev, bo, dir);
+ if (IS_ERR(mapping))
+ goto unlock;
+
+ spin_lock(&mapping->bo->lock);
+ list_add_tail(&mapping->list, &bo->mappings);
+ spin_unlock(&mapping->bo->lock);
+
+ if (cache) {
+ INIT_LIST_HEAD(&mapping->entry);
+ mapping->cache = cache;
+
+ list_add_tail(&mapping->entry, &cache->mappings);
+
+ /* bump reference count to track the copy in the cache */
+ kref_get(&mapping->ref);
+ }
+
+unlock:
+ if (cache)
+ mutex_unlock(&cache->lock);
+
+ return mapping;
+}
+EXPORT_SYMBOL(host1x_bo_pin);
+
+static void __host1x_bo_unpin(struct kref *ref)
+{
+ struct host1x_bo_mapping *mapping = to_host1x_bo_mapping(ref);
+
+ /*
+ * When the last reference of the mapping goes away, make sure to remove the mapping from
+ * the cache.
+ */
+ if (mapping->cache)
+ list_del(&mapping->entry);
+
+ spin_lock(&mapping->bo->lock);
+ list_del(&mapping->list);
+ spin_unlock(&mapping->bo->lock);
+
+ mapping->bo->ops->unpin(mapping);
+}
+
+void host1x_bo_unpin(struct host1x_bo_mapping *mapping)
+{
+ struct host1x_bo_cache *cache = mapping->cache;
+
+ if (cache)
+ mutex_lock(&cache->lock);
+
+ kref_put(&mapping->ref, __host1x_bo_unpin);
+
+ if (cache)
+ mutex_unlock(&cache->lock);
+}
+EXPORT_SYMBOL(host1x_bo_unpin);
diff --git a/drivers/gpu/host1x/bus.h b/drivers/gpu/host1x/bus.h
index a4adf9abc3b4..a80ceadfeb34 100644
--- a/drivers/gpu/host1x/bus.h
+++ b/drivers/gpu/host1x/bus.h
@@ -10,7 +10,7 @@
struct bus_type;
struct host1x;
-extern struct bus_type host1x_bus_type;
+extern const struct bus_type host1x_bus_type;
int host1x_register(struct host1x *host1x);
int host1x_unregister(struct host1x *host1x);
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 6e6ca774f68d..ba2e572567c0 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -105,7 +105,7 @@ static int host1x_pushbuffer_init(struct push_buffer *pb)
pb->dma = iova_dma_addr(&host1x->iova, alloc);
err = iommu_map(host1x->domain, pb->dma, pb->phys, size,
- IOMMU_READ);
+ IOMMU_READ, GFP_KERNEL);
if (err)
goto iommu_free_iova;
} else {
@@ -247,8 +247,6 @@ static int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x,
trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev),
CDMA_EVENT_PUSH_BUFFER_SPACE);
- host1x_hw_cdma_flush(host1x, cdma);
-
/* If somebody has managed to already start waiting, yield */
if (cdma->event != CDMA_EVENT_NONE) {
mutex_unlock(&cdma->lock);
@@ -312,10 +310,6 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
bool signal = false;
struct host1x_job *job, *n;
- /* If CDMA is stopped, queue is cleared and we can return */
- if (!cdma->running)
- return;
-
/*
* Walk the sync queue, reading the sync point registers as necessary,
* to consume as many sync queue entries as possible without blocking
@@ -324,7 +318,8 @@ static void update_cdma_locked(struct host1x_cdma *cdma)
struct host1x_syncpt *sp = job->syncpt;
/* Check whether this syncpt has completed, and bail if not */
- if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) {
+ if (!host1x_syncpt_is_expired(sp, job->syncpt_end) &&
+ !job->cancelled) {
/* Start timer on next pending syncpt */
if (job->timeout)
cdma_start_timer_locked(cdma, job);
@@ -413,8 +408,11 @@ syncpt_incr:
else
restart_addr = cdma->last_pos;
+ if (!job)
+ goto resume;
+
/* do CPU increments for the remaining syncpts */
- if (job) {
+ if (job->syncpt_recovery) {
dev_dbg(dev, "%s: perform CPU incr on pending buffers\n",
__func__);
@@ -433,12 +431,72 @@ syncpt_incr:
dev_dbg(dev, "%s: finished sync_queue modification\n",
__func__);
+ } else {
+ struct host1x_job *failed_job = job;
+
+ host1x_job_dump(dev, job);
+
+ host1x_syncpt_set_locked(job->syncpt);
+ failed_job->cancelled = true;
+
+ list_for_each_entry_continue(job, &cdma->sync_queue, list) {
+ unsigned int i;
+
+ if (job->syncpt != failed_job->syncpt)
+ continue;
+
+ for (i = 0; i < job->num_slots; i++) {
+ unsigned int slot = (job->first_get/8 + i) %
+ HOST1X_PUSHBUFFER_SLOTS;
+ u32 *mapped = cdma->push_buffer.mapped;
+
+ /*
+ * Overwrite opcodes with 0 word writes
+ * to offset 0xbad. This does nothing but
+ * has a easily detected signature in debug
+ * traces.
+ *
+ * On systems with MLOCK enforcement enabled,
+ * the above 0 word writes would fall foul of
+ * the enforcement. As such, in the first slot
+ * put a RESTART_W opcode to the beginning
+ * of the next job. We don't use this for older
+ * chips since those only support the RESTART
+ * opcode with inconvenient alignment requirements.
+ */
+ if (i == 0 && host1x->info->has_wide_gather) {
+ unsigned int next_job = (job->first_get/8 + job->num_slots)
+ % HOST1X_PUSHBUFFER_SLOTS;
+ mapped[2*slot+0] = (0xd << 28) | (next_job * 2);
+ mapped[2*slot+1] = 0x0;
+ } else {
+ mapped[2*slot+0] = 0x1bad0000;
+ mapped[2*slot+1] = 0x1bad0000;
+ }
+ }
+
+ job->cancelled = true;
+ }
+
+ wmb();
+
+ update_cdma_locked(cdma);
}
+resume:
/* roll back DMAGET and start up channel again */
host1x_hw_cdma_resume(host1x, cdma, restart_addr);
}
+static void cdma_update_work(struct work_struct *work)
+{
+ struct host1x_cdma *cdma = container_of(work, struct host1x_cdma, update_work);
+
+ mutex_lock(&cdma->lock);
+ update_cdma_locked(cdma);
+ mutex_unlock(&cdma->lock);
+}
+
/*
* Create a cdma
*/
@@ -448,6 +506,7 @@ int host1x_cdma_init(struct host1x_cdma *cdma)
mutex_init(&cdma->lock);
init_completion(&cdma->complete);
+ INIT_WORK(&cdma->update_work, cdma_update_work);
INIT_LIST_HEAD(&cdma->sync_queue);
@@ -490,6 +549,16 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)
mutex_lock(&cdma->lock);
+ /*
+ * Check if syncpoint was locked due to previous job timeout.
+ * This needs to be done within the cdma lock to avoid a race
+ * with the timeout handler.
+ */
+ if (job->syncpt->locked) {
+ mutex_unlock(&cdma->lock);
+ return -EPERM;
+ }
+
if (job->timeout) {
/* init state on first submit with timeout value */
if (!cdma->timeout.initialized) {
@@ -520,7 +589,6 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job)
*/
void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2)
{
- struct host1x *host1x = cdma_to_host1x(cdma);
struct push_buffer *pb = &cdma->push_buffer;
u32 slots_free = cdma->slots_free;
@@ -528,11 +596,9 @@ void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2)
trace_host1x_cdma_push(dev_name(cdma_to_channel(cdma)->dev),
op1, op2);
- if (slots_free == 0) {
- host1x_hw_cdma_flush(host1x, cdma);
+ if (slots_free == 0)
slots_free = host1x_cdma_wait_locked(cdma,
CDMA_EVENT_PUSH_BUFFER_SPACE);
- }
cdma->slots_free = slots_free - 1;
cdma->slots_used++;
@@ -554,8 +620,7 @@ void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2,
struct host1x_channel *channel = cdma_to_channel(cdma);
struct host1x *host1x = cdma_to_host1x(cdma);
struct push_buffer *pb = &cdma->push_buffer;
- unsigned int needed = 2, extra = 0, i;
- unsigned int space = cdma->slots_free;
+ unsigned int space, needed = 2, extra = 0;
if (host1x_debug_trace_cmdbuf)
trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2,
@@ -573,20 +638,14 @@ void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2,
cdma->slots_free = space - needed;
cdma->slots_used += needed;
- /*
- * Note that we rely on the fact that this is only used to submit wide
- * gather opcodes, which consist of 3 words, and they are padded with
- * a NOP to avoid having to deal with fractional slots (a slot always
- * represents 2 words). The fourth opcode passed to this function will
- * therefore always be a NOP.
- *
- * This works around a slight ambiguity when it comes to opcodes. For
- * all current host1x incarnations the NOP opcode uses the exact same
- * encoding (0x20000000), so we could hard-code the value here, but a
- * new incarnation may change it and break that assumption.
- */
- for (i = 0; i < extra; i++)
- host1x_pushbuffer_push(pb, op4, op4);
+ if (extra > 0) {
+ /*
+ * If there isn't enough space at the tail of the pushbuffer,
+ * insert a RESTART(0) here to go back to the beginning.
+ * The code above adjusted the indexes appropriately.
+ */
+ host1x_pushbuffer_push(pb, (0x5 << 28), 0xdead0000);
+ }
host1x_pushbuffer_push(pb, op1, op2);
host1x_pushbuffer_push(pb, op3, op4);
@@ -624,7 +683,5 @@ void host1x_cdma_end(struct host1x_cdma *cdma,
*/
void host1x_cdma_update(struct host1x_cdma *cdma)
{
- mutex_lock(&cdma->lock);
- update_cdma_locked(cdma);
- mutex_unlock(&cdma->lock);
+ schedule_work(&cdma->update_work);
}
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index 12c4327c4df0..7fd8168af4f9 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -11,6 +11,7 @@
#include <linux/sched.h>
#include <linux/completion.h>
#include <linux/list.h>
+#include <linux/workqueue.h>
struct host1x_syncpt;
struct host1x_userctx_timeout;
@@ -69,6 +70,7 @@ struct host1x_cdma {
struct buffer_timeout timeout; /* channel's timeout state/wq */
bool running;
bool torndown;
+ struct work_struct update_work;
};
#define cdma_to_channel(cdma) container_of(cdma, struct host1x_channel, cdma)
diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c
index 4cd212bb570d..08077afe4cde 100644
--- a/drivers/gpu/host1x/channel.c
+++ b/drivers/gpu/host1x/channel.c
@@ -21,22 +21,20 @@ int host1x_channel_list_init(struct host1x_channel_list *chlist,
if (!chlist->channels)
return -ENOMEM;
- chlist->allocated_channels =
- kcalloc(BITS_TO_LONGS(num_channels), sizeof(unsigned long),
- GFP_KERNEL);
+ chlist->allocated_channels = bitmap_zalloc(num_channels, GFP_KERNEL);
if (!chlist->allocated_channels) {
kfree(chlist->channels);
return -ENOMEM;
}
- bitmap_zero(chlist->allocated_channels, num_channels);
+ mutex_init(&chlist->lock);
return 0;
}
void host1x_channel_list_free(struct host1x_channel_list *chlist)
{
- kfree(chlist->allocated_channels);
+ bitmap_free(chlist->allocated_channels);
kfree(chlist->channels);
}
@@ -75,6 +73,33 @@ struct host1x_channel *host1x_channel_get_index(struct host1x *host,
return ch;
}
+void host1x_channel_stop(struct host1x_channel *channel)
+{
+ struct host1x *host = dev_get_drvdata(channel->dev->parent);
+
+ host1x_hw_cdma_stop(host, &channel->cdma);
+}
+EXPORT_SYMBOL(host1x_channel_stop);
+
+/**
+ * host1x_channel_stop_all() - disable CDMA on allocated channels
+ * @host: host1x instance
+ *
+ * Stop CDMA on allocated channels
+ */
+void host1x_channel_stop_all(struct host1x *host)
+{
+ struct host1x_channel_list *chlist = &host->channel_list;
+ int bit;
+
+ mutex_lock(&chlist->lock);
+
+ for_each_set_bit(bit, chlist->allocated_channels, host->info->nb_channels)
+ host1x_channel_stop(&chlist->channels[bit]);
+
+ mutex_unlock(&chlist->lock);
+}
+
static void release_channel(struct kref *kref)
{
struct host1x_channel *channel =
@@ -100,8 +125,11 @@ static struct host1x_channel *acquire_unused_channel(struct host1x *host)
unsigned int max_channels = host->info->nb_channels;
unsigned int index;
+ mutex_lock(&chlist->lock);
+
index = find_first_zero_bit(chlist->allocated_channels, max_channels);
if (index >= max_channels) {
+ mutex_unlock(&chlist->lock);
dev_err(host->dev, "failed to find free channel\n");
return NULL;
}
@@ -110,6 +138,8 @@ static struct host1x_channel *acquire_unused_channel(struct host1x *host)
set_bit(index, chlist->allocated_channels);
+ mutex_unlock(&chlist->lock);
+
return &chlist->channels[index];
}
diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h
index 39044ff6c3aa..d7aede204d83 100644
--- a/drivers/gpu/host1x/channel.h
+++ b/drivers/gpu/host1x/channel.h
@@ -10,6 +10,7 @@
#include <linux/io.h>
#include <linux/kref.h>
+#include <linux/mutex.h>
#include "cdma.h"
@@ -18,6 +19,8 @@ struct host1x_channel;
struct host1x_channel_list {
struct host1x_channel *channels;
+
+ struct mutex lock;
unsigned long *allocated_channels;
};
@@ -37,5 +40,6 @@ int host1x_channel_list_init(struct host1x_channel_list *chlist,
void host1x_channel_list_free(struct host1x_channel_list *chlist);
struct host1x_channel *host1x_channel_get_index(struct host1x *host,
unsigned int index);
+void host1x_channel_stop_all(struct host1x *host);
#endif
diff --git a/drivers/gpu/host1x/context.c b/drivers/gpu/host1x/context.c
new file mode 100644
index 000000000000..a6f6779662a3
--- /dev/null
+++ b/drivers/gpu/host1x/context.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA Corporation.
+ */
+
+#include <linux/device.h>
+#include <linux/kref.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/pid.h>
+#include <linux/slab.h>
+
+#include "context.h"
+#include "dev.h"
+
+static void host1x_memory_context_release(struct device *dev)
+{
+ /* context device is freed in host1x_memory_context_list_free() */
+}
+
+int host1x_memory_context_list_init(struct host1x *host1x)
+{
+ struct host1x_memory_context_list *cdl = &host1x->context_list;
+ struct device_node *node = host1x->dev->of_node;
+ struct host1x_memory_context *ctx;
+ unsigned int i;
+ int err;
+
+ cdl->devs = NULL;
+ cdl->len = 0;
+ mutex_init(&cdl->lock);
+
+ err = of_property_count_u32_elems(node, "iommu-map");
+ if (err < 0)
+ return 0;
+
+ cdl->len = err / 4;
+ cdl->devs = kcalloc(cdl->len, sizeof(*cdl->devs), GFP_KERNEL);
+ if (!cdl->devs)
+ return -ENOMEM;
+
+ for (i = 0; i < cdl->len; i++) {
+ ctx = &cdl->devs[i];
+
+ ctx->host = host1x;
+
+ device_initialize(&ctx->dev);
+
+ /*
+ * Due to an issue with T194 NVENC, only 38 bits can be used.
+ * Anyway, 256GiB of IOVA ought to be enough for anyone.
+ */
+ ctx->dma_mask = DMA_BIT_MASK(38);
+ ctx->dev.dma_mask = &ctx->dma_mask;
+ ctx->dev.coherent_dma_mask = ctx->dma_mask;
+ dev_set_name(&ctx->dev, "host1x-ctx.%d", i);
+ ctx->dev.bus = &host1x_context_device_bus_type;
+ ctx->dev.parent = host1x->dev;
+ ctx->dev.release = host1x_memory_context_release;
+
+ ctx->dev.dma_parms = &ctx->dma_parms;
+ dma_set_max_seg_size(&ctx->dev, UINT_MAX);
+
+ err = device_add(&ctx->dev);
+ if (err) {
+ dev_err(host1x->dev, "could not add context device %d: %d\n", i, err);
+ put_device(&ctx->dev);
+ goto unreg_devices;
+ }
+
+ err = of_dma_configure_id(&ctx->dev, node, true, &i);
+ if (err) {
+ dev_err(host1x->dev, "IOMMU configuration failed for context device %d: %d\n",
+ i, err);
+ device_unregister(&ctx->dev);
+ goto unreg_devices;
+ }
+
+ if (!tegra_dev_iommu_get_stream_id(&ctx->dev, &ctx->stream_id) ||
+ !device_iommu_mapped(&ctx->dev)) {
+ dev_err(host1x->dev, "Context device %d has no IOMMU!\n", i);
+ device_unregister(&ctx->dev);
+
+ /*
+ * This means that if IOMMU is disabled but context devices
+ * are defined in the device tree, Host1x will fail to probe.
+ * That's probably OK in this time and age.
+ */
+ err = -EINVAL;
+
+ goto unreg_devices;
+ }
+ }
+
+ return 0;
+
+unreg_devices:
+ while (i--)
+ device_unregister(&cdl->devs[i].dev);
+
+ kfree(cdl->devs);
+ cdl->devs = NULL;
+ cdl->len = 0;
+
+ return err;
+}
+
+void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl)
+{
+ unsigned int i;
+
+ for (i = 0; i < cdl->len; i++)
+ device_unregister(&cdl->devs[i].dev);
+
+ kfree(cdl->devs);
+ cdl->len = 0;
+}
+
+struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x,
+ struct device *dev,
+ struct pid *pid)
+{
+ struct host1x_memory_context_list *cdl = &host1x->context_list;
+ struct host1x_memory_context *free = NULL;
+ int i;
+
+ if (!cdl->len)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ mutex_lock(&cdl->lock);
+
+ for (i = 0; i < cdl->len; i++) {
+ struct host1x_memory_context *cd = &cdl->devs[i];
+
+ if (cd->dev.iommu->iommu_dev != dev->iommu->iommu_dev)
+ continue;
+
+ if (cd->owner == pid) {
+ refcount_inc(&cd->ref);
+ mutex_unlock(&cdl->lock);
+ return cd;
+ } else if (!cd->owner && !free) {
+ free = cd;
+ }
+ }
+
+ if (!free) {
+ mutex_unlock(&cdl->lock);
+ return ERR_PTR(-EBUSY);
+ }
+
+ refcount_set(&free->ref, 1);
+ free->owner = get_pid(pid);
+
+ mutex_unlock(&cdl->lock);
+
+ return free;
+}
+EXPORT_SYMBOL_GPL(host1x_memory_context_alloc);
+
+void host1x_memory_context_get(struct host1x_memory_context *cd)
+{
+ refcount_inc(&cd->ref);
+}
+EXPORT_SYMBOL_GPL(host1x_memory_context_get);
+
+void host1x_memory_context_put(struct host1x_memory_context *cd)
+{
+ struct host1x_memory_context_list *cdl = &cd->host->context_list;
+
+ if (refcount_dec_and_mutex_lock(&cd->ref, &cdl->lock)) {
+ put_pid(cd->owner);
+ cd->owner = NULL;
+ mutex_unlock(&cdl->lock);
+ }
+}
+EXPORT_SYMBOL_GPL(host1x_memory_context_put);
diff --git a/drivers/gpu/host1x/context.h b/drivers/gpu/host1x/context.h
new file mode 100644
index 000000000000..3e03bc1d3bac
--- /dev/null
+++ b/drivers/gpu/host1x/context.h
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x context devices
+ *
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_CONTEXT_H
+#define __HOST1X_CONTEXT_H
+
+#include <linux/mutex.h>
+#include <linux/refcount.h>
+
+struct host1x;
+
+extern struct bus_type host1x_context_device_bus_type;
+
+struct host1x_memory_context_list {
+ struct mutex lock;
+ struct host1x_memory_context *devs;
+ unsigned int len;
+};
+
+#ifdef CONFIG_IOMMU_API
+int host1x_memory_context_list_init(struct host1x *host1x);
+void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl);
+#else
+static inline int host1x_memory_context_list_init(struct host1x *host1x)
+{
+ return 0;
+}
+
+static inline void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl)
+{
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/host1x/context_bus.c b/drivers/gpu/host1x/context_bus.c
new file mode 100644
index 000000000000..7cd0e1a5edd1
--- /dev/null
+++ b/drivers/gpu/host1x/context_bus.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2021, NVIDIA Corporation.
+ */
+
+#include <linux/device.h>
+#include <linux/of.h>
+
+const struct bus_type host1x_context_device_bus_type = {
+ .name = "host1x-context",
+};
+EXPORT_SYMBOL_GPL(host1x_context_device_bus_type);
+
+static int __init host1x_context_device_bus_init(void)
+{
+ int err;
+
+ err = bus_register(&host1x_context_device_bus_type);
+ if (err < 0) {
+ pr_err("bus type registration failed: %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+postcore_initcall(host1x_context_device_bus_init);
diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c
index 8a14880c61bb..6433c00d5d7e 100644
--- a/drivers/gpu/host1x/debug.c
+++ b/drivers/gpu/host1x/debug.c
@@ -7,6 +7,7 @@
*/
#include <linux/debugfs.h>
+#include <linux/pm_runtime.h>
#include <linux/seq_file.h>
#include <linux/uaccess.h>
@@ -52,6 +53,11 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
{
struct host1x *m = dev_get_drvdata(ch->dev->parent);
struct output *o = data;
+ int err;
+
+ err = pm_runtime_resume_and_get(m->dev);
+ if (err < 0)
+ return err;
mutex_lock(&ch->cdma.lock);
mutex_lock(&debug_lock);
@@ -64,27 +70,38 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo)
mutex_unlock(&debug_lock);
mutex_unlock(&ch->cdma.lock);
+ pm_runtime_put(m->dev);
+
return 0;
}
-static void show_syncpts(struct host1x *m, struct output *o)
+static void show_syncpts(struct host1x *m, struct output *o, bool show_all)
{
+ unsigned long irqflags;
struct list_head *pos;
unsigned int i;
+ int err;
host1x_debug_output(o, "---- syncpts ----\n");
+ err = pm_runtime_resume_and_get(m->dev);
+ if (err < 0)
+ return;
+
for (i = 0; i < host1x_syncpt_nb_pts(m); i++) {
u32 max = host1x_syncpt_read_max(m->syncpt + i);
u32 min = host1x_syncpt_load(m->syncpt + i);
unsigned int waiters = 0;
- spin_lock(&m->syncpt[i].intr.lock);
- list_for_each(pos, &m->syncpt[i].intr.wait_head)
+ spin_lock_irqsave(&m->syncpt[i].fences.lock, irqflags);
+ list_for_each(pos, &m->syncpt[i].fences.list)
waiters++;
- spin_unlock(&m->syncpt[i].intr.lock);
+ spin_unlock_irqrestore(&m->syncpt[i].fences.lock, irqflags);
- if (!min && !max && !waiters)
+ if (!kref_read(&m->syncpt[i].ref))
+ continue;
+
+ if (!show_all && !min && !max && !waiters)
continue;
host1x_debug_output(o,
@@ -101,6 +118,8 @@ static void show_syncpts(struct host1x *m, struct output *o)
base_val);
}
+ pm_runtime_put(m->dev);
+
host1x_debug_output(o, "\n");
}
@@ -109,7 +128,7 @@ static void show_all(struct host1x *m, struct output *o, bool show_fifo)
unsigned int i;
host1x_hw_show_mlocks(m, o);
- show_syncpts(m, o);
+ show_syncpts(m, o, true);
host1x_debug_output(o, "---- channels ----\n");
for (i = 0; i < m->info->nb_channels; ++i) {
@@ -122,7 +141,7 @@ static void show_all(struct host1x *m, struct output *o, bool show_fifo)
}
}
-static int host1x_debug_show_all(struct seq_file *s, void *unused)
+static int host1x_debug_all_show(struct seq_file *s, void *unused)
{
struct output o = {
.fn = write_to_seqfile,
@@ -133,6 +152,7 @@ static int host1x_debug_show_all(struct seq_file *s, void *unused)
return 0;
}
+DEFINE_SHOW_ATTRIBUTE(host1x_debug_all);
static int host1x_debug_show(struct seq_file *s, void *unused)
{
@@ -145,30 +165,7 @@ static int host1x_debug_show(struct seq_file *s, void *unused)
return 0;
}
-
-static int host1x_debug_open_all(struct inode *inode, struct file *file)
-{
- return single_open(file, host1x_debug_show_all, inode->i_private);
-}
-
-static const struct file_operations host1x_debug_all_fops = {
- .open = host1x_debug_open_all,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-static int host1x_debug_open(struct inode *inode, struct file *file)
-{
- return single_open(file, host1x_debug_show, inode->i_private);
-}
-
-static const struct file_operations host1x_debug_fops = {
- .open = host1x_debug_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
+DEFINE_SHOW_ATTRIBUTE(host1x_debug);
static void host1x_debugfs_init(struct host1x *host1x)
{
@@ -219,12 +216,3 @@ void host1x_debug_dump(struct host1x *host1x)
show_all(host1x, &o, true);
}
-
-void host1x_debug_dump_syncpts(struct host1x *host1x)
-{
- struct output o = {
- .fn = write_to_printk
- };
-
- show_syncpts(host1x, &o);
-}
diff --git a/drivers/gpu/host1x/debug.h b/drivers/gpu/host1x/debug.h
index 62bd8a091fa7..c43c61d876a9 100644
--- a/drivers/gpu/host1x/debug.h
+++ b/drivers/gpu/host1x/debug.h
@@ -41,6 +41,5 @@ extern unsigned int host1x_debug_trace_cmdbuf;
void host1x_debug_init(struct host1x *host1x);
void host1x_debug_deinit(struct host1x *host1x);
void host1x_debug_dump(struct host1x *host1x);
-void host1x_debug_dump_syncpts(struct host1x *host1x);
#endif
diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c
index fbb6447b8659..3f475f0e6545 100644
--- a/drivers/gpu/host1x/dev.c
+++ b/drivers/gpu/host1x/dev.c
@@ -6,20 +6,30 @@
*/
#include <linux/clk.h>
+#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/module.h>
-#include <linux/of_device.h>
#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <linux/slab.h>
+#include <soc/tegra/common.h>
+
#define CREATE_TRACE_POINTS
#include <trace/events/host1x.h>
#undef CREATE_TRACE_POINTS
+#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+#include <asm/dma-iommu.h>
+#endif
+
#include "bus.h"
#include "channel.h"
+#include "context.h"
#include "debug.h"
#include "dev.h"
#include "intr.h"
@@ -30,6 +40,12 @@
#include "hw/host1x05.h"
#include "hw/host1x06.h"
#include "hw/host1x07.h"
+#include "hw/host1x08.h"
+
+void host1x_common_writel(struct host1x *host1x, u32 v, u32 r)
+{
+ writel(v, host1x->common_regs + r);
+}
void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r)
{
@@ -55,6 +71,15 @@ u32 host1x_sync_readl(struct host1x *host1x, u32 r)
return readl(sync_regs + r);
}
+#ifdef CONFIG_64BIT
+u64 host1x_sync_readq(struct host1x *host1x, u32 r)
+{
+ void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset;
+
+ return readq(sync_regs + r);
+}
+#endif
+
void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r)
{
writel(v, ch->regs + r);
@@ -126,12 +151,29 @@ static const struct host1x_info host1x05_info = {
};
static const struct host1x_sid_entry tegra186_sid_table[] = {
- {
- /* VIC */
- .base = 0x1af0,
- .offset = 0x30,
- .limit = 0x34
- },
+ { /* SE1 */ .base = 0x1ac8, .offset = 0x90, .limit = 0x90 },
+ { /* SE2 */ .base = 0x1ad0, .offset = 0x90, .limit = 0x90 },
+ { /* SE3 */ .base = 0x1ad8, .offset = 0x90, .limit = 0x90 },
+ { /* SE4 */ .base = 0x1ae0, .offset = 0x90, .limit = 0x90 },
+ { /* ISP */ .base = 0x1ae8, .offset = 0x50, .limit = 0x50 },
+ { /* VIC */ .base = 0x1af0, .offset = 0x30, .limit = 0x34 },
+ { /* NVENC */ .base = 0x1af8, .offset = 0x30, .limit = 0x34 },
+ { /* NVDEC */ .base = 0x1b00, .offset = 0x30, .limit = 0x34 },
+ { /* NVJPG */ .base = 0x1b08, .offset = 0x30, .limit = 0x34 },
+ { /* TSEC */ .base = 0x1b10, .offset = 0x30, .limit = 0x34 },
+ { /* TSECB */ .base = 0x1b18, .offset = 0x30, .limit = 0x34 },
+ { /* VI 0 */ .base = 0x1b80, .offset = 0x10000, .limit = 0x10000 },
+ { /* VI 1 */ .base = 0x1b88, .offset = 0x20000, .limit = 0x20000 },
+ { /* VI 2 */ .base = 0x1b90, .offset = 0x30000, .limit = 0x30000 },
+ { /* VI 3 */ .base = 0x1b98, .offset = 0x40000, .limit = 0x40000 },
+ { /* VI 4 */ .base = 0x1ba0, .offset = 0x50000, .limit = 0x50000 },
+ { /* VI 5 */ .base = 0x1ba8, .offset = 0x60000, .limit = 0x60000 },
+ { /* VI 6 */ .base = 0x1bb0, .offset = 0x70000, .limit = 0x70000 },
+ { /* VI 7 */ .base = 0x1bb8, .offset = 0x80000, .limit = 0x80000 },
+ { /* VI 8 */ .base = 0x1bc0, .offset = 0x90000, .limit = 0x90000 },
+ { /* VI 9 */ .base = 0x1bc8, .offset = 0xa0000, .limit = 0xa0000 },
+ { /* VI 10 */ .base = 0x1bd0, .offset = 0xb0000, .limit = 0xb0000 },
+ { /* VI 11 */ .base = 0x1bd8, .offset = 0xc0000, .limit = 0xc0000 },
};
static const struct host1x_info host1x06_info = {
@@ -147,15 +189,30 @@ static const struct host1x_info host1x06_info = {
.num_sid_entries = ARRAY_SIZE(tegra186_sid_table),
.sid_table = tegra186_sid_table,
.reserve_vblank_syncpts = false,
+ .skip_reset_assert = true,
};
static const struct host1x_sid_entry tegra194_sid_table[] = {
- {
- /* VIC */
- .base = 0x1af0,
- .offset = 0x30,
- .limit = 0x34
- },
+ { /* SE1 */ .base = 0x1ac8, .offset = 0x90, .limit = 0x90 },
+ { /* SE2 */ .base = 0x1ad0, .offset = 0x90, .limit = 0x90 },
+ { /* SE3 */ .base = 0x1ad8, .offset = 0x90, .limit = 0x90 },
+ { /* SE4 */ .base = 0x1ae0, .offset = 0x90, .limit = 0x90 },
+ { /* ISP */ .base = 0x1ae8, .offset = 0x800, .limit = 0x800 },
+ { /* VIC */ .base = 0x1af0, .offset = 0x30, .limit = 0x34 },
+ { /* NVENC */ .base = 0x1af8, .offset = 0x30, .limit = 0x34 },
+ { /* NVDEC */ .base = 0x1b00, .offset = 0x30, .limit = 0x34 },
+ { /* NVJPG */ .base = 0x1b08, .offset = 0x30, .limit = 0x34 },
+ { /* TSEC */ .base = 0x1b10, .offset = 0x30, .limit = 0x34 },
+ { /* TSECB */ .base = 0x1b18, .offset = 0x30, .limit = 0x34 },
+ { /* VI */ .base = 0x1b80, .offset = 0x800, .limit = 0x800 },
+ { /* VI_THI */ .base = 0x1b88, .offset = 0x30, .limit = 0x34 },
+ { /* ISP_THI */ .base = 0x1b90, .offset = 0x30, .limit = 0x34 },
+ { /* PVA0_CLUSTER */ .base = 0x1b98, .offset = 0x0, .limit = 0x0 },
+ { /* PVA0_CLUSTER */ .base = 0x1ba0, .offset = 0x0, .limit = 0x0 },
+ { /* NVDLA0 */ .base = 0x1ba8, .offset = 0x30, .limit = 0x34 },
+ { /* NVDLA1 */ .base = 0x1bb0, .offset = 0x30, .limit = 0x34 },
+ { /* NVENC1 */ .base = 0x1bb8, .offset = 0x30, .limit = 0x34 },
+ { /* NVDEC1 */ .base = 0x1bc0, .offset = 0x30, .limit = 0x34 },
};
static const struct host1x_info host1x07_info = {
@@ -173,7 +230,65 @@ static const struct host1x_info host1x07_info = {
.reserve_vblank_syncpts = false,
};
+/*
+ * Tegra234 has two stream ID protection tables, one for setting stream IDs
+ * through the channel path via SETSTREAMID, and one for setting them via
+ * MMIO. We program each engine's data stream ID in the channel path table
+ * and firmware stream ID in the MMIO path table.
+ */
+static const struct host1x_sid_entry tegra234_sid_table[] = {
+ { /* SE1 MMIO */ .base = 0x1650, .offset = 0x90, .limit = 0x90 },
+ { /* SE1 ch */ .base = 0x1730, .offset = 0x90, .limit = 0x90 },
+ { /* SE2 MMIO */ .base = 0x1658, .offset = 0x90, .limit = 0x90 },
+ { /* SE2 ch */ .base = 0x1738, .offset = 0x90, .limit = 0x90 },
+ { /* SE4 MMIO */ .base = 0x1660, .offset = 0x90, .limit = 0x90 },
+ { /* SE4 ch */ .base = 0x1740, .offset = 0x90, .limit = 0x90 },
+ { /* ISP MMIO */ .base = 0x1680, .offset = 0x800, .limit = 0x800 },
+ { /* VIC MMIO */ .base = 0x1688, .offset = 0x34, .limit = 0x34 },
+ { /* VIC ch */ .base = 0x17b8, .offset = 0x30, .limit = 0x30 },
+ { /* NVENC MMIO */ .base = 0x1690, .offset = 0x34, .limit = 0x34 },
+ { /* NVENC ch */ .base = 0x17c0, .offset = 0x30, .limit = 0x30 },
+ { /* NVDEC MMIO */ .base = 0x1698, .offset = 0x34, .limit = 0x34 },
+ { /* NVDEC ch */ .base = 0x17c8, .offset = 0x30, .limit = 0x30 },
+ { /* NVJPG MMIO */ .base = 0x16a0, .offset = 0x34, .limit = 0x34 },
+ { /* NVJPG ch */ .base = 0x17d0, .offset = 0x30, .limit = 0x30 },
+ { /* TSEC MMIO */ .base = 0x16a8, .offset = 0x30, .limit = 0x34 },
+ { /* NVJPG1 MMIO */ .base = 0x16b0, .offset = 0x34, .limit = 0x34 },
+ { /* NVJPG1 ch */ .base = 0x17a8, .offset = 0x30, .limit = 0x30 },
+ { /* VI MMIO */ .base = 0x16b8, .offset = 0x800, .limit = 0x800 },
+ { /* VI_THI MMIO */ .base = 0x16c0, .offset = 0x30, .limit = 0x34 },
+ { /* ISP_THI MMIO */ .base = 0x16c8, .offset = 0x30, .limit = 0x34 },
+ { /* NVDLA MMIO */ .base = 0x16d8, .offset = 0x30, .limit = 0x34 },
+ { /* NVDLA ch */ .base = 0x17e0, .offset = 0x30, .limit = 0x34 },
+ { /* NVDLA1 MMIO */ .base = 0x16e0, .offset = 0x30, .limit = 0x34 },
+ { /* NVDLA1 ch */ .base = 0x17e8, .offset = 0x30, .limit = 0x34 },
+ { /* OFA MMIO */ .base = 0x16e8, .offset = 0x34, .limit = 0x34 },
+ { /* OFA ch */ .base = 0x1768, .offset = 0x30, .limit = 0x30 },
+ { /* VI2 MMIO */ .base = 0x16f0, .offset = 0x800, .limit = 0x800 },
+ { /* VI2_THI MMIO */ .base = 0x16f8, .offset = 0x30, .limit = 0x34 },
+};
+
+static const struct host1x_info host1x08_info = {
+ .nb_channels = 63,
+ .nb_pts = 1024,
+ .nb_mlocks = 24,
+ .nb_bases = 0,
+ .init = host1x08_init,
+ .sync_offset = 0x0,
+ .dma_mask = DMA_BIT_MASK(40),
+ .has_wide_gather = true,
+ .has_hypervisor = true,
+ .has_common = true,
+ .num_sid_entries = ARRAY_SIZE(tegra234_sid_table),
+ .sid_table = tegra234_sid_table,
+ .streamid_vm_table = { 0x1004, 128 },
+ .classid_vm_table = { 0x1404, 25 },
+ .mmio_vm_table = { 0x1504, 25 },
+ .reserve_vblank_syncpts = false,
+};
+
static const struct of_device_id host1x_of_match[] = {
+ { .compatible = "nvidia,tegra234-host1x", .data = &host1x08_info, },
{ .compatible = "nvidia,tegra194-host1x", .data = &host1x07_info, },
{ .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, },
{ .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, },
@@ -185,21 +300,43 @@ static const struct of_device_id host1x_of_match[] = {
};
MODULE_DEVICE_TABLE(of, host1x_of_match);
-static void host1x_setup_sid_table(struct host1x *host)
+static void host1x_setup_virtualization_tables(struct host1x *host)
{
const struct host1x_info *info = host->info;
unsigned int i;
+ if (!info->has_hypervisor)
+ return;
+
for (i = 0; i < info->num_sid_entries; i++) {
const struct host1x_sid_entry *entry = &info->sid_table[i];
host1x_hypervisor_writel(host, entry->offset, entry->base);
host1x_hypervisor_writel(host, entry->limit, entry->base + 4);
}
+
+ for (i = 0; i < info->streamid_vm_table.count; i++) {
+ /* Allow access to all stream IDs to all VMs. */
+ host1x_hypervisor_writel(host, 0xff, info->streamid_vm_table.base + 4 * i);
+ }
+
+ for (i = 0; i < info->classid_vm_table.count; i++) {
+ /* Allow access to all classes to all VMs. */
+ host1x_hypervisor_writel(host, 0xff, info->classid_vm_table.base + 4 * i);
+ }
+
+ for (i = 0; i < info->mmio_vm_table.count; i++) {
+ /* Use VM1 (that's us) as originator VMID for engine MMIO accesses. */
+ host1x_hypervisor_writel(host, 0x1, info->mmio_vm_table.base + 4 * i);
+ }
}
static bool host1x_wants_iommu(struct host1x *host1x)
{
+ /* Our IOMMU usage policy doesn't currently play well with GART */
+ if (of_machine_is_compatible("nvidia,tegra20"))
+ return false;
+
/*
* If we support addressing a maximum of 32 bits of physical memory
* and if the host1x firewall is enabled, there's no need to enable
@@ -233,11 +370,26 @@ static bool host1x_wants_iommu(struct host1x *host1x)
return true;
}
+/*
+ * Returns ERR_PTR on failure, NULL if the translation is IDENTITY, otherwise a
+ * valid paging domain.
+ */
static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
{
struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev);
int err;
+#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)
+ if (host->dev->archdata.mapping) {
+ struct dma_iommu_mapping *mapping =
+ to_dma_iommu_mapping(host->dev);
+ arm_iommu_detach_device(host->dev);
+ arm_iommu_release_mapping(mapping);
+
+ domain = iommu_get_domain_for_dev(host->dev);
+ }
+#endif
+
/*
* We may not always want to enable IOMMU support (for example if the
* host1x firewall is already enabled and we don't support addressing
@@ -246,6 +398,8 @@ static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
* Similarly, if host1x is already attached to an IOMMU (via the DMA
* API), don't try to attach again.
*/
+ if (domain && domain->type == IOMMU_DOMAIN_IDENTITY)
+ domain = NULL;
if (!host1x_wants_iommu(host) || domain)
return domain;
@@ -259,9 +413,10 @@ static struct iommu_domain *host1x_iommu_attach(struct host1x *host)
if (err < 0)
goto put_group;
- host->domain = iommu_domain_alloc(&platform_bus_type);
- if (!host->domain) {
- err = -ENOMEM;
+ host->domain = iommu_paging_domain_alloc(host->dev);
+ if (IS_ERR(host->domain)) {
+ err = PTR_ERR(host->domain);
+ host->domain = NULL;
goto put_cache;
}
@@ -347,12 +502,28 @@ static void host1x_iommu_exit(struct host1x *host)
}
}
+static int host1x_get_resets(struct host1x *host)
+{
+ int err;
+
+ host->resets[0].id = "mc";
+ host->resets[1].id = "host1x";
+ host->nresets = ARRAY_SIZE(host->resets);
+
+ err = devm_reset_control_bulk_get_optional_exclusive_released(
+ host->dev, host->nresets, host->resets);
+ if (err) {
+ dev_err(host->dev, "failed to get reset: %d\n", err);
+ return err;
+ }
+
+ return 0;
+}
+
static int host1x_probe(struct platform_device *pdev)
{
struct host1x *host;
- struct resource *regs, *hv_regs = NULL;
- int syncpt_irq;
- int err;
+ int err, i;
host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL);
if (!host)
@@ -361,30 +532,49 @@ static int host1x_probe(struct platform_device *pdev)
host->info = of_device_get_match_data(&pdev->dev);
if (host->info->has_hypervisor) {
- regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vm");
- if (!regs) {
- dev_err(&pdev->dev, "failed to get vm registers\n");
- return -ENXIO;
- }
+ host->regs = devm_platform_ioremap_resource_byname(pdev, "vm");
+ if (IS_ERR(host->regs))
+ return PTR_ERR(host->regs);
+
+ host->hv_regs = devm_platform_ioremap_resource_byname(pdev, "hypervisor");
+ if (IS_ERR(host->hv_regs))
+ return PTR_ERR(host->hv_regs);
- hv_regs = platform_get_resource_byname(pdev, IORESOURCE_MEM,
- "hypervisor");
- if (!hv_regs) {
- dev_err(&pdev->dev,
- "failed to get hypervisor registers\n");
- return -ENXIO;
+ if (host->info->has_common) {
+ host->common_regs = devm_platform_ioremap_resource_byname(pdev, "common");
+ if (IS_ERR(host->common_regs))
+ return PTR_ERR(host->common_regs);
}
} else {
- regs = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!regs) {
- dev_err(&pdev->dev, "failed to get registers\n");
- return -ENXIO;
- }
+ host->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(host->regs))
+ return PTR_ERR(host->regs);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(host->syncpt_irqs); i++) {
+ char irq_name[] = "syncptX";
+
+ sprintf(irq_name, "syncpt%d", i);
+
+ err = platform_get_irq_byname_optional(pdev, irq_name);
+ if (err == -ENXIO)
+ break;
+ if (err < 0)
+ return err;
+
+ host->syncpt_irqs[i] = err;
}
- syncpt_irq = platform_get_irq(pdev, 0);
- if (syncpt_irq < 0)
- return syncpt_irq;
+ host->num_syncpt_irqs = i;
+
+ /* Device tree without irq names */
+ if (i == 0) {
+ host->syncpt_irqs[0] = platform_get_irq(pdev, 0);
+ if (host->syncpt_irqs[0] < 0)
+ return host->syncpt_irqs[0];
+
+ host->num_syncpt_irqs = 1;
+ }
mutex_init(&host->devices_lock);
INIT_LIST_HEAD(&host->devices);
@@ -394,16 +584,6 @@ static int host1x_probe(struct platform_device *pdev)
/* set common host1x device data */
platform_set_drvdata(pdev, host);
- host->regs = devm_ioremap_resource(&pdev->dev, regs);
- if (IS_ERR(host->regs))
- return PTR_ERR(host->regs);
-
- if (host->info->has_hypervisor) {
- host->hv_regs = devm_ioremap_resource(&pdev->dev, hv_regs);
- if (IS_ERR(host->hv_regs))
- return PTR_ERR(host->hv_regs);
- }
-
host->dev->dma_parms = &host->dma_parms;
dma_set_max_seg_size(host->dev, UINT_MAX);
@@ -414,26 +594,19 @@ static int host1x_probe(struct platform_device *pdev)
}
host->clk = devm_clk_get(&pdev->dev, NULL);
- if (IS_ERR(host->clk)) {
- err = PTR_ERR(host->clk);
-
- if (err != -EPROBE_DEFER)
- dev_err(&pdev->dev, "failed to get clock: %d\n", err);
+ if (IS_ERR(host->clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(host->clk), "failed to get clock\n");
+ err = host1x_get_resets(host);
+ if (err)
return err;
- }
- host->rst = devm_reset_control_get(&pdev->dev, "host1x");
- if (IS_ERR(host->rst)) {
- err = PTR_ERR(host->rst);
- dev_err(&pdev->dev, "failed to get reset: %d\n", err);
- return err;
- }
+ host1x_bo_cache_init(&host->cache);
err = host1x_iommu_init(host);
if (err < 0) {
dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err);
- return err;
+ goto destroy_cache;
}
err = host1x_channel_list_init(&host->channel_list,
@@ -443,35 +616,39 @@ static int host1x_probe(struct platform_device *pdev)
goto iommu_exit;
}
- err = clk_prepare_enable(host->clk);
- if (err < 0) {
- dev_err(&pdev->dev, "failed to enable clock\n");
+ err = host1x_memory_context_list_init(host);
+ if (err) {
+ dev_err(&pdev->dev, "failed to initialize context list\n");
goto free_channels;
}
- err = reset_control_deassert(host->rst);
- if (err < 0) {
- dev_err(&pdev->dev, "failed to deassert reset: %d\n", err);
- goto unprepare_disable;
- }
-
err = host1x_syncpt_init(host);
if (err) {
dev_err(&pdev->dev, "failed to initialize syncpts\n");
- goto reset_assert;
+ goto free_contexts;
}
- err = host1x_intr_init(host, syncpt_irq);
+ mutex_init(&host->intr_mutex);
+
+ pm_runtime_enable(&pdev->dev);
+
+ err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev);
+ if (err)
+ goto pm_disable;
+
+ /* the driver's code isn't ready yet for the dynamic RPM */
+ err = pm_runtime_resume_and_get(&pdev->dev);
+ if (err)
+ goto pm_disable;
+
+ err = host1x_intr_init(host);
if (err) {
dev_err(&pdev->dev, "failed to initialize interrupts\n");
- goto deinit_syncpt;
+ goto pm_put;
}
host1x_debug_init(host);
- if (host->info->has_hypervisor)
- host1x_setup_sid_table(host);
-
err = host1x_register(host);
if (err < 0)
goto deinit_debugfs;
@@ -487,39 +664,120 @@ unregister:
deinit_debugfs:
host1x_debug_deinit(host);
host1x_intr_deinit(host);
-deinit_syncpt:
+pm_put:
+ pm_runtime_put_sync_suspend(&pdev->dev);
+pm_disable:
+ pm_runtime_disable(&pdev->dev);
host1x_syncpt_deinit(host);
-reset_assert:
- reset_control_assert(host->rst);
-unprepare_disable:
- clk_disable_unprepare(host->clk);
+free_contexts:
+ host1x_memory_context_list_free(&host->context_list);
free_channels:
host1x_channel_list_free(&host->channel_list);
iommu_exit:
host1x_iommu_exit(host);
+destroy_cache:
+ host1x_bo_cache_destroy(&host->cache);
return err;
}
-static int host1x_remove(struct platform_device *pdev)
+static void host1x_remove(struct platform_device *pdev)
{
struct host1x *host = platform_get_drvdata(pdev);
host1x_unregister(host);
host1x_debug_deinit(host);
+
+ pm_runtime_force_suspend(&pdev->dev);
+
host1x_intr_deinit(host);
host1x_syncpt_deinit(host);
- reset_control_assert(host->rst);
- clk_disable_unprepare(host->clk);
+ host1x_memory_context_list_free(&host->context_list);
+ host1x_channel_list_free(&host->channel_list);
host1x_iommu_exit(host);
+ host1x_bo_cache_destroy(&host->cache);
+}
+
+static int __maybe_unused host1x_runtime_suspend(struct device *dev)
+{
+ struct host1x *host = dev_get_drvdata(dev);
+ int err;
+
+ host1x_channel_stop_all(host);
+ host1x_intr_stop(host);
+ host1x_syncpt_save(host);
+
+ if (!host->info->skip_reset_assert) {
+ err = reset_control_bulk_assert(host->nresets, host->resets);
+ if (err) {
+ dev_err(dev, "failed to assert reset: %d\n", err);
+ goto resume_host1x;
+ }
+
+ usleep_range(1000, 2000);
+ }
+
+ clk_disable_unprepare(host->clk);
+ reset_control_bulk_release(host->nresets, host->resets);
return 0;
+
+resume_host1x:
+ host1x_setup_virtualization_tables(host);
+ host1x_syncpt_restore(host);
+ host1x_intr_start(host);
+
+ return err;
}
+static int __maybe_unused host1x_runtime_resume(struct device *dev)
+{
+ struct host1x *host = dev_get_drvdata(dev);
+ int err;
+
+ err = reset_control_bulk_acquire(host->nresets, host->resets);
+ if (err) {
+ dev_err(dev, "failed to acquire reset: %d\n", err);
+ return err;
+ }
+
+ err = clk_prepare_enable(host->clk);
+ if (err) {
+ dev_err(dev, "failed to enable clock: %d\n", err);
+ goto release_reset;
+ }
+
+ err = reset_control_bulk_deassert(host->nresets, host->resets);
+ if (err < 0) {
+ dev_err(dev, "failed to deassert reset: %d\n", err);
+ goto disable_clk;
+ }
+
+ host1x_setup_virtualization_tables(host);
+ host1x_syncpt_restore(host);
+ host1x_intr_start(host);
+
+ return 0;
+
+disable_clk:
+ clk_disable_unprepare(host->clk);
+release_reset:
+ reset_control_bulk_release(host->nresets, host->resets);
+
+ return err;
+}
+
+static const struct dev_pm_ops host1x_pm_ops = {
+ SET_RUNTIME_PM_OPS(host1x_runtime_suspend, host1x_runtime_resume,
+ NULL)
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
+};
+
static struct platform_driver tegra_host1x_driver = {
.driver = {
.name = "tegra-host1x",
.of_match_table = host1x_of_match,
+ .pm = &host1x_pm_ops,
},
.probe = host1x_probe,
.remove = host1x_remove,
@@ -566,6 +824,7 @@ u64 host1x_get_dma_mask(struct host1x *host1x)
}
EXPORT_SYMBOL(host1x_get_dma_mask);
+MODULE_SOFTDEP("post: tegra-drm");
MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>");
MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>");
MODULE_DESCRIPTION("Host1x driver for Tegra products");
diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h
index fa6d4bc46e98..ef44618ed88a 100644
--- a/drivers/gpu/host1x/dev.h
+++ b/drivers/gpu/host1x/dev.h
@@ -9,11 +9,13 @@
#include <linux/device.h>
#include <linux/iommu.h>
#include <linux/iova.h>
+#include <linux/irqreturn.h>
#include <linux/platform_device.h>
#include <linux/reset.h>
#include "cdma.h"
#include "channel.h"
+#include "context.h"
#include "intr.h"
#include "job.h"
#include "syncpt.h"
@@ -73,14 +75,14 @@ struct host1x_syncpt_ops {
};
struct host1x_intr_ops {
- int (*init_host_sync)(struct host1x *host, u32 cpm,
- void (*syncpt_thresh_work)(struct work_struct *work));
+ int (*init_host_sync)(struct host1x *host, u32 cpm);
void (*set_syncpt_threshold)(
struct host1x *host, unsigned int id, u32 thresh);
void (*enable_syncpt_intr)(struct host1x *host, unsigned int id);
void (*disable_syncpt_intr)(struct host1x *host, unsigned int id);
void (*disable_all_syncpt_intrs)(struct host1x *host);
int (*free_syncpt_irq)(struct host1x *host);
+ irqreturn_t (*isr)(int irq, void *dev_id);
};
struct host1x_sid_entry {
@@ -89,6 +91,11 @@ struct host1x_sid_entry {
unsigned int limit;
};
+struct host1x_table_desc {
+ unsigned int base;
+ unsigned int count;
+};
+
struct host1x_info {
unsigned int nb_channels; /* host1x: number of channels supported */
unsigned int nb_pts; /* host1x: number of syncpoints supported */
@@ -99,14 +106,24 @@ struct host1x_info {
u64 dma_mask; /* mask of addressable memory */
bool has_wide_gather; /* supports GATHER_W opcode */
bool has_hypervisor; /* has hypervisor registers */
+ bool has_common; /* has common registers separate from hypervisor */
unsigned int num_sid_entries;
const struct host1x_sid_entry *sid_table;
+ struct host1x_table_desc streamid_vm_table;
+ struct host1x_table_desc classid_vm_table;
+ struct host1x_table_desc mmio_vm_table;
/*
* On T20-T148, the boot chain may setup DC to increment syncpoints
* 26/27 on VBLANK. As such we cannot use these syncpoints until
* the display driver disables VBLANK increments.
*/
bool reserve_vblank_syncpts;
+ /*
+ * On Tegra186, secure world applications may require access to
+ * host1x during suspend/resume. To allow this, we need to leave
+ * host1x not in reset.
+ */
+ bool skip_reset_assert;
};
struct host1x {
@@ -114,11 +131,15 @@ struct host1x {
void __iomem *regs;
void __iomem *hv_regs; /* hypervisor region */
+ void __iomem *common_regs;
+ int syncpt_irqs[8];
+ int num_syncpt_irqs;
struct host1x_syncpt *syncpt;
struct host1x_syncpt_base *bases;
struct device *dev;
struct clk *clk;
- struct reset_control *rst;
+ struct reset_control_bulk_data resets[2];
+ unsigned int nresets;
struct iommu_group *group;
struct iommu_domain *domain;
@@ -126,7 +147,6 @@ struct host1x {
dma_addr_t iova_end;
struct mutex intr_mutex;
- int intr_syncpt_irq;
const struct host1x_syncpt_ops *syncpt_op;
const struct host1x_intr_ops *intr_op;
@@ -140,6 +160,7 @@ struct host1x {
struct mutex syncpt_mutex;
struct host1x_channel_list channel_list;
+ struct host1x_memory_context_list context_list;
struct dentry *debugfs;
@@ -149,13 +170,19 @@ struct host1x {
struct list_head list;
struct device_dma_parameters dma_parms;
+
+ struct host1x_bo_cache cache;
};
-void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v);
+void host1x_common_writel(struct host1x *host1x, u32 v, u32 r);
+void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r);
u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r);
-void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v);
+void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r);
u32 host1x_sync_readl(struct host1x *host1x, u32 r);
-void host1x_ch_writel(struct host1x_channel *ch, u32 r, u32 v);
+#ifdef CONFIG_64BIT
+u64 host1x_sync_readq(struct host1x *host1x, u32 r);
+#endif
+void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r);
u32 host1x_ch_readl(struct host1x_channel *ch, u32 r);
static inline void host1x_hw_syncpt_restore(struct host1x *host,
@@ -200,10 +227,9 @@ static inline void host1x_hw_syncpt_enable_protection(struct host1x *host)
return host->syncpt_op->enable_protection(host);
}
-static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm,
- void (*syncpt_thresh_work)(struct work_struct *))
+static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm)
{
- return host->intr_op->init_host_sync(host, cpm, syncpt_thresh_work);
+ return host->intr_op->init_host_sync(host, cpm);
}
static inline void host1x_hw_intr_set_syncpt_threshold(struct host1x *host,
diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c
new file mode 100644
index 000000000000..139ad1afd935
--- /dev/null
+++ b/drivers/gpu/host1x/fence.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Syncpoint dma_fence implementation
+ *
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#include <linux/dma-fence.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/sync_file.h>
+
+#include "fence.h"
+#include "intr.h"
+#include "syncpt.h"
+
+static const char *host1x_syncpt_fence_get_driver_name(struct dma_fence *f)
+{
+ return "host1x";
+}
+
+static const char *host1x_syncpt_fence_get_timeline_name(struct dma_fence *f)
+{
+ return "syncpoint";
+}
+
+static struct host1x_syncpt_fence *to_host1x_fence(struct dma_fence *f)
+{
+ return container_of(f, struct host1x_syncpt_fence, base);
+}
+
+static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f)
+{
+ struct host1x_syncpt_fence *sf = to_host1x_fence(f);
+
+ if (host1x_syncpt_is_expired(sf->sp, sf->threshold))
+ return false;
+
+ /* Reference for interrupt path. */
+ dma_fence_get(f);
+
+ /*
+ * The dma_fence framework requires the fence driver to keep a
+ * reference to any fences for which 'enable_signaling' has been
+ * called (and that have not been signalled).
+ *
+ * We cannot currently always guarantee that all fences get signalled
+ * or cancelled. As such, for such situations, set up a timeout, so
+ * that long-lasting fences will get reaped eventually.
+ */
+ if (sf->timeout) {
+ /* Reference for timeout path. */
+ dma_fence_get(f);
+ schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000));
+ }
+
+ host1x_intr_add_fence_locked(sf->sp->host, sf);
+
+ /*
+ * The fence may get signalled at any time after the above call,
+ * so we need to initialize all state used by signalling
+ * before it.
+ */
+
+ return true;
+}
+
+static const struct dma_fence_ops host1x_syncpt_fence_ops = {
+ .get_driver_name = host1x_syncpt_fence_get_driver_name,
+ .get_timeline_name = host1x_syncpt_fence_get_timeline_name,
+ .enable_signaling = host1x_syncpt_fence_enable_signaling,
+};
+
+void host1x_fence_signal(struct host1x_syncpt_fence *f)
+{
+ if (atomic_xchg(&f->signaling, 1)) {
+ /*
+ * Already on timeout path, but we removed the fence before
+ * timeout path could, so drop interrupt path reference.
+ */
+ dma_fence_put(&f->base);
+ return;
+ }
+
+ if (f->timeout && cancel_delayed_work(&f->timeout_work)) {
+ /*
+ * We know that the timeout path will not be entered.
+ * Safe to drop the timeout path's reference now.
+ */
+ dma_fence_put(&f->base);
+ }
+
+ dma_fence_signal_locked(&f->base);
+ dma_fence_put(&f->base);
+}
+
+static void do_fence_timeout(struct work_struct *work)
+{
+ struct delayed_work *dwork = (struct delayed_work *)work;
+ struct host1x_syncpt_fence *f =
+ container_of(dwork, struct host1x_syncpt_fence, timeout_work);
+
+ if (atomic_xchg(&f->signaling, 1)) {
+ /* Already on interrupt path, drop timeout path reference if any. */
+ if (f->timeout)
+ dma_fence_put(&f->base);
+ return;
+ }
+
+ if (host1x_intr_remove_fence(f->sp->host, f)) {
+ /*
+ * Managed to remove fence from queue, so it's safe to drop
+ * the interrupt path's reference.
+ */
+ dma_fence_put(&f->base);
+ }
+
+ dma_fence_set_error(&f->base, -ETIMEDOUT);
+ dma_fence_signal(&f->base);
+ if (f->timeout)
+ dma_fence_put(&f->base);
+}
+
+struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold,
+ bool timeout)
+{
+ struct host1x_syncpt_fence *fence;
+
+ fence = kzalloc(sizeof(*fence), GFP_KERNEL);
+ if (!fence)
+ return ERR_PTR(-ENOMEM);
+
+ fence->sp = sp;
+ fence->threshold = threshold;
+ fence->timeout = timeout;
+
+ dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &sp->fences.lock,
+ dma_fence_context_alloc(1), 0);
+
+ INIT_DELAYED_WORK(&fence->timeout_work, do_fence_timeout);
+
+ return &fence->base;
+}
+EXPORT_SYMBOL(host1x_fence_create);
+
+void host1x_fence_cancel(struct dma_fence *f)
+{
+ struct host1x_syncpt_fence *sf = to_host1x_fence(f);
+
+ schedule_delayed_work(&sf->timeout_work, 0);
+ flush_delayed_work(&sf->timeout_work);
+}
+EXPORT_SYMBOL(host1x_fence_cancel);
diff --git a/drivers/gpu/host1x/fence.h b/drivers/gpu/host1x/fence.h
new file mode 100644
index 000000000000..f3c644c73cad
--- /dev/null
+++ b/drivers/gpu/host1x/fence.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2020, NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_FENCE_H
+#define HOST1X_FENCE_H
+
+struct host1x_syncpt_fence {
+ struct dma_fence base;
+
+ atomic_t signaling;
+
+ struct host1x_syncpt *sp;
+ u32 threshold;
+ bool timeout;
+
+ struct delayed_work timeout_work;
+
+ struct list_head list;
+};
+
+struct host1x_fence_list {
+ spinlock_t lock;
+ struct list_head list;
+};
+
+void host1x_fence_signal(struct host1x_syncpt_fence *fence);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c
index e49cd5b8f735..3f3f0018eee0 100644
--- a/drivers/gpu/host1x/hw/cdma_hw.c
+++ b/drivers/gpu/host1x/hw/cdma_hw.c
@@ -238,6 +238,49 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 getptr)
cdma_timeout_restart(cdma, getptr);
}
+static void timeout_release_mlock(struct host1x_cdma *cdma)
+{
+#if HOST1X_HW >= 8
+ /* Tegra186 and Tegra194 require a more complicated MLOCK release
+ * sequence. Furthermore, those chips by default don't enforce MLOCKs,
+ * so it turns out that if we don't /actually/ need MLOCKs, we can just
+ * ignore them.
+ *
+ * As such, for now just implement this on Tegra234 where things are
+ * stricter but also easy to implement.
+ */
+ struct host1x_channel *ch = cdma_to_channel(cdma);
+ struct host1x *host1x = cdma_to_host1x(cdma);
+ u32 offset;
+
+ switch (ch->client->class) {
+ case HOST1X_CLASS_NVJPG1:
+ offset = HOST1X_COMMON_NVJPG1_MLOCK;
+ break;
+ case HOST1X_CLASS_NVENC:
+ offset = HOST1X_COMMON_NVENC_MLOCK;
+ break;
+ case HOST1X_CLASS_VIC:
+ offset = HOST1X_COMMON_VIC_MLOCK;
+ break;
+ case HOST1X_CLASS_NVJPG:
+ offset = HOST1X_COMMON_NVJPG_MLOCK;
+ break;
+ case HOST1X_CLASS_NVDEC:
+ offset = HOST1X_COMMON_NVDEC_MLOCK;
+ break;
+ case HOST1X_CLASS_OFA:
+ offset = HOST1X_COMMON_OFA_MLOCK;
+ break;
+ default:
+ WARN(1, "%s was not updated for class %u", __func__, ch->client->class);
+ return;
+ }
+
+ host1x_common_writel(host1x, 0x0, offset);
+#endif
+}
+
/*
* If this timeout fires, it indicates the current sync_queue entry has
* exceeded its TTL and the userctx should be timed out and remaining
@@ -288,6 +331,9 @@ static void cdma_timeout_handler(struct work_struct *work)
/* stop HW, resetting channel/module */
host1x_hw_cdma_freeze(host1x, cdma);
+ /* release any held MLOCK */
+ timeout_release_mlock(cdma);
+
host1x_cdma_update_sync_queue(cdma, ch->dev);
mutex_unlock(&cdma->lock);
}
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index d4c28faf27d1..2df6a16d484e 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -47,39 +47,113 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo,
}
}
-static void submit_gathers(struct host1x_job *job)
+static void submit_wait(struct host1x_job *job, u32 id, u32 threshold)
+{
+ struct host1x_cdma *cdma = &job->channel->cdma;
+
+#if HOST1X_HW >= 2
+ host1x_cdma_push_wide(cdma,
+ host1x_opcode_setclass(
+ HOST1X_CLASS_HOST1X,
+ HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32,
+ /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */
+ BIT(0) | BIT(2)
+ ),
+ threshold,
+ id,
+ HOST1X_OPCODE_NOP
+ );
+#else
+ /* TODO add waitchk or use waitbases or other mitigation */
+ host1x_cdma_push(cdma,
+ host1x_opcode_setclass(
+ HOST1X_CLASS_HOST1X,
+ host1x_uclass_wait_syncpt_r(),
+ BIT(0)
+ ),
+ host1x_class_host_wait_syncpt(id, threshold)
+ );
+#endif
+}
+
+static void submit_setclass(struct host1x_job *job, u32 next_class)
+{
+ struct host1x_cdma *cdma = &job->channel->cdma;
+
+#if HOST1X_HW >= 6
+ u32 stream_id;
+
+ /*
+ * If a memory context has been set, use it. Otherwise
+ * (if context isolation is disabled) use the engine's
+ * firmware stream ID.
+ */
+ if (job->memory_context)
+ stream_id = job->memory_context->stream_id;
+ else
+ stream_id = job->engine_fallback_streamid;
+
+ host1x_cdma_push_wide(cdma,
+ host1x_opcode_setclass(next_class, 0, 0),
+ host1x_opcode_setpayload(stream_id),
+ host1x_opcode_setstreamid(job->engine_streamid_offset / 4),
+ HOST1X_OPCODE_NOP);
+#else
+ host1x_cdma_push(cdma,
+ host1x_opcode_setclass(next_class, 0, 0),
+ HOST1X_OPCODE_NOP
+ );
+#endif
+}
+
+static void submit_gathers(struct host1x_job *job, struct host1x_job_cmd *cmds, u32 num_cmds,
+ u32 job_syncpt_base)
{
struct host1x_cdma *cdma = &job->channel->cdma;
#if HOST1X_HW < 6
struct device *dev = job->channel->dev;
#endif
unsigned int i;
+ u32 threshold;
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
- dma_addr_t addr = g->base + g->offset;
- u32 op2, op3;
+ for (i = 0; i < num_cmds; i++) {
+ struct host1x_job_cmd *cmd = &cmds[i];
- op2 = lower_32_bits(addr);
- op3 = upper_32_bits(addr);
+ if (cmd->is_wait) {
+ if (cmd->wait.relative)
+ threshold = job_syncpt_base + cmd->wait.threshold;
+ else
+ threshold = cmd->wait.threshold;
+
+ submit_wait(job, cmd->wait.id, threshold);
+ submit_setclass(job, cmd->wait.next_class);
+ } else {
+ struct host1x_job_gather *g = &cmd->gather;
- trace_write_gather(cdma, g->bo, g->offset, g->words);
+ dma_addr_t addr = g->base + g->offset;
+ u32 op2, op3;
- if (op3 != 0) {
+ op2 = lower_32_bits(addr);
+ op3 = upper_32_bits(addr);
+
+ trace_write_gather(cdma, g->bo, g->offset, g->words);
+
+ if (op3 != 0) {
#if HOST1X_HW >= 6
- u32 op1 = host1x_opcode_gather_wide(g->words);
- u32 op4 = HOST1X_OPCODE_NOP;
+ u32 op1 = host1x_opcode_gather_wide(g->words);
+ u32 op4 = HOST1X_OPCODE_NOP;
- host1x_cdma_push_wide(cdma, op1, op2, op3, op4);
+ host1x_cdma_push_wide(cdma, op1, op2, op3, op4);
#else
- dev_err(dev, "invalid gather for push buffer %pad\n",
- &addr);
- continue;
+ dev_err(dev, "invalid gather for push buffer %pad\n",
+ &addr);
+ continue;
#endif
- } else {
- u32 op1 = host1x_opcode_gather(g->words);
+ } else {
+ u32 op1 = host1x_opcode_gather(g->words);
- host1x_cdma_push(cdma, op1, op2);
+ host1x_cdma_push(cdma, op1, op2);
+ }
}
}
}
@@ -103,62 +177,109 @@ static inline void synchronize_syncpt_base(struct host1x_job *job)
static void host1x_channel_set_streamid(struct host1x_channel *channel)
{
#if HOST1X_HW >= 6
- u32 sid = 0x7f;
-#ifdef CONFIG_IOMMU_API
- struct iommu_fwspec *spec = dev_iommu_fwspec_get(channel->dev->parent);
- if (spec)
- sid = spec->ids[0] & 0xffff;
+ u32 stream_id;
+
+ if (!tegra_dev_iommu_get_stream_id(channel->dev->parent, &stream_id))
+ stream_id = TEGRA_STREAM_ID_BYPASS;
+
+ host1x_ch_writel(channel, stream_id, HOST1X_CHANNEL_SMMU_STREAMID);
#endif
+}
+
+static void host1x_enable_gather_filter(struct host1x_channel *ch)
+{
+#if HOST1X_HW >= 6
+ struct host1x *host = dev_get_drvdata(ch->dev->parent);
+ u32 val;
+
+ if (!host->hv_regs)
+ return;
- host1x_ch_writel(channel, sid, HOST1X_CHANNEL_SMMU_STREAMID);
+ val = host1x_hypervisor_readl(
+ host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+ val |= BIT(ch->id % 32);
+ host1x_hypervisor_writel(
+ host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
+#elif HOST1X_HW >= 4
+ host1x_ch_writel(ch,
+ HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
+ HOST1X_CHANNEL_CHANNELCTRL);
#endif
}
-static int channel_submit(struct host1x_job *job)
+static void channel_program_cdma(struct host1x_job *job)
{
- struct host1x_channel *ch = job->channel;
+ struct host1x_cdma *cdma = &job->channel->cdma;
struct host1x_syncpt *sp = job->syncpt;
- u32 user_syncpt_incrs = job->syncpt_incrs;
- u32 prev_max = 0;
- u32 syncval;
- int err;
- struct host1x_waitlist *completed_waiter = NULL;
- struct host1x *host = dev_get_drvdata(ch->dev->parent);
- trace_host1x_channel_submit(dev_name(ch->dev),
- job->num_gathers, job->num_relocs,
- job->syncpt->id, job->syncpt_incrs);
+#if HOST1X_HW >= 6
+ u32 fence;
+ int i = 0;
- /* before error checks, return current max */
- prev_max = job->syncpt_end = host1x_syncpt_read_max(sp);
+ if (job->num_cmds == 0)
+ goto prefences_done;
+ if (!job->cmds[0].is_wait || job->cmds[0].wait.relative)
+ goto prefences_done;
- /* get submit lock */
- err = mutex_lock_interruptible(&ch->submitlock);
- if (err)
- goto error;
+ /* Enter host1x class with invalid stream ID for prefence waits. */
+ host1x_cdma_push_wide(cdma,
+ host1x_opcode_acquire_mlock(1),
+ host1x_opcode_setclass(1, 0, 0),
+ host1x_opcode_setpayload(0),
+ host1x_opcode_setstreamid(0x1fffff));
- completed_waiter = kzalloc(sizeof(*completed_waiter), GFP_KERNEL);
- if (!completed_waiter) {
- mutex_unlock(&ch->submitlock);
- err = -ENOMEM;
- goto error;
- }
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_cmd *cmd = &job->cmds[i];
- host1x_channel_set_streamid(ch);
+ if (!cmd->is_wait || cmd->wait.relative)
+ break;
- /* begin a CDMA submit */
- err = host1x_cdma_begin(&ch->cdma, job);
- if (err) {
- mutex_unlock(&ch->submitlock);
- goto error;
+ submit_wait(job, cmd->wait.id, cmd->wait.threshold);
}
+ host1x_cdma_push(cdma,
+ HOST1X_OPCODE_NOP,
+ host1x_opcode_release_mlock(1));
+
+prefences_done:
+ /* Enter engine class with invalid stream ID. */
+ host1x_cdma_push_wide(cdma,
+ host1x_opcode_acquire_mlock(job->class),
+ host1x_opcode_setclass(job->class, 0, 0),
+ host1x_opcode_setpayload(0),
+ host1x_opcode_setstreamid(job->engine_streamid_offset / 4));
+
+ /* Before switching stream ID to real stream ID, ensure engine is idle. */
+ fence = host1x_syncpt_incr_max(sp, 1);
+ host1x_cdma_push(&job->channel->cdma,
+ host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
+ HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
+ HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
+ submit_wait(job, job->syncpt->id, fence);
+ submit_setclass(job, job->class);
+
+ /* Submit work. */
+ job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs);
+ submit_gathers(job, job->cmds + i, job->num_cmds - i, job->syncpt_end - job->syncpt_incrs);
+
+ /* Before releasing MLOCK, ensure engine is idle again. */
+ fence = host1x_syncpt_incr_max(sp, 1);
+ host1x_cdma_push(&job->channel->cdma,
+ host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1),
+ HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) |
+ HOST1X_UCLASS_INCR_SYNCPT_COND_F(4));
+ submit_wait(job, job->syncpt->id, fence);
+
+ /* Release MLOCK. */
+ host1x_cdma_push(cdma,
+ HOST1X_OPCODE_NOP, host1x_opcode_release_mlock(job->class));
+#else
if (job->serialize) {
/*
* Force serialization by inserting a host wait for the
* previous job to finish before this one can commence.
*/
- host1x_cdma_push(&ch->cdma,
+ host1x_cdma_push(cdma,
host1x_opcode_setclass(HOST1X_CLASS_HOST1X,
host1x_uclass_wait_syncpt_r(), 1),
host1x_class_host_wait_syncpt(job->syncpt->id,
@@ -169,60 +290,86 @@ static int channel_submit(struct host1x_job *job)
if (sp->base)
synchronize_syncpt_base(job);
- syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs);
-
- host1x_hw_syncpt_assign_to_channel(host, sp, ch);
-
- job->syncpt_end = syncval;
-
/* add a setclass for modules that require it */
if (job->class)
- host1x_cdma_push(&ch->cdma,
+ host1x_cdma_push(cdma,
host1x_opcode_setclass(job->class, 0, 0),
HOST1X_OPCODE_NOP);
- submit_gathers(job);
+ job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs);
- /* end CDMA submit & stash pinned hMems into sync queue */
- host1x_cdma_end(&ch->cdma, job);
+ submit_gathers(job, job->cmds, job->num_cmds, job->syncpt_end - job->syncpt_incrs);
+#endif
+}
- trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval);
+static void job_complete_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+ struct host1x_job *job = container_of(cb, struct host1x_job, fence_cb);
- /* schedule a submit complete interrupt */
- err = host1x_intr_add_action(host, sp, syncval,
- HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch,
- completed_waiter, NULL);
- completed_waiter = NULL;
- WARN(err, "Failed to set submit complete interrupt");
+ /* Schedules CDMA update. */
+ host1x_cdma_update(&job->channel->cdma);
+}
- mutex_unlock(&ch->submitlock);
+static int channel_submit(struct host1x_job *job)
+{
+ struct host1x_channel *ch = job->channel;
+ struct host1x_syncpt *sp = job->syncpt;
+ u32 prev_max = 0;
+ u32 syncval;
+ int err;
+ struct host1x *host = dev_get_drvdata(ch->dev->parent);
- return 0;
+ trace_host1x_channel_submit(dev_name(ch->dev),
+ job->num_cmds, job->num_relocs,
+ job->syncpt->id, job->syncpt_incrs);
-error:
- kfree(completed_waiter);
- return err;
-}
+ /* before error checks, return current max */
+ prev_max = job->syncpt_end = host1x_syncpt_read_max(sp);
-static void enable_gather_filter(struct host1x *host,
- struct host1x_channel *ch)
-{
-#if HOST1X_HW >= 6
- u32 val;
+ /* get submit lock */
+ err = mutex_lock_interruptible(&ch->submitlock);
+ if (err)
+ return err;
- if (!host->hv_regs)
- return;
+ host1x_channel_set_streamid(ch);
+ host1x_enable_gather_filter(ch);
+ host1x_hw_syncpt_assign_to_channel(host, sp, ch);
- val = host1x_hypervisor_readl(
- host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
- val |= BIT(ch->id % 32);
- host1x_hypervisor_writel(
- host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32));
-#elif HOST1X_HW >= 4
- host1x_ch_writel(ch,
- HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
- HOST1X_CHANNEL_CHANNELCTRL);
-#endif
+ /* begin a CDMA submit */
+ err = host1x_cdma_begin(&ch->cdma, job);
+ if (err) {
+ mutex_unlock(&ch->submitlock);
+ return err;
+ }
+
+ channel_program_cdma(job);
+ syncval = host1x_syncpt_read_max(sp);
+
+ /*
+ * Create fence before submitting job to HW to avoid job completing
+ * before the fence is set up.
+ */
+ job->fence = host1x_fence_create(sp, syncval, true);
+ if (WARN(IS_ERR(job->fence), "Failed to create submit complete fence")) {
+ job->fence = NULL;
+ } else {
+ err = dma_fence_add_callback(job->fence, &job->fence_cb,
+ job_complete_callback);
+ }
+
+ /* end CDMA submit & stash pinned hMems into sync queue */
+ host1x_cdma_end(&ch->cdma, job);
+
+ trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval);
+
+ mutex_unlock(&ch->submitlock);
+
+ if (err == -ENOENT)
+ host1x_cdma_update(&ch->cdma);
+ else
+ WARN(err, "Failed to set submit complete interrupt");
+
+ return 0;
}
static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
@@ -233,7 +380,6 @@ static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
#else
ch->regs = dev->regs + index * 0x100;
#endif
- enable_gather_filter(dev, ch);
return 0;
}
diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c
index ceb48229d14b..4c32aa1b95e8 100644
--- a/drivers/gpu/host1x/hw/debug_hw.c
+++ b/drivers/gpu/host1x/hw/debug_hw.c
@@ -156,9 +156,9 @@ static unsigned int show_channel_command(struct output *o, u32 val,
}
}
-static void show_gather(struct output *o, phys_addr_t phys_addr,
+static void show_gather(struct output *o, dma_addr_t phys_addr,
unsigned int words, struct host1x_cdma *cdma,
- phys_addr_t pin_addr, u32 *map_addr)
+ dma_addr_t pin_addr, u32 *map_addr)
{
/* Map dmaget cursor to corresponding mem handle */
u32 offset = phys_addr - pin_addr;
@@ -176,11 +176,20 @@ static void show_gather(struct output *o, phys_addr_t phys_addr,
}
for (i = 0; i < words; i++) {
- u32 addr = phys_addr + i * 4;
- u32 val = *(map_addr + offset / 4 + i);
+ dma_addr_t addr = phys_addr + i * 4;
+ u32 voffset = offset + i * 4;
+ u32 val;
+
+ /* If we reach the RESTART opcode, continue at the beginning of pushbuffer */
+ if (cdma && voffset >= cdma->push_buffer.size) {
+ addr -= cdma->push_buffer.size;
+ voffset -= cdma->push_buffer.size;
+ }
+
+ val = *(map_addr + voffset / 4);
if (!data_count) {
- host1x_debug_output(o, "%08x: %08x: ", addr, val);
+ host1x_debug_output(o, " %pad: %08x: ", &addr, val);
data_count = show_channel_command(o, val, &payload);
} else {
host1x_debug_cont(o, "%08x%s", val,
@@ -195,23 +204,25 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
struct push_buffer *pb = &cdma->push_buffer;
struct host1x_job *job;
- host1x_debug_output(o, "PUSHBUF at %pad, %u words\n",
- &pb->dma, pb->size / 4);
-
- show_gather(o, pb->dma, pb->size / 4, cdma, pb->dma, pb->mapped);
-
list_for_each_entry(job, &cdma->sync_queue, list) {
unsigned int i;
- host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n",
- job, job->syncpt->id, job->syncpt_end,
- job->first_get, job->timeout,
+ host1x_debug_output(o, "JOB, syncpt %u: %u timeout: %u num_slots: %u num_handles: %u\n",
+ job->syncpt->id, job->syncpt_end, job->timeout,
job->num_slots, job->num_unpins);
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
+ show_gather(o, pb->dma + job->first_get, job->num_slots * 2, cdma,
+ pb->dma, pb->mapped);
+
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_gather *g;
u32 *mapped;
+ if (job->cmds[i].is_wait)
+ continue;
+
+ g = &job->cmds[i].gather;
+
if (job->gather_copy_mapped)
mapped = (u32 *)job->gather_copy_mapped;
else
@@ -222,10 +233,10 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma)
continue;
}
- host1x_debug_output(o, " GATHER at %pad+%#x, %d words\n",
+ host1x_debug_output(o, " GATHER at %pad+%#x, %d words\n",
&g->base, g->offset, g->words);
- show_gather(o, g->base + g->offset, g->words, cdma,
+ show_gather(o, g->base + g->offset, g->words, NULL,
g->base, mapped);
if (!job->gather_copy_mapped)
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c b/drivers/gpu/host1x/hw/debug_hw_1x01.c
index 02a93305ac7b..85242a59fa6a 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x01.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c
@@ -16,10 +16,13 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
struct output *o)
{
struct host1x_cdma *cdma = &ch->cdma;
+ dma_addr_t dmastart, dmaend;
u32 dmaput, dmaget, dmactrl;
u32 cbstat, cbread;
u32 val, base, baseval;
+ dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART);
+ dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND);
dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
@@ -56,9 +59,10 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat),
cbread);
- host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n",
+ host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend);
+ host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n",
dmaput, dmaget, dmactrl);
- host1x_debug_output(o, "CBREAD %08x, CBSTAT %08x\n", cbread, cbstat);
+ host1x_debug_output(o, "CBREAD %08x CBSTAT %08x\n", cbread, cbstat);
show_channel_gathers(o, cdma);
host1x_debug_output(o, "\n");
diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c b/drivers/gpu/host1x/hw/debug_hw_1x06.c
index 6d1b583aa90f..9d0667879a19 100644
--- a/drivers/gpu/host1x/hw/debug_hw_1x06.c
+++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c
@@ -16,10 +16,23 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
struct output *o)
{
struct host1x_cdma *cdma = &ch->cdma;
+ dma_addr_t dmastart = 0, dmaend = 0;
u32 dmaput, dmaget, dmactrl;
u32 offset, class;
u32 ch_stat;
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6
+ dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART_HI);
+ dmastart <<= 32;
+#endif
+ dmastart |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART);
+
+#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6
+ dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND_HI);
+ dmaend <<= 32;
+#endif
+ dmaend |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND);
+
dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT);
dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET);
dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL);
@@ -41,7 +54,8 @@ static void host1x_debug_show_channel_cdma(struct host1x *host,
host1x_debug_output(o, "active class %02x, offset %04x\n",
class, offset);
- host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n",
+ host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend);
+ host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n",
dmaput, dmaget, dmactrl);
host1x_debug_output(o, "CHANNELSTAT %02x\n", ch_stat);
diff --git a/drivers/gpu/host1x/hw/host1x01_hardware.h b/drivers/gpu/host1x/hw/host1x01_hardware.h
index fe59df1d3dc3..cb93d7c1808c 100644
--- a/drivers/gpu/host1x/hw/host1x01_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x01_hardware.h
@@ -15,118 +15,6 @@
#include "hw_host1x01_sync.h"
#include "hw_host1x01_uclass.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x02_hardware.h b/drivers/gpu/host1x/hw/host1x02_hardware.h
index af60d7fb016d..2d1282b9bc33 100644
--- a/drivers/gpu/host1x/hw/host1x02_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x02_hardware.h
@@ -15,117 +15,6 @@
#include "hw_host1x02_sync.h"
#include "hw_host1x02_uclass.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x04_hardware.h b/drivers/gpu/host1x/hw/host1x04_hardware.h
index 4f9bcddf27e3..84d244e8af30 100644
--- a/drivers/gpu/host1x/hw/host1x04_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x04_hardware.h
@@ -15,117 +15,6 @@
#include "hw_host1x04_sync.h"
#include "hw_host1x04_uclass.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x05_hardware.h b/drivers/gpu/host1x/hw/host1x05_hardware.h
index af3ab4b7f010..1dcde6ec7909 100644
--- a/drivers/gpu/host1x/hw/host1x05_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x05_hardware.h
@@ -15,117 +15,6 @@
#include "hw_host1x05_sync.h"
#include "hw_host1x05_uclass.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x06_hardware.h b/drivers/gpu/host1x/hw/host1x06_hardware.h
index 01a142a09800..c05cfa7e3090 100644
--- a/drivers/gpu/host1x/hw/host1x06_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x06_hardware.h
@@ -16,122 +16,6 @@
#include "hw_host1x06_vm.h"
#include "hw_host1x06_hypervisor.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-static inline u32 host1x_opcode_gather_wide(unsigned count)
-{
- return (12 << 28) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x07_hardware.h b/drivers/gpu/host1x/hw/host1x07_hardware.h
index e6582172ebfd..d67364e03956 100644
--- a/drivers/gpu/host1x/hw/host1x07_hardware.h
+++ b/drivers/gpu/host1x/hw/host1x07_hardware.h
@@ -16,122 +16,6 @@
#include "hw_host1x07_vm.h"
#include "hw_host1x07_hypervisor.h"
-static inline u32 host1x_class_host_wait_syncpt(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_wait_syncpt_indx_f(indx)
- | host1x_uclass_wait_syncpt_thresh_f(threshold);
-}
-
-static inline u32 host1x_class_host_load_syncpt_base(
- unsigned indx, unsigned threshold)
-{
- return host1x_uclass_load_syncpt_base_base_indx_f(indx)
- | host1x_uclass_load_syncpt_base_value_f(threshold);
-}
-
-static inline u32 host1x_class_host_wait_syncpt_base(
- unsigned indx, unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_wait_syncpt_base_indx_f(indx)
- | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_wait_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt_base(
- unsigned base_indx, unsigned offset)
-{
- return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
- | host1x_uclass_incr_syncpt_base_offset_f(offset);
-}
-
-static inline u32 host1x_class_host_incr_syncpt(
- unsigned cond, unsigned indx)
-{
- return host1x_uclass_incr_syncpt_cond_f(cond)
- | host1x_uclass_incr_syncpt_indx_f(indx);
-}
-
-static inline u32 host1x_class_host_indoff_reg_write(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indbe_f(0xf)
- | host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset);
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-static inline u32 host1x_class_host_indoff_reg_read(
- unsigned mod_id, unsigned offset, bool auto_inc)
-{
- u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
- | host1x_uclass_indoff_indroffset_f(offset)
- | host1x_uclass_indoff_rwn_read_v();
- if (auto_inc)
- v |= host1x_uclass_indoff_autoinc_f(1);
- return v;
-}
-
-/* cdma opcodes */
-static inline u32 host1x_opcode_setclass(
- unsigned class_id, unsigned offset, unsigned mask)
-{
- return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
-}
-
-static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
-{
- return (1 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
-{
- return (2 << 28) | (offset << 16) | count;
-}
-
-static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
-{
- return (3 << 28) | (offset << 16) | mask;
-}
-
-static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
-{
- return (4 << 28) | (offset << 16) | value;
-}
-
-static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
-{
- return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
- host1x_class_host_incr_syncpt(cond, indx));
-}
-
-static inline u32 host1x_opcode_restart(unsigned address)
-{
- return (5 << 28) | (address >> 4);
-}
-
-static inline u32 host1x_opcode_gather(unsigned count)
-{
- return (6 << 28) | count;
-}
-
-static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | count;
-}
-
-static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
-{
- return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
-}
-
-static inline u32 host1x_opcode_gather_wide(unsigned count)
-{
- return (12 << 28) | count;
-}
-
-#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+#include "opcodes.h"
#endif
diff --git a/drivers/gpu/host1x/hw/host1x08.c b/drivers/gpu/host1x/hw/host1x08.c
new file mode 100644
index 000000000000..754890c34c74
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Host1x init for Tegra234 SoCs
+ *
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+/* include hw specification */
+#include "host1x08.h"
+#include "host1x08_hardware.h"
+
+/* include code */
+#define HOST1X_HW 8
+
+#include "cdma_hw.c"
+#include "channel_hw.c"
+#include "debug_hw.c"
+#include "intr_hw.c"
+#include "syncpt_hw.c"
+
+#include "../dev.h"
+
+int host1x08_init(struct host1x *host)
+{
+ host->channel_op = &host1x_channel_ops;
+ host->cdma_op = &host1x_cdma_ops;
+ host->cdma_pb_op = &host1x_pushbuffer_ops;
+ host->syncpt_op = &host1x_syncpt_ops;
+ host->intr_op = &host1x_intr_ops;
+ host->debug_op = &host1x_debug_ops;
+
+ return 0;
+}
diff --git a/drivers/gpu/host1x/hw/host1x08.h b/drivers/gpu/host1x/hw/host1x08.h
new file mode 100644
index 000000000000..a6bad56e44cf
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Host1x init for Tegra234 SoCs
+ *
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HOST1X08_H
+#define HOST1X_HOST1X08_H
+
+struct host1x;
+
+int host1x08_init(struct host1x *host);
+
+#endif
diff --git a/drivers/gpu/host1x/hw/host1x08_hardware.h b/drivers/gpu/host1x/hw/host1x08_hardware.h
new file mode 100644
index 000000000000..936243060bff
--- /dev/null
+++ b/drivers/gpu/host1x/hw/host1x08_hardware.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x Register Offsets for Tegra234
+ *
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_HOST1X08_HARDWARE_H
+#define __HOST1X_HOST1X08_HARDWARE_H
+
+#include <linux/types.h>
+#include <linux/bitops.h>
+
+#include "hw_host1x08_uclass.h"
+#include "hw_host1x08_vm.h"
+#include "hw_host1x08_hypervisor.h"
+#include "hw_host1x08_common.h"
+
+#include "opcodes.h"
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h
index 4fc51f70496b..0a2ab8f1da6f 100644
--- a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
}
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+ return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+ host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+ return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+ host1x_uclass_wait_syncpt_32_r()
#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h
index 9e84a4adca9f..60c692b92955 100644
--- a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
}
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+ return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+ host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+ return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+ host1x_uclass_wait_syncpt_32_r()
#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
index aee5a4e32877..2fcc9a2ad3ef 100644
--- a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
}
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+ return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+ host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+ return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+ host1x_uclass_wait_syncpt_32_r()
#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
index c4bacdb7155f..50c32de452fb 100644
--- a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h
@@ -53,7 +53,7 @@ static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
host1x_uclass_incr_syncpt_cond_f(v)
static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
{
- return (v & 0xff) << 0;
+ return (v & 0x3ff) << 0;
}
#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
host1x_uclass_incr_syncpt_indx_f(v)
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
}
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+ return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+ host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+ return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+ host1x_uclass_wait_syncpt_32_r()
#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
index c74070f3f203..887b878f92f7 100644
--- a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
+++ b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h
@@ -53,7 +53,7 @@ static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
host1x_uclass_incr_syncpt_cond_f(v)
static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
{
- return (v & 0xff) << 0;
+ return (v & 0x3ff) << 0;
}
#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
host1x_uclass_incr_syncpt_indx_f(v)
@@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void)
}
#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+ return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+ host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+ return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+ host1x_uclass_wait_syncpt_32_r()
#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_channel.h b/drivers/gpu/host1x/hw/hw_host1x08_channel.h
new file mode 100644
index 000000000000..c9272d2ab14a
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_channel.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#ifndef HOST1X_HW_HOST1X08_CHANNEL_H
+#define HOST1X_HW_HOST1X08_CHANNEL_H
+
+#define HOST1X_CHANNEL_SMMU_STREAMID 0x084
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_common.h b/drivers/gpu/host1x/hw/hw_host1x08_common.h
new file mode 100644
index 000000000000..8e0c99150ec2
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_common.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#define HOST1X_COMMON_OFA_MLOCK 0x4050
+#define HOST1X_COMMON_NVJPG1_MLOCK 0x4070
+#define HOST1X_COMMON_VIC_MLOCK 0x4078
+#define HOST1X_COMMON_NVENC_MLOCK 0x407c
+#define HOST1X_COMMON_NVDEC_MLOCK 0x4080
+#define HOST1X_COMMON_NVJPG_MLOCK 0x4084
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h
new file mode 100644
index 000000000000..22964324c914
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#define HOST1X_HV_SYNCPT_PROT_EN 0x1724
+#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN BIT(1)
+#define HOST1X_HV_CH_MLOCK_EN(x) (0x1700 + (x * 4))
+#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x) (0x1710 + (x * 4))
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_uclass.h b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h
new file mode 100644
index 000000000000..4fb1d090edae
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h
@@ -0,0 +1,181 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2018 NVIDIA Corporation.
+ */
+
+ /*
+ * Function naming determines intended use:
+ *
+ * <x>_r(void) : Returns the offset for register <x>.
+ *
+ * <x>_w(void) : Returns the word offset for word (4 byte) element <x>.
+ *
+ * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits.
+ *
+ * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted
+ * and masked to place it at field <y> of register <x>. This value
+ * can be |'d with others to produce a full register value for
+ * register <x>.
+ *
+ * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This
+ * value can be ~'d and then &'d to clear the value of field <y> for
+ * register <x>.
+ *
+ * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted
+ * to place it at field <y> of register <x>. This value can be |'d
+ * with others to produce a full register value for <x>.
+ *
+ * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register
+ * <x> value 'r' after being shifted to place its LSB at bit 0.
+ * This value is suitable for direct comparison with other unshifted
+ * values appropriate for use in field <y> of register <x>.
+ *
+ * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for
+ * field <y> of register <x>. This value is suitable for direct
+ * comparison with unshifted values appropriate for use in field <y>
+ * of register <x>.
+ */
+
+#ifndef HOST1X_HW_HOST1X08_UCLASS_H
+#define HOST1X_HW_HOST1X08_UCLASS_H
+
+static inline u32 host1x_uclass_incr_syncpt_r(void)
+{
+ return 0x0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT \
+ host1x_uclass_incr_syncpt_r()
+static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v)
+{
+ return (v & 0xff) << 10;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \
+ host1x_uclass_incr_syncpt_cond_f(v)
+static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v)
+{
+ return (v & 0x3ff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \
+ host1x_uclass_incr_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_r(void)
+{
+ return 0x8;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT \
+ host1x_uclass_wait_syncpt_r()
+static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \
+ host1x_uclass_wait_syncpt_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v)
+{
+ return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \
+ host1x_uclass_wait_syncpt_thresh_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_r(void)
+{
+ return 0x9;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \
+ host1x_uclass_wait_syncpt_base_r()
+static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \
+ host1x_uclass_wait_syncpt_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v)
+{
+ return (v & 0xff) << 16;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \
+ host1x_uclass_wait_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v)
+{
+ return (v & 0xffff) << 0;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \
+ host1x_uclass_wait_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_r(void)
+{
+ return 0xb;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \
+ host1x_uclass_load_syncpt_base_r()
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \
+ host1x_uclass_load_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v)
+{
+ return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \
+ host1x_uclass_load_syncpt_base_value_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v)
+{
+ return (v & 0xff) << 24;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \
+ host1x_uclass_incr_syncpt_base_base_indx_f(v)
+static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v)
+{
+ return (v & 0xffffff) << 0;
+}
+#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \
+ host1x_uclass_incr_syncpt_base_offset_f(v)
+static inline u32 host1x_uclass_indoff_r(void)
+{
+ return 0x2d;
+}
+#define HOST1X_UCLASS_INDOFF \
+ host1x_uclass_indoff_r()
+static inline u32 host1x_uclass_indoff_indbe_f(u32 v)
+{
+ return (v & 0xf) << 28;
+}
+#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \
+ host1x_uclass_indoff_indbe_f(v)
+static inline u32 host1x_uclass_indoff_autoinc_f(u32 v)
+{
+ return (v & 0x1) << 27;
+}
+#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \
+ host1x_uclass_indoff_autoinc_f(v)
+static inline u32 host1x_uclass_indoff_indmodid_f(u32 v)
+{
+ return (v & 0xff) << 18;
+}
+#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \
+ host1x_uclass_indoff_indmodid_f(v)
+static inline u32 host1x_uclass_indoff_indroffset_f(u32 v)
+{
+ return (v & 0xffff) << 2;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+ host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_indoff_rwn_read_v(void)
+{
+ return 1;
+}
+#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \
+ host1x_uclass_indoff_indroffset_f(v)
+static inline u32 host1x_uclass_load_syncpt_payload_32_r(void)
+{
+ return 0x4e;
+}
+#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \
+ host1x_uclass_load_syncpt_payload_32_r()
+static inline u32 host1x_uclass_wait_syncpt_32_r(void)
+{
+ return 0x50;
+}
+#define HOST1X_UCLASS_WAIT_SYNCPT_32 \
+ host1x_uclass_wait_syncpt_32_r()
+
+#endif
diff --git a/drivers/gpu/host1x/hw/hw_host1x08_vm.h b/drivers/gpu/host1x/hw/hw_host1x08_vm.h
new file mode 100644
index 000000000000..1455a4670bf8
--- /dev/null
+++ b/drivers/gpu/host1x/hw/hw_host1x08_vm.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#define HOST1X_CHANNEL_DMASTART 0x0000
+#define HOST1X_CHANNEL_DMASTART_HI 0x0004
+#define HOST1X_CHANNEL_DMAPUT 0x0008
+#define HOST1X_CHANNEL_DMAPUT_HI 0x000c
+#define HOST1X_CHANNEL_DMAGET 0x0010
+#define HOST1X_CHANNEL_DMAGET_HI 0x0014
+#define HOST1X_CHANNEL_DMAEND 0x0018
+#define HOST1X_CHANNEL_DMAEND_HI 0x001c
+#define HOST1X_CHANNEL_DMACTRL 0x0020
+#define HOST1X_CHANNEL_DMACTRL_DMASTOP BIT(0)
+#define HOST1X_CHANNEL_DMACTRL_DMAGETRST BIT(1)
+#define HOST1X_CHANNEL_DMACTRL_DMAINITGET BIT(2)
+#define HOST1X_CHANNEL_CMDFIFO_STAT 0x0024
+#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY BIT(13)
+#define HOST1X_CHANNEL_CMDFIFO_RDATA 0x0028
+#define HOST1X_CHANNEL_CMDP_OFFSET 0x0030
+#define HOST1X_CHANNEL_CMDP_CLASS 0x0034
+#define HOST1X_CHANNEL_CHANNELSTAT 0x0038
+#define HOST1X_CHANNEL_CMDPROC_STOP 0x0048
+#define HOST1X_CHANNEL_TEARDOWN 0x004c
+#define HOST1X_CHANNEL_SMMU_STREAMID 0x0084
+
+#define HOST1X_SYNC_SYNCPT_CPU_INCR(x) (0x6400 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x) (0x6600 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_INTR_DEST(x) (0x6684 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x) (0x770c + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x) (0x7790 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT(x) (0x8080 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_INT_THRESH(x) (0xa088 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_CH_APP(x) (0xb090 + 4 * (x))
+#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v) (((v) & 0x3f) << 8)
diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c
index f56375ee6e71..bd5b5ef62f35 100644
--- a/drivers/gpu/host1x/hw/intr_hw.c
+++ b/drivers/gpu/host1x/hw/intr_hw.c
@@ -6,50 +6,74 @@
* Copyright (c) 2010-2013, NVIDIA Corporation.
*/
-#include <linux/interrupt.h>
-#include <linux/irq.h>
#include <linux/io.h>
#include "../intr.h"
#include "../dev.h"
-/*
- * Sync point threshold interrupt service function
- * Handles sync point threshold triggers, in interrupt context
- */
-static void host1x_intr_syncpt_handle(struct host1x_syncpt *syncpt)
+static void process_32_syncpts(struct host1x *host, unsigned long val, u32 reg_offset)
{
- unsigned int id = syncpt->id;
- struct host1x *host = syncpt->host;
+ unsigned int id;
- host1x_sync_writel(host, BIT(id % 32),
- HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id / 32));
- host1x_sync_writel(host, BIT(id % 32),
- HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id / 32));
+ if (!val)
+ return;
+
+ host1x_sync_writel(host, val, HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(reg_offset));
+ host1x_sync_writel(host, val, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(reg_offset));
- schedule_work(&syncpt->intr.work);
+ for_each_set_bit(id, &val, 32)
+ host1x_intr_handle_interrupt(host, reg_offset * 32 + id);
}
static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id)
{
- struct host1x *host = dev_id;
+ struct host1x_intr_irq_data *irq_data = dev_id;
+ struct host1x *host = irq_data->host;
unsigned long reg;
- unsigned int i, id;
+ unsigned int i;
- for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) {
+#if !defined(CONFIG_64BIT)
+ for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 32);
+ i += host->num_syncpt_irqs) {
reg = host1x_sync_readl(host,
HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
- for_each_set_bit(id, &reg, 32) {
- struct host1x_syncpt *syncpt =
- host->syncpt + (i * 32 + id);
- host1x_intr_syncpt_handle(syncpt);
- }
+
+ process_32_syncpts(host, reg, i);
+ }
+#elif HOST1X_HW == 6 || HOST1X_HW == 7
+ /*
+ * Tegra186 and Tegra194 have the first INT_STATUS register not 64-bit aligned,
+ * and only have one interrupt line.
+ */
+ reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(0));
+ process_32_syncpts(host, reg, 0);
+
+ for (i = 1; i < (host->info->nb_pts / 32) - 1; i += 2) {
+ reg = host1x_sync_readq(host,
+ HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
+
+ process_32_syncpts(host, lower_32_bits(reg), i);
+ process_32_syncpts(host, upper_32_bits(reg), i + 1);
}
+ reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i));
+ process_32_syncpts(host, reg, i);
+#else
+ /* All 64-bit capable SoCs have number of syncpoints divisible by 64 */
+ for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 64);
+ i += host->num_syncpt_irqs) {
+ reg = host1x_sync_readq(host,
+ HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i * 2));
+
+ process_32_syncpts(host, lower_32_bits(reg), i * 2 + 0);
+ process_32_syncpts(host, upper_32_bits(reg), i * 2 + 1);
+ }
+#endif
+
return IRQ_HANDLED;
}
-static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
+static void host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
{
unsigned int i;
@@ -61,7 +85,8 @@ static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host)
}
}
-static void intr_hw_init(struct host1x *host, u32 cpm)
+static int
+host1x_intr_init_host_sync(struct host1x *host, u32 cpm)
{
#if HOST1X_HW < 6
/* disable the ip_busy_timeout. this prevents write drops */
@@ -76,48 +101,41 @@ static void intr_hw_init(struct host1x *host, u32 cpm)
/* update host clocks per usec */
host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK);
#endif
-}
-
-static int
-_host1x_intr_init_host_sync(struct host1x *host, u32 cpm,
- void (*syncpt_thresh_work)(struct work_struct *))
-{
- unsigned int i;
- int err;
+#if HOST1X_HW >= 8
+ u32 id;
- host1x_hw_intr_disable_all_syncpt_intrs(host);
-
- for (i = 0; i < host->info->nb_pts; i++)
- INIT_WORK(&host->syncpt[i].intr.work, syncpt_thresh_work);
+ /*
+ * Program threshold interrupt destination among 8 lines per VM,
+ * per syncpoint. For each group of 64 syncpoints (corresponding to two
+ * interrupt status registers), direct to one interrupt line, going
+ * around in a round robin fashion.
+ */
+ for (id = 0; id < host->info->nb_pts; id++) {
+ u32 reg_offset = id / 64;
+ u32 irq_index = reg_offset % host->num_syncpt_irqs;
- err = devm_request_irq(host->dev, host->intr_syncpt_irq,
- syncpt_thresh_isr, IRQF_SHARED,
- "host1x_syncpt", host);
- if (err < 0) {
- WARN_ON(1);
- return err;
+ host1x_sync_writel(host, irq_index, HOST1X_SYNC_SYNCPT_INTR_DEST(id));
}
-
- intr_hw_init(host, cpm);
+#endif
return 0;
}
-static void _host1x_intr_set_syncpt_threshold(struct host1x *host,
+static void host1x_intr_set_syncpt_threshold(struct host1x *host,
unsigned int id,
u32 thresh)
{
host1x_sync_writel(host, thresh, HOST1X_SYNC_SYNCPT_INT_THRESH(id));
}
-static void _host1x_intr_enable_syncpt_intr(struct host1x *host,
+static void host1x_intr_enable_syncpt_intr(struct host1x *host,
unsigned int id)
{
host1x_sync_writel(host, BIT(id % 32),
HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id / 32));
}
-static void _host1x_intr_disable_syncpt_intr(struct host1x *host,
+static void host1x_intr_disable_syncpt_intr(struct host1x *host,
unsigned int id)
{
host1x_sync_writel(host, BIT(id % 32),
@@ -126,23 +144,11 @@ static void _host1x_intr_disable_syncpt_intr(struct host1x *host,
HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id / 32));
}
-static int _host1x_free_syncpt_irq(struct host1x *host)
-{
- unsigned int i;
-
- devm_free_irq(host->dev, host->intr_syncpt_irq, host);
-
- for (i = 0; i < host->info->nb_pts; i++)
- cancel_work_sync(&host->syncpt[i].intr.work);
-
- return 0;
-}
-
static const struct host1x_intr_ops host1x_intr_ops = {
- .init_host_sync = _host1x_intr_init_host_sync,
- .set_syncpt_threshold = _host1x_intr_set_syncpt_threshold,
- .enable_syncpt_intr = _host1x_intr_enable_syncpt_intr,
- .disable_syncpt_intr = _host1x_intr_disable_syncpt_intr,
- .disable_all_syncpt_intrs = _host1x_intr_disable_all_syncpt_intrs,
- .free_syncpt_irq = _host1x_free_syncpt_irq,
+ .init_host_sync = host1x_intr_init_host_sync,
+ .set_syncpt_threshold = host1x_intr_set_syncpt_threshold,
+ .enable_syncpt_intr = host1x_intr_enable_syncpt_intr,
+ .disable_syncpt_intr = host1x_intr_disable_syncpt_intr,
+ .disable_all_syncpt_intrs = host1x_intr_disable_all_syncpt_intrs,
+ .isr = syncpt_thresh_isr,
};
diff --git a/drivers/gpu/host1x/hw/opcodes.h b/drivers/gpu/host1x/hw/opcodes.h
new file mode 100644
index 000000000000..649614499b04
--- /dev/null
+++ b/drivers/gpu/host1x/hw/opcodes.h
@@ -0,0 +1,150 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Tegra host1x opcodes
+ *
+ * Copyright (c) 2022 NVIDIA Corporation.
+ */
+
+#ifndef __HOST1X_OPCODES_H
+#define __HOST1X_OPCODES_H
+
+#include <linux/types.h>
+
+static inline u32 host1x_class_host_wait_syncpt(
+ unsigned indx, unsigned threshold)
+{
+ return host1x_uclass_wait_syncpt_indx_f(indx)
+ | host1x_uclass_wait_syncpt_thresh_f(threshold);
+}
+
+static inline u32 host1x_class_host_load_syncpt_base(
+ unsigned indx, unsigned threshold)
+{
+ return host1x_uclass_load_syncpt_base_base_indx_f(indx)
+ | host1x_uclass_load_syncpt_base_value_f(threshold);
+}
+
+static inline u32 host1x_class_host_wait_syncpt_base(
+ unsigned indx, unsigned base_indx, unsigned offset)
+{
+ return host1x_uclass_wait_syncpt_base_indx_f(indx)
+ | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx)
+ | host1x_uclass_wait_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt_base(
+ unsigned base_indx, unsigned offset)
+{
+ return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx)
+ | host1x_uclass_incr_syncpt_base_offset_f(offset);
+}
+
+static inline u32 host1x_class_host_incr_syncpt(
+ unsigned cond, unsigned indx)
+{
+ return host1x_uclass_incr_syncpt_cond_f(cond)
+ | host1x_uclass_incr_syncpt_indx_f(indx);
+}
+
+static inline u32 host1x_class_host_indoff_reg_write(
+ unsigned mod_id, unsigned offset, bool auto_inc)
+{
+ u32 v = host1x_uclass_indoff_indbe_f(0xf)
+ | host1x_uclass_indoff_indmodid_f(mod_id)
+ | host1x_uclass_indoff_indroffset_f(offset);
+ if (auto_inc)
+ v |= host1x_uclass_indoff_autoinc_f(1);
+ return v;
+}
+
+static inline u32 host1x_class_host_indoff_reg_read(
+ unsigned mod_id, unsigned offset, bool auto_inc)
+{
+ u32 v = host1x_uclass_indoff_indmodid_f(mod_id)
+ | host1x_uclass_indoff_indroffset_f(offset)
+ | host1x_uclass_indoff_rwn_read_v();
+ if (auto_inc)
+ v |= host1x_uclass_indoff_autoinc_f(1);
+ return v;
+}
+
+static inline u32 host1x_opcode_setclass(
+ unsigned class_id, unsigned offset, unsigned mask)
+{
+ return (0 << 28) | (offset << 16) | (class_id << 6) | mask;
+}
+
+static inline u32 host1x_opcode_incr(unsigned offset, unsigned count)
+{
+ return (1 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count)
+{
+ return (2 << 28) | (offset << 16) | count;
+}
+
+static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask)
+{
+ return (3 << 28) | (offset << 16) | mask;
+}
+
+static inline u32 host1x_opcode_imm(unsigned offset, unsigned value)
+{
+ return (4 << 28) | (offset << 16) | value;
+}
+
+static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx)
+{
+ return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(),
+ host1x_class_host_incr_syncpt(cond, indx));
+}
+
+static inline u32 host1x_opcode_restart(unsigned address)
+{
+ return (5 << 28) | (address >> 4);
+}
+
+static inline u32 host1x_opcode_gather(unsigned count)
+{
+ return (6 << 28) | count;
+}
+
+static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count)
+{
+ return (6 << 28) | (offset << 16) | BIT(15) | count;
+}
+
+static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count)
+{
+ return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count;
+}
+
+static inline u32 host1x_opcode_setstreamid(unsigned streamid)
+{
+ return (7 << 28) | streamid;
+}
+
+static inline u32 host1x_opcode_setpayload(unsigned payload)
+{
+ return (9 << 28) | payload;
+}
+
+static inline u32 host1x_opcode_gather_wide(unsigned count)
+{
+ return (12 << 28) | count;
+}
+
+static inline u32 host1x_opcode_acquire_mlock(unsigned mlock)
+{
+ return (14 << 28) | (0 << 24) | mlock;
+}
+
+static inline u32 host1x_opcode_release_mlock(unsigned mlock)
+{
+ return (14 << 28) | (1 << 24) | mlock;
+}
+
+#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0)
+
+#endif
diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c
index dd39d67ccec3..8cf35b2eff3d 100644
--- a/drivers/gpu/host1x/hw/syncpt_hw.c
+++ b/drivers/gpu/host1x/hw/syncpt_hw.c
@@ -106,9 +106,6 @@ static void syncpt_assign_to_channel(struct host1x_syncpt *sp,
#if HOST1X_HW >= 6
struct host1x *host = sp->host;
- if (!host->hv_regs)
- return;
-
host1x_sync_writel(host,
HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff),
HOST1X_SYNC_SYNCPT_CH_APP(sp->id));
diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c
index 6d1f3c0fdbe7..f77a678949e9 100644
--- a/drivers/gpu/host1x/intr.c
+++ b/drivers/gpu/host1x/intr.c
@@ -2,300 +2,137 @@
/*
* Tegra host1x Interrupt Management
*
- * Copyright (c) 2010-2013, NVIDIA Corporation.
+ * Copyright (c) 2010-2021, NVIDIA Corporation.
*/
#include <linux/clk.h>
#include <linux/interrupt.h>
-#include <linux/slab.h>
-#include <linux/irq.h>
-
-#include <trace/events/host1x.h>
-#include "channel.h"
#include "dev.h"
+#include "fence.h"
#include "intr.h"
-/* Wait list management */
-
-enum waitlist_state {
- WLS_PENDING,
- WLS_REMOVED,
- WLS_CANCELLED,
- WLS_HANDLED
-};
-
-static void waiter_release(struct kref *kref)
-{
- kfree(container_of(kref, struct host1x_waitlist, refcount));
-}
-
-/*
- * add a waiter to a waiter queue, sorted by threshold
- * returns true if it was added at the head of the queue
- */
-static bool add_waiter_to_queue(struct host1x_waitlist *waiter,
- struct list_head *queue)
+static void host1x_intr_add_fence_to_list(struct host1x_fence_list *list,
+ struct host1x_syncpt_fence *fence)
{
- struct host1x_waitlist *pos;
- u32 thresh = waiter->thresh;
+ struct host1x_syncpt_fence *fence_in_list;
- list_for_each_entry_reverse(pos, queue, list)
- if ((s32)(pos->thresh - thresh) <= 0) {
- list_add(&waiter->list, &pos->list);
- return false;
+ list_for_each_entry_reverse(fence_in_list, &list->list, list) {
+ if ((s32)(fence_in_list->threshold - fence->threshold) <= 0) {
+ /* Fence in list is before us, we can insert here */
+ list_add(&fence->list, &fence_in_list->list);
+ return;
}
+ }
- list_add(&waiter->list, queue);
- return true;
+ /* Add as first in list */
+ list_add(&fence->list, &list->list);
}
-/*
- * run through a waiter queue for a single sync point ID
- * and gather all completed waiters into lists by actions
- */
-static void remove_completed_waiters(struct list_head *head, u32 sync,
- struct list_head completed[HOST1X_INTR_ACTION_COUNT])
+static void host1x_intr_update_hw_state(struct host1x *host, struct host1x_syncpt *sp)
{
- struct list_head *dest;
- struct host1x_waitlist *waiter, *next, *prev;
+ struct host1x_syncpt_fence *fence;
- list_for_each_entry_safe(waiter, next, head, list) {
- if ((s32)(waiter->thresh - sync) > 0)
- break;
+ if (!list_empty(&sp->fences.list)) {
+ fence = list_first_entry(&sp->fences.list, struct host1x_syncpt_fence, list);
- dest = completed + waiter->action;
-
- /* consolidate submit cleanups */
- if (waiter->action == HOST1X_INTR_ACTION_SUBMIT_COMPLETE &&
- !list_empty(dest)) {
- prev = list_entry(dest->prev,
- struct host1x_waitlist, list);
- if (prev->data == waiter->data) {
- prev->count++;
- dest = NULL;
- }
- }
-
- /* PENDING->REMOVED or CANCELLED->HANDLED */
- if (atomic_inc_return(&waiter->state) == WLS_HANDLED || !dest) {
- list_del(&waiter->list);
- kref_put(&waiter->refcount, waiter_release);
- } else
- list_move_tail(&waiter->list, dest);
+ host1x_hw_intr_set_syncpt_threshold(host, sp->id, fence->threshold);
+ host1x_hw_intr_enable_syncpt_intr(host, sp->id);
+ } else {
+ host1x_hw_intr_disable_syncpt_intr(host, sp->id);
}
}
-static void reset_threshold_interrupt(struct host1x *host,
- struct list_head *head,
- unsigned int id)
-{
- u32 thresh =
- list_first_entry(head, struct host1x_waitlist, list)->thresh;
-
- host1x_hw_intr_set_syncpt_threshold(host, id, thresh);
- host1x_hw_intr_enable_syncpt_intr(host, id);
-}
-
-static void action_submit_complete(struct host1x_waitlist *waiter)
+void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence)
{
- struct host1x_channel *channel = waiter->data;
-
- host1x_cdma_update(&channel->cdma);
+ struct host1x_fence_list *fence_list = &fence->sp->fences;
- /* Add nr_completed to trace */
- trace_host1x_channel_submit_complete(dev_name(channel->dev),
- waiter->count, waiter->thresh);
-}
-
-static void action_wakeup(struct host1x_waitlist *waiter)
-{
- wait_queue_head_t *wq = waiter->data;
+ INIT_LIST_HEAD(&fence->list);
- wake_up(wq);
+ host1x_intr_add_fence_to_list(fence_list, fence);
+ host1x_intr_update_hw_state(host, fence->sp);
}
-static void action_wakeup_interruptible(struct host1x_waitlist *waiter)
+bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *fence)
{
- wait_queue_head_t *wq = waiter->data;
+ struct host1x_fence_list *fence_list = &fence->sp->fences;
+ unsigned long irqflags;
- wake_up_interruptible(wq);
-}
-
-typedef void (*action_handler)(struct host1x_waitlist *waiter);
-
-static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = {
- action_submit_complete,
- action_wakeup,
- action_wakeup_interruptible,
-};
+ spin_lock_irqsave(&fence_list->lock, irqflags);
-static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT])
-{
- struct list_head *head = completed;
- unsigned int i;
-
- for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i, ++head) {
- action_handler handler = action_handlers[i];
- struct host1x_waitlist *waiter, *next;
-
- list_for_each_entry_safe(waiter, next, head, list) {
- list_del(&waiter->list);
- handler(waiter);
- WARN_ON(atomic_xchg(&waiter->state, WLS_HANDLED) !=
- WLS_REMOVED);
- kref_put(&waiter->refcount, waiter_release);
- }
+ if (list_empty(&fence->list)) {
+ spin_unlock_irqrestore(&fence_list->lock, irqflags);
+ return false;
}
-}
-
-/*
- * Remove & handle all waiters that have completed for the given syncpt
- */
-static int process_wait_list(struct host1x *host,
- struct host1x_syncpt *syncpt,
- u32 threshold)
-{
- struct list_head completed[HOST1X_INTR_ACTION_COUNT];
- unsigned int i;
- int empty;
-
- for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i)
- INIT_LIST_HEAD(completed + i);
-
- spin_lock(&syncpt->intr.lock);
-
- remove_completed_waiters(&syncpt->intr.wait_head, threshold,
- completed);
-
- empty = list_empty(&syncpt->intr.wait_head);
- if (empty)
- host1x_hw_intr_disable_syncpt_intr(host, syncpt->id);
- else
- reset_threshold_interrupt(host, &syncpt->intr.wait_head,
- syncpt->id);
- spin_unlock(&syncpt->intr.lock);
+ list_del_init(&fence->list);
+ host1x_intr_update_hw_state(host, fence->sp);
- run_handlers(completed);
+ spin_unlock_irqrestore(&fence_list->lock, irqflags);
- return empty;
-}
-
-/*
- * Sync point threshold interrupt service thread function
- * Handles sync point threshold triggers, in thread context
- */
-
-static void syncpt_thresh_work(struct work_struct *work)
-{
- struct host1x_syncpt_intr *syncpt_intr =
- container_of(work, struct host1x_syncpt_intr, work);
- struct host1x_syncpt *syncpt =
- container_of(syncpt_intr, struct host1x_syncpt, intr);
- unsigned int id = syncpt->id;
- struct host1x *host = syncpt->host;
-
- (void)process_wait_list(host, syncpt,
- host1x_syncpt_load(host->syncpt + id));
+ return true;
}
-int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
- u32 thresh, enum host1x_intr_action action,
- void *data, struct host1x_waitlist *waiter,
- void **ref)
+void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id)
{
- int queue_was_empty;
-
- if (waiter == NULL) {
- pr_warn("%s: NULL waiter\n", __func__);
- return -EINVAL;
- }
-
- /* initialize a new waiter */
- INIT_LIST_HEAD(&waiter->list);
- kref_init(&waiter->refcount);
- if (ref)
- kref_get(&waiter->refcount);
- waiter->thresh = thresh;
- waiter->action = action;
- atomic_set(&waiter->state, WLS_PENDING);
- waiter->data = data;
- waiter->count = 1;
+ struct host1x_syncpt *sp = &host->syncpt[id];
+ struct host1x_syncpt_fence *fence, *tmp;
+ unsigned int value;
- spin_lock(&syncpt->intr.lock);
+ value = host1x_syncpt_load(sp);
- queue_was_empty = list_empty(&syncpt->intr.wait_head);
+ spin_lock(&sp->fences.lock);
- if (add_waiter_to_queue(waiter, &syncpt->intr.wait_head)) {
- /* added at head of list - new threshold value */
- host1x_hw_intr_set_syncpt_threshold(host, syncpt->id, thresh);
+ list_for_each_entry_safe(fence, tmp, &sp->fences.list, list) {
+ if (((value - fence->threshold) & 0x80000000U) != 0U) {
+ /* Fence is not yet expired, we are done */
+ break;
+ }
- /* added as first waiter - enable interrupt */
- if (queue_was_empty)
- host1x_hw_intr_enable_syncpt_intr(host, syncpt->id);
+ list_del_init(&fence->list);
+ host1x_fence_signal(fence);
}
- if (ref)
- *ref = waiter;
-
- spin_unlock(&syncpt->intr.lock);
+ /* Re-enable interrupt if necessary */
+ host1x_intr_update_hw_state(host, sp);
- return 0;
+ spin_unlock(&sp->fences.lock);
}
-void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref,
- bool flush)
+int host1x_intr_init(struct host1x *host)
{
- struct host1x_waitlist *waiter = ref;
- struct host1x_syncpt *syncpt;
-
- atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED);
-
- syncpt = host->syncpt + id;
+ struct host1x_intr_irq_data *irq_data;
+ unsigned int id;
+ int i, err;
- spin_lock(&syncpt->intr.lock);
- if (atomic_cmpxchg(&waiter->state, WLS_CANCELLED, WLS_HANDLED) ==
- WLS_CANCELLED) {
- list_del(&waiter->list);
- kref_put(&waiter->refcount, waiter_release);
- }
- spin_unlock(&syncpt->intr.lock);
+ for (id = 0; id < host1x_syncpt_nb_pts(host); ++id) {
+ struct host1x_syncpt *syncpt = &host->syncpt[id];
- if (flush) {
- /* Wait until any concurrently executing handler has finished. */
- while (atomic_read(&waiter->state) != WLS_HANDLED)
- schedule();
+ spin_lock_init(&syncpt->fences.lock);
+ INIT_LIST_HEAD(&syncpt->fences.list);
}
- kref_put(&waiter->refcount, waiter_release);
-}
-
-int host1x_intr_init(struct host1x *host, unsigned int irq_sync)
-{
- unsigned int id;
- u32 nb_pts = host1x_syncpt_nb_pts(host);
+ irq_data = devm_kcalloc(host->dev, host->num_syncpt_irqs, sizeof(irq_data[0]), GFP_KERNEL);
+ if (!irq_data)
+ return -ENOMEM;
- mutex_init(&host->intr_mutex);
- host->intr_syncpt_irq = irq_sync;
+ host1x_hw_intr_disable_all_syncpt_intrs(host);
- for (id = 0; id < nb_pts; ++id) {
- struct host1x_syncpt *syncpt = host->syncpt + id;
+ for (i = 0; i < host->num_syncpt_irqs; i++) {
+ irq_data[i].host = host;
+ irq_data[i].offset = i;
- spin_lock_init(&syncpt->intr.lock);
- INIT_LIST_HEAD(&syncpt->intr.wait_head);
- snprintf(syncpt->intr.thresh_irq_name,
- sizeof(syncpt->intr.thresh_irq_name),
- "host1x_sp_%02u", id);
+ err = devm_request_irq(host->dev, host->syncpt_irqs[i],
+ host->intr_op->isr, IRQF_SHARED,
+ "host1x_syncpt", &irq_data[i]);
+ if (err < 0)
+ return err;
}
- host1x_intr_start(host);
-
return 0;
}
void host1x_intr_deinit(struct host1x *host)
{
- host1x_intr_stop(host);
}
void host1x_intr_start(struct host1x *host)
@@ -304,8 +141,7 @@ void host1x_intr_start(struct host1x *host)
int err;
mutex_lock(&host->intr_mutex);
- err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000),
- syncpt_thresh_work);
+ err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000));
if (err) {
mutex_unlock(&host->intr_mutex);
return;
@@ -315,36 +151,5 @@ void host1x_intr_start(struct host1x *host)
void host1x_intr_stop(struct host1x *host)
{
- unsigned int id;
- struct host1x_syncpt *syncpt = host->syncpt;
- u32 nb_pts = host1x_syncpt_nb_pts(host);
-
- mutex_lock(&host->intr_mutex);
-
host1x_hw_intr_disable_all_syncpt_intrs(host);
-
- for (id = 0; id < nb_pts; ++id) {
- struct host1x_waitlist *waiter, *next;
-
- list_for_each_entry_safe(waiter, next,
- &syncpt[id].intr.wait_head, list) {
- if (atomic_cmpxchg(&waiter->state,
- WLS_CANCELLED, WLS_HANDLED) == WLS_CANCELLED) {
- list_del(&waiter->list);
- kref_put(&waiter->refcount, waiter_release);
- }
- }
-
- if (!list_empty(&syncpt[id].intr.wait_head)) {
- /* output diagnostics */
- mutex_unlock(&host->intr_mutex);
- pr_warn("%s cannot stop syncpt intr id=%u\n",
- __func__, id);
- return;
- }
- }
-
- host1x_hw_intr_free_syncpt_irq(host);
-
- mutex_unlock(&host->intr_mutex);
}
diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h
index 6ea55e615e3a..11cdf13e32fe 100644
--- a/drivers/gpu/host1x/intr.h
+++ b/drivers/gpu/host1x/intr.h
@@ -2,85 +2,22 @@
/*
* Tegra host1x Interrupt Management
*
- * Copyright (c) 2010-2013, NVIDIA Corporation.
+ * Copyright (c) 2010-2021, NVIDIA Corporation.
*/
#ifndef __HOST1X_INTR_H
#define __HOST1X_INTR_H
-#include <linux/interrupt.h>
-#include <linux/workqueue.h>
-
-struct host1x_syncpt;
struct host1x;
+struct host1x_syncpt_fence;
-enum host1x_intr_action {
- /*
- * Perform cleanup after a submit has completed.
- * 'data' points to a channel
- */
- HOST1X_INTR_ACTION_SUBMIT_COMPLETE = 0,
-
- /*
- * Wake up a task.
- * 'data' points to a wait_queue_head_t
- */
- HOST1X_INTR_ACTION_WAKEUP,
-
- /*
- * Wake up a interruptible task.
- * 'data' points to a wait_queue_head_t
- */
- HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
-
- HOST1X_INTR_ACTION_COUNT
-};
-
-struct host1x_syncpt_intr {
- spinlock_t lock;
- struct list_head wait_head;
- char thresh_irq_name[12];
- struct work_struct work;
-};
-
-struct host1x_waitlist {
- struct list_head list;
- struct kref refcount;
- u32 thresh;
- enum host1x_intr_action action;
- atomic_t state;
- void *data;
- int count;
+struct host1x_intr_irq_data {
+ struct host1x *host;
+ u32 offset;
};
-/*
- * Schedule an action to be taken when a sync point reaches the given threshold.
- *
- * @id the sync point
- * @thresh the threshold
- * @action the action to take
- * @data a pointer to extra data depending on action, see above
- * @waiter waiter structure - assumes ownership
- * @ref must be passed if cancellation is possible, else NULL
- *
- * This is a non-blocking api.
- */
-int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt,
- u32 thresh, enum host1x_intr_action action,
- void *data, struct host1x_waitlist *waiter,
- void **ref);
-
-/*
- * Unreference an action submitted to host1x_intr_add_action().
- * You must call this if you passed non-NULL as ref.
- * @ref the ref returned from host1x_intr_add_action()
- * @flush wait until any pending handlers have completed before returning.
- */
-void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref,
- bool flush);
-
/* Initialize host1x sync point interrupt */
-int host1x_intr_init(struct host1x *host, unsigned int irq_sync);
+int host1x_intr_init(struct host1x *host);
/* Deinitialize host1x sync point interrupt */
void host1x_intr_deinit(struct host1x *host);
@@ -91,5 +28,10 @@ void host1x_intr_start(struct host1x *host);
/* Disable host1x sync point interrupt */
void host1x_intr_stop(struct host1x *host);
-irqreturn_t host1x_syncpt_thresh_fn(void *dev_id);
+void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id);
+
+void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence);
+
+bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *fence);
+
#endif
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index adbdc225de8d..3ed49e1fd933 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -24,21 +24,25 @@
#define HOST1X_WAIT_SYNCPT_OFFSET 0x8
struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
- u32 num_cmdbufs, u32 num_relocs)
+ u32 num_cmdbufs, u32 num_relocs,
+ bool skip_firewall)
{
struct host1x_job *job = NULL;
unsigned int num_unpins = num_relocs;
+ bool enable_firewall;
u64 total;
void *mem;
- if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+ enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall;
+
+ if (!enable_firewall)
num_unpins += num_cmdbufs;
/* Check that we're not going to overflow */
total = sizeof(struct host1x_job) +
(u64)num_relocs * sizeof(struct host1x_reloc) +
(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
- (u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
+ (u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
(u64)num_unpins * sizeof(dma_addr_t) +
(u64)num_unpins * sizeof(u32 *);
if (total > ULONG_MAX)
@@ -48,6 +52,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
if (!job)
return NULL;
+ job->enable_firewall = enable_firewall;
+
kref_init(&job->ref);
job->channel = ch;
@@ -57,8 +63,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
mem += num_relocs * sizeof(struct host1x_reloc);
job->unpins = num_unpins ? mem : NULL;
mem += num_unpins * sizeof(struct host1x_job_unpin_data);
- job->gathers = num_cmdbufs ? mem : NULL;
- mem += num_cmdbufs * sizeof(struct host1x_job_gather);
+ job->cmds = num_cmdbufs ? mem : NULL;
+ mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
job->addr_phys = num_unpins ? mem : NULL;
job->reloc_addr_phys = job->addr_phys;
@@ -79,6 +85,19 @@ static void job_free(struct kref *ref)
{
struct host1x_job *job = container_of(ref, struct host1x_job, ref);
+ if (job->release)
+ job->release(job);
+
+ if (job->fence) {
+ /*
+ * remove_callback is atomic w.r.t. fence signaling, so
+ * after the call returns, we know that the callback is not
+ * in execution, and the fence can be safely freed.
+ */
+ dma_fence_remove_callback(job->fence, &job->fence_cb);
+ dma_fence_put(job->fence);
+ }
+
if (job->syncpt)
host1x_syncpt_put(job->syncpt);
@@ -94,32 +113,47 @@ EXPORT_SYMBOL(host1x_job_put);
void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
unsigned int words, unsigned int offset)
{
- struct host1x_job_gather *gather = &job->gathers[job->num_gathers];
+ struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;
gather->words = words;
gather->bo = bo;
gather->offset = offset;
- job->num_gathers++;
+ job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_gather);
+void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
+ bool relative, u32 next_class)
+{
+ struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];
+
+ cmd->is_wait = true;
+ cmd->wait.id = id;
+ cmd->wait.threshold = thresh;
+ cmd->wait.next_class = next_class;
+ cmd->wait.relative = relative;
+
+ job->num_cmds++;
+}
+EXPORT_SYMBOL(host1x_job_add_wait);
+
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
+ unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;
struct host1x_client *client = job->client;
struct device *dev = client->dev;
struct host1x_job_gather *g;
- struct iommu_domain *domain;
unsigned int i;
int err;
- domain = iommu_get_domain_for_dev(dev);
job->num_unpins = 0;
for (i = 0; i < job->num_relocs; i++) {
struct host1x_reloc *reloc = &job->relocs[i];
- dma_addr_t phys_addr, *phys;
- struct sg_table *sgt;
+ enum dma_data_direction direction;
+ struct host1x_bo_mapping *map;
+ struct host1x_bo *bo;
reloc->target.bo = host1x_bo_get(reloc->target.bo);
if (!reloc->target.bo) {
@@ -127,64 +161,44 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
goto unpin;
}
- /*
- * If the client device is not attached to an IOMMU, the
- * physical address of the buffer object can be used.
- *
- * Similarly, when an IOMMU domain is shared between all
- * host1x clients, the IOVA is already available, so no
- * need to map the buffer object again.
- *
- * XXX Note that this isn't always safe to do because it
- * relies on an assumption that no cache maintenance is
- * needed on the buffer objects.
- */
- if (!domain || client->group)
- phys = &phys_addr;
- else
- phys = NULL;
-
- sgt = host1x_bo_pin(dev, reloc->target.bo, phys);
- if (IS_ERR(sgt)) {
- err = PTR_ERR(sgt);
- goto unpin;
- }
+ bo = reloc->target.bo;
- if (sgt) {
- unsigned long mask = HOST1X_RELOC_READ |
- HOST1X_RELOC_WRITE;
- enum dma_data_direction dir;
-
- switch (reloc->flags & mask) {
- case HOST1X_RELOC_READ:
- dir = DMA_TO_DEVICE;
- break;
+ switch (reloc->flags & mask) {
+ case HOST1X_RELOC_READ:
+ direction = DMA_TO_DEVICE;
+ break;
- case HOST1X_RELOC_WRITE:
- dir = DMA_FROM_DEVICE;
- break;
+ case HOST1X_RELOC_WRITE:
+ direction = DMA_FROM_DEVICE;
+ break;
- case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
- dir = DMA_BIDIRECTIONAL;
- break;
+ case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
+ direction = DMA_BIDIRECTIONAL;
+ break;
- default:
- err = -EINVAL;
- goto unpin;
- }
+ default:
+ err = -EINVAL;
+ goto unpin;
+ }
- err = dma_map_sgtable(dev, sgt, dir, 0);
- if (err)
- goto unpin;
+ map = host1x_bo_pin(dev, bo, direction, NULL);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto unpin;
+ }
- job->unpins[job->num_unpins].dev = dev;
- job->unpins[job->num_unpins].dir = dir;
- phys_addr = sg_dma_address(sgt->sgl);
+ /*
+ * host1x clients are generally not able to do scatter-gather themselves, so fail
+ * if the buffer is discontiguous and we fail to map its SG table to a single
+ * contiguous chunk of I/O virtual memory.
+ */
+ if (map->chunks > 1) {
+ err = -EINVAL;
+ goto unpin;
}
- job->addr_phys[job->num_unpins] = phys_addr;
- job->unpins[job->num_unpins].bo = reloc->target.bo;
- job->unpins[job->num_unpins].sgt = sgt;
+ job->addr_phys[job->num_unpins] = map->phys;
+ job->unpins[job->num_unpins].map = map;
job->num_unpins++;
}
@@ -192,45 +206,38 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
* We will copy gathers BO content later, so there is no need to
* hold and pin them.
*/
- if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+ if (job->enable_firewall)
return 0;
- for (i = 0; i < job->num_gathers; i++) {
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_bo_mapping *map;
size_t gather_size = 0;
struct scatterlist *sg;
- struct sg_table *sgt;
- dma_addr_t phys_addr;
unsigned long shift;
struct iova *alloc;
- dma_addr_t *phys;
unsigned int j;
- g = &job->gathers[i];
+ if (job->cmds[i].is_wait)
+ continue;
+
+ g = &job->cmds[i].gather;
+
g->bo = host1x_bo_get(g->bo);
if (!g->bo) {
err = -EINVAL;
goto unpin;
}
- /**
- * If the host1x is not attached to an IOMMU, there is no need
- * to map the buffer object for the host1x, since the physical
- * address can simply be used.
- */
- if (!iommu_get_domain_for_dev(host->dev))
- phys = &phys_addr;
- else
- phys = NULL;
-
- sgt = host1x_bo_pin(host->dev, g->bo, phys);
- if (IS_ERR(sgt)) {
- err = PTR_ERR(sgt);
- goto put;
+ map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, NULL);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto unpin;
}
if (host->domain) {
- for_each_sgtable_sg(sgt, sg, j)
+ for_each_sgtable_sg(map->sgt, sg, j)
gather_size += sg->length;
+
gather_size = iova_align(&host->iova, gather_size);
shift = iova_shift(&host->iova);
@@ -241,33 +248,23 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
goto put;
}
- err = iommu_map_sgtable(host->domain,
- iova_dma_addr(&host->iova, alloc),
- sgt, IOMMU_READ);
+ err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc),
+ map->sgt, IOMMU_READ);
if (err == 0) {
__free_iova(&host->iova, alloc);
err = -EINVAL;
goto put;
}
- job->unpins[job->num_unpins].size = gather_size;
- phys_addr = iova_dma_addr(&host->iova, alloc);
- } else if (sgt) {
- err = dma_map_sgtable(host->dev, sgt, DMA_TO_DEVICE, 0);
- if (err)
- goto put;
-
- job->unpins[job->num_unpins].dir = DMA_TO_DEVICE;
- job->unpins[job->num_unpins].dev = host->dev;
- phys_addr = sg_dma_address(sgt->sgl);
+ map->phys = iova_dma_addr(&host->iova, alloc);
+ map->size = gather_size;
}
- job->addr_phys[job->num_unpins] = phys_addr;
- job->gather_addr_phys[i] = phys_addr;
-
- job->unpins[job->num_unpins].bo = g->bo;
- job->unpins[job->num_unpins].sgt = sgt;
+ job->addr_phys[job->num_unpins] = map->phys;
+ job->unpins[job->num_unpins].map = map;
job->num_unpins++;
+
+ job->gather_addr_phys[i] = map->phys;
}
return 0;
@@ -296,7 +293,7 @@ static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
if (cmdbuf != reloc->cmdbuf.bo)
continue;
- if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
+ if (job->enable_firewall) {
target = (u32 *)job->gather_copy_mapped +
reloc->cmdbuf.offset / sizeof(u32) +
g->offset / sizeof(u32);
@@ -538,8 +535,13 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job,
fw.num_relocs = job->num_relocs;
fw.class = job->class;
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_gather *g;
+
+ if (job->cmds[i].is_wait)
+ continue;
+
+ g = &job->cmds[i].gather;
size += g->words * sizeof(u32);
}
@@ -561,10 +563,14 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job,
job->gather_copy_size = size;
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_gather *g;
void *gather;
+ if (job->cmds[i].is_wait)
+ continue;
+ g = &job->cmds[i].gather;
+
/* Copy the gather */
gather = host1x_bo_mmap(g->bo);
memcpy(job->gather_copy_mapped + offset, gather + g->offset,
@@ -600,28 +606,33 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
if (err)
goto out;
- if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
+ if (job->enable_firewall) {
err = copy_gathers(host->dev, job, dev);
if (err)
goto out;
}
/* patch gathers */
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_gather *g;
+
+ if (job->cmds[i].is_wait)
+ continue;
+ g = &job->cmds[i].gather;
/* process each gather mem only once */
if (g->handled)
continue;
/* copy_gathers() sets gathers base if firewall is enabled */
- if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+ if (!job->enable_firewall)
g->base = job->gather_addr_phys[i];
- for (j = i + 1; j < job->num_gathers; j++) {
- if (job->gathers[j].bo == g->bo) {
- job->gathers[j].handled = true;
- job->gathers[j].base = g->base;
+ for (j = i + 1; j < job->num_cmds; j++) {
+ if (!job->cmds[j].is_wait &&
+ job->cmds[j].gather.bo == g->bo) {
+ job->cmds[j].gather.handled = true;
+ job->cmds[j].gather.base = g->base;
}
}
@@ -645,23 +656,16 @@ void host1x_job_unpin(struct host1x_job *job)
unsigned int i;
for (i = 0; i < job->num_unpins; i++) {
- struct host1x_job_unpin_data *unpin = &job->unpins[i];
- struct device *dev = unpin->dev ?: host->dev;
- struct sg_table *sgt = unpin->sgt;
-
- if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
- unpin->size && host->domain) {
- iommu_unmap(host->domain, job->addr_phys[i],
- unpin->size);
- free_iova(&host->iova,
- iova_pfn(&host->iova, job->addr_phys[i]));
- }
+ struct host1x_bo_mapping *map = job->unpins[i].map;
+ struct host1x_bo *bo = map->bo;
- if (unpin->dev && sgt)
- dma_unmap_sgtable(unpin->dev, sgt, unpin->dir, 0);
+ if (!job->enable_firewall && map->size && host->domain) {
+ iommu_unmap(host->domain, job->addr_phys[i], map->size);
+ free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i]));
+ }
- host1x_bo_unpin(dev, unpin->bo, sgt);
- host1x_bo_put(unpin->bo);
+ host1x_bo_unpin(map);
+ host1x_bo_put(bo);
}
job->num_unpins = 0;
diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h
index 94bc2e4ae241..dad5a1946693 100644
--- a/drivers/gpu/host1x/job.h
+++ b/drivers/gpu/host1x/job.h
@@ -18,12 +18,24 @@ struct host1x_job_gather {
bool handled;
};
+struct host1x_job_wait {
+ u32 id;
+ u32 threshold;
+ u32 next_class;
+ bool relative;
+};
+
+struct host1x_job_cmd {
+ bool is_wait;
+
+ union {
+ struct host1x_job_gather gather;
+ struct host1x_job_wait wait;
+ };
+};
+
struct host1x_job_unpin_data {
- struct host1x_bo *bo;
- struct sg_table *sgt;
- struct device *dev;
- size_t size;
- enum dma_data_direction dir;
+ struct host1x_bo_mapping *map;
};
/*
diff --git a/drivers/gpu/host1x/mipi.c b/drivers/gpu/host1x/mipi.c
index 2efe12dde8bc..e51b43dd15a3 100644
--- a/drivers/gpu/host1x/mipi.c
+++ b/drivers/gpu/host1x/mipi.c
@@ -501,8 +501,6 @@ static int tegra_mipi_probe(struct platform_device *pdev)
{
const struct of_device_id *match;
struct tegra_mipi *mipi;
- struct resource *res;
- int err;
match = of_match_node(tegra_mipi_of_match, pdev->dev.of_node);
if (!match)
@@ -515,42 +513,27 @@ static int tegra_mipi_probe(struct platform_device *pdev)
mipi->soc = match->data;
mipi->dev = &pdev->dev;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- mipi->regs = devm_ioremap_resource(&pdev->dev, res);
+ mipi->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
if (IS_ERR(mipi->regs))
return PTR_ERR(mipi->regs);
mutex_init(&mipi->lock);
- mipi->clk = devm_clk_get(&pdev->dev, NULL);
+ mipi->clk = devm_clk_get_prepared(&pdev->dev, NULL);
if (IS_ERR(mipi->clk)) {
dev_err(&pdev->dev, "failed to get clock\n");
return PTR_ERR(mipi->clk);
}
- err = clk_prepare(mipi->clk);
- if (err < 0)
- return err;
-
platform_set_drvdata(pdev, mipi);
return 0;
}
-static int tegra_mipi_remove(struct platform_device *pdev)
-{
- struct tegra_mipi *mipi = platform_get_drvdata(pdev);
-
- clk_unprepare(mipi->clk);
-
- return 0;
-}
-
struct platform_driver tegra_mipi_driver = {
.driver = {
.name = "tegra-mipi",
.of_match_table = tegra_mipi_of_match,
},
.probe = tegra_mipi_probe,
- .remove = tegra_mipi_remove,
};
diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c
index e648ebbb2027..acc7d82e0585 100644
--- a/drivers/gpu/host1x/syncpt.c
+++ b/drivers/gpu/host1x/syncpt.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/device.h>
+#include <linux/dma-fence.h>
#include <linux/slab.h>
#include <trace/events/host1x.h>
@@ -137,12 +138,21 @@ void host1x_syncpt_restore(struct host1x *host)
struct host1x_syncpt *sp_base = host->syncpt;
unsigned int i;
- for (i = 0; i < host1x_syncpt_nb_pts(host); i++)
+ for (i = 0; i < host1x_syncpt_nb_pts(host); i++) {
+ /*
+ * Unassign syncpt from channels for purposes of Tegra186
+ * syncpoint protection. This prevents any channel from
+ * accessing it until it is reassigned.
+ */
+ host1x_hw_syncpt_assign_to_channel(host, sp_base + i, NULL);
host1x_hw_syncpt_restore(host, sp_base + i);
+ }
for (i = 0; i < host1x_syncpt_nb_bases(host); i++)
host1x_hw_syncpt_restore_wait_base(host, sp_base + i);
+ host1x_hw_syncpt_enable_protection(host);
+
wmb();
}
@@ -200,17 +210,6 @@ int host1x_syncpt_incr(struct host1x_syncpt *sp)
}
EXPORT_SYMBOL(host1x_syncpt_incr);
-/*
- * Updated sync point form hardware, and returns true if syncpoint is expired,
- * false if we may need to wait
- */
-static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
-{
- host1x_hw_syncpt_load(sp->host, sp);
-
- return host1x_syncpt_is_expired(sp, thresh);
-}
-
/**
* host1x_syncpt_wait() - wait for a syncpoint to reach a given value
* @sp: host1x syncpoint
@@ -221,99 +220,46 @@ static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh)
int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout,
u32 *value)
{
- DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
- void *ref;
- struct host1x_waitlist *waiter;
- int err = 0, check_count = 0;
- u32 val;
+ struct dma_fence *fence;
+ long wait_err;
- if (value)
- *value = 0;
+ host1x_hw_syncpt_load(sp->host, sp);
- /* first check cache */
- if (host1x_syncpt_is_expired(sp, thresh)) {
- if (value)
- *value = host1x_syncpt_load(sp);
+ if (value)
+ *value = host1x_syncpt_load(sp);
+ if (host1x_syncpt_is_expired(sp, thresh))
return 0;
- }
-
- /* try to read from register */
- val = host1x_hw_syncpt_load(sp->host, sp);
- if (host1x_syncpt_is_expired(sp, thresh)) {
- if (value)
- *value = val;
-
- goto done;
- }
-
- if (!timeout) {
- err = -EAGAIN;
- goto done;
- }
- /* allocate a waiter */
- waiter = kzalloc(sizeof(*waiter), GFP_KERNEL);
- if (!waiter) {
- err = -ENOMEM;
- goto done;
- }
-
- /* schedule a wakeup when the syncpoint value is reached */
- err = host1x_intr_add_action(sp->host, sp, thresh,
- HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE,
- &wq, waiter, &ref);
- if (err)
- goto done;
-
- err = -EAGAIN;
- /* Caller-specified timeout may be impractically low */
if (timeout < 0)
timeout = LONG_MAX;
+ else if (timeout == 0)
+ return -EAGAIN;
- /* wait for the syncpoint, or timeout, or signal */
- while (timeout) {
- long check = min_t(long, SYNCPT_CHECK_PERIOD, timeout);
- int remain;
-
- remain = wait_event_interruptible_timeout(wq,
- syncpt_load_min_is_expired(sp, thresh),
- check);
- if (remain > 0 || host1x_syncpt_is_expired(sp, thresh)) {
- if (value)
- *value = host1x_syncpt_load(sp);
-
- err = 0;
-
- break;
- }
-
- if (remain < 0) {
- err = remain;
- break;
- }
-
- timeout -= check;
-
- if (timeout && check_count <= MAX_STUCK_CHECK_COUNT) {
- dev_warn(sp->host->dev,
- "%s: syncpoint id %u (%s) stuck waiting %d, timeout=%ld\n",
- current->comm, sp->id, sp->name,
- thresh, timeout);
-
- host1x_debug_dump_syncpts(sp->host);
-
- if (check_count == MAX_STUCK_CHECK_COUNT)
- host1x_debug_dump(sp->host);
+ fence = host1x_fence_create(sp, thresh, false);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
- check_count++;
- }
- }
+ wait_err = dma_fence_wait_timeout(fence, true, timeout);
+ if (wait_err == 0)
+ host1x_fence_cancel(fence);
+ dma_fence_put(fence);
- host1x_intr_put_ref(sp->host, sp->id, ref, true);
+ if (value)
+ *value = host1x_syncpt_load(sp);
-done:
- return err;
+ /*
+ * Don't rely on dma_fence_wait_timeout return value,
+ * since it returns zero both on timeout and if the
+ * wait completed with 0 jiffies left.
+ */
+ host1x_hw_syncpt_load(sp->host, sp);
+ if (wait_err == 0 && !host1x_syncpt_is_expired(sp, thresh))
+ return -EAGAIN;
+ else if (wait_err < 0)
+ return wait_err;
+ else
+ return 0;
}
EXPORT_SYMBOL(host1x_syncpt_wait);
@@ -350,13 +296,6 @@ int host1x_syncpt_init(struct host1x *host)
for (i = 0; i < host->info->nb_pts; i++) {
syncpt[i].id = i;
syncpt[i].host = host;
-
- /*
- * Unassign syncpt from channels for purposes of Tegra186
- * syncpoint protection. This prevents any channel from
- * accessing it until it is reassigned.
- */
- host1x_hw_syncpt_assign_to_channel(host, &syncpt[i], NULL);
}
for (i = 0; i < host->info->nb_bases; i++)
@@ -366,9 +305,6 @@ int host1x_syncpt_init(struct host1x *host)
host->syncpt = syncpt;
host->bases = bases;
- host1x_syncpt_restore(host);
- host1x_hw_syncpt_enable_protection(host);
-
/* Allocate sync point to use for clearing waits for expired fences */
host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop");
if (!host->nop_sp)
@@ -407,7 +343,7 @@ static void syncpt_release(struct kref *ref)
atomic_set(&sp->max_val, host1x_syncpt_read(sp));
- mutex_lock(&sp->host->syncpt_mutex);
+ sp->locked = false;
host1x_syncpt_base_free(sp->base);
kfree(sp->name);
@@ -431,7 +367,7 @@ void host1x_syncpt_put(struct host1x_syncpt *sp)
if (!sp)
return;
- kref_put(&sp->ref, syncpt_release);
+ kref_put_mutex(&sp->ref, syncpt_release, &sp->host->syncpt_mutex);
}
EXPORT_SYMBOL(host1x_syncpt_put);
diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h
index a6766f8d55ee..4c3f3b2f0e9c 100644
--- a/drivers/gpu/host1x/syncpt.h
+++ b/drivers/gpu/host1x/syncpt.h
@@ -14,6 +14,7 @@
#include <linux/kref.h>
#include <linux/sched.h>
+#include "fence.h"
#include "intr.h"
struct host1x;
@@ -39,7 +40,14 @@ struct host1x_syncpt {
struct host1x_syncpt_base *base;
/* interrupt data */
- struct host1x_syncpt_intr intr;
+ struct host1x_fence_list fences;
+
+ /*
+ * If a submission incrementing this syncpoint fails, lock it so that
+ * further submission cannot be made until application has handled the
+ * failure.
+ */
+ bool locked;
};
/* Initialize sync point array */
@@ -115,4 +123,9 @@ static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp)
return sp->id < host1x_syncpt_nb_pts(sp->host);
}
+static inline void host1x_syncpt_set_locked(struct host1x_syncpt *sp)
+{
+ sp->locked = true;
+}
+
#endif