Diffstat (limited to 'drivers/gpu/host1x/job.c')
-rw-r--r--  drivers/gpu/host1x/job.c | 265
1 file changed, 176 insertions(+), 89 deletions(-)
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index 527a1cddb14f..3ed49e1fd933 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -1,24 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
/*
* Tegra host1x Job
*
* Copyright (c) 2010-2015, NVIDIA Corporation.
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/host1x.h>
+#include <linux/iommu.h>
#include <linux/kref.h>
#include <linux/module.h>
#include <linux/scatterlist.h>
@@ -34,18 +24,25 @@
#define HOST1X_WAIT_SYNCPT_OFFSET 0x8
struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
- u32 num_cmdbufs, u32 num_relocs)
+ u32 num_cmdbufs, u32 num_relocs,
+ bool skip_firewall)
{
struct host1x_job *job = NULL;
- unsigned int num_unpins = num_cmdbufs + num_relocs;
+ unsigned int num_unpins = num_relocs;
+ bool enable_firewall;
u64 total;
void *mem;
+ enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall;
+
+ if (!enable_firewall)
+ num_unpins += num_cmdbufs;
+
/* Check that we're not going to overflow */
total = sizeof(struct host1x_job) +
(u64)num_relocs * sizeof(struct host1x_reloc) +
(u64)num_unpins * sizeof(struct host1x_job_unpin_data) +
- (u64)num_cmdbufs * sizeof(struct host1x_job_gather) +
+ (u64)num_cmdbufs * sizeof(struct host1x_job_cmd) +
(u64)num_unpins * sizeof(dma_addr_t) +
(u64)num_unpins * sizeof(u32 *);
if (total > ULONG_MAX)
@@ -55,6 +52,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
if (!job)
return NULL;
+ job->enable_firewall = enable_firewall;
+
kref_init(&job->ref);
job->channel = ch;
@@ -64,8 +63,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch,
mem += num_relocs * sizeof(struct host1x_reloc);
job->unpins = num_unpins ? mem : NULL;
mem += num_unpins * sizeof(struct host1x_job_unpin_data);
- job->gathers = num_cmdbufs ? mem : NULL;
- mem += num_cmdbufs * sizeof(struct host1x_job_gather);
+ job->cmds = num_cmdbufs ? mem : NULL;
+ mem += num_cmdbufs * sizeof(struct host1x_job_cmd);
job->addr_phys = num_unpins ? mem : NULL;
job->reloc_addr_phys = job->addr_phys;
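
The sizing and carving above packs the job header and every per-command array into a single allocation: the total is computed in 64 bits to catch overflow, then a byte cursor is advanced through the buffer to hand out each sub-array. A minimal userspace sketch of the same pattern follows; the struct and element sizes are placeholders, and the firewall special-casing of num_unpins is left out.

#include <stdint.h>
#include <stdlib.h>

struct fake_job {                       /* stand-in for struct host1x_job */
        void *relocs;
        void *unpins;
        void *cmds;
        void *addr_phys;
};

static struct fake_job *fake_job_alloc(uint32_t num_cmdbufs, uint32_t num_relocs,
                                       size_t reloc_sz, size_t unpin_sz,
                                       size_t cmd_sz, size_t addr_sz)
{
        unsigned int num_unpins = num_cmdbufs + num_relocs;
        struct fake_job *job;
        uint64_t total;
        uint8_t *mem;

        /* compute the total in 64 bits so an oversized request is caught
         * (meaningful on 32-bit, mirroring the ULONG_MAX check above) */
        total = sizeof(*job) +
                (uint64_t)num_relocs * reloc_sz +
                (uint64_t)num_unpins * unpin_sz +
                (uint64_t)num_cmdbufs * cmd_sz +
                (uint64_t)num_unpins * addr_sz;
        if (total > SIZE_MAX)
                return NULL;

        job = calloc(1, total);
        if (!job)
                return NULL;

        /* carve the trailing arrays out of the same allocation */
        mem = (uint8_t *)(job + 1);
        job->relocs = num_relocs ? mem : NULL;
        mem += num_relocs * reloc_sz;
        job->unpins = num_unpins ? mem : NULL;
        mem += num_unpins * unpin_sz;
        job->cmds = num_cmdbufs ? mem : NULL;
        mem += num_cmdbufs * cmd_sz;
        job->addr_phys = num_unpins ? mem : NULL;

        return job;
}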
@@ -86,6 +85,22 @@ static void job_free(struct kref *ref)
{
struct host1x_job *job = container_of(ref, struct host1x_job, ref);
+ if (job->release)
+ job->release(job);
+
+ if (job->fence) {
+ /*
+ * remove_callback is atomic w.r.t. fence signaling, so
+ * after the call returns, we know that the callback is not
+ * in execution, and the fence can be safely freed.
+ */
+ dma_fence_remove_callback(job->fence, &job->fence_cb);
+ dma_fence_put(job->fence);
+ }
+
+ if (job->syncpt)
+ host1x_syncpt_put(job->syncpt);
+
kfree(job);
}
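
job_free() is only ever reached through the job's kref. The get/put wrappers are not part of this hunk; a sketch assuming they follow the usual kref pattern, with job_free() as the release callback:

struct host1x_job *host1x_job_get(struct host1x_job *job)
{
        kref_get(&job->ref);
        return job;
}

void host1x_job_put(struct host1x_job *job)
{
        /* once the last reference drops, job_free() runs and, in turn,
         * removes the fence callback and puts the syncpoint shown above */
        kref_put(&job->ref, job_free);
}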
@@ -98,18 +113,37 @@ EXPORT_SYMBOL(host1x_job_put);
void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo,
unsigned int words, unsigned int offset)
{
- struct host1x_job_gather *gather = &job->gathers[job->num_gathers];
+ struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather;
gather->words = words;
gather->bo = bo;
gather->offset = offset;
- job->num_gathers++;
+ job->num_cmds++;
}
EXPORT_SYMBOL(host1x_job_add_gather);
+void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh,
+ bool relative, u32 next_class)
+{
+ struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds];
+
+ cmd->is_wait = true;
+ cmd->wait.id = id;
+ cmd->wait.threshold = thresh;
+ cmd->wait.next_class = next_class;
+ cmd->wait.relative = relative;
+
+ job->num_cmds++;
+}
+EXPORT_SYMBOL(host1x_job_add_wait);
+
static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
{
+ unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE;
+ struct host1x_client *client = job->client;
+ struct device *dev = client->dev;
+ struct host1x_job_gather *g;
unsigned int i;
int err;
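
With gathers and waits now sharing the cmds[] array, a client queues them in submission order through the two helpers above. A hypothetical usage sketch; the channel, BO, syncpoint id/threshold, word count and class value are placeholders, not taken from this patch:

static struct host1x_job *example_submit(struct host1x_channel *channel,
                                         struct host1x_bo *cmdbuf,
                                         u32 wait_id, u32 wait_thresh,
                                         u32 next_class)
{
        struct host1x_job *job;

        /* room for two commands (one gather + one wait), no relocations,
         * firewall not skipped */
        job = host1x_job_alloc(channel, 2, 0, false);
        if (!job)
                return NULL;

        /* 128 words of opcodes starting at byte offset 0 of the BO */
        host1x_job_add_gather(job, cmdbuf, 128, 0);

        /* absolute wait on (wait_id, wait_thresh) before switching class */
        host1x_job_add_wait(job, wait_id, wait_thresh, false, next_class);

        return job;
}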
@@ -117,8 +151,9 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
for (i = 0; i < job->num_relocs; i++) {
struct host1x_reloc *reloc = &job->relocs[i];
- struct sg_table *sgt;
- dma_addr_t phys_addr;
+ enum dma_data_direction direction;
+ struct host1x_bo_mapping *map;
+ struct host1x_bo *bo;
reloc->target.bo = host1x_bo_get(reloc->target.bo);
if (!reloc->target.bo) {
@@ -126,35 +161,83 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
goto unpin;
}
- phys_addr = host1x_bo_pin(reloc->target.bo, &sgt);
+ bo = reloc->target.bo;
- job->addr_phys[job->num_unpins] = phys_addr;
- job->unpins[job->num_unpins].bo = reloc->target.bo;
- job->unpins[job->num_unpins].sgt = sgt;
+ switch (reloc->flags & mask) {
+ case HOST1X_RELOC_READ:
+ direction = DMA_TO_DEVICE;
+ break;
+
+ case HOST1X_RELOC_WRITE:
+ direction = DMA_FROM_DEVICE;
+ break;
+
+ case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
+ direction = DMA_BIDIRECTIONAL;
+ break;
+
+ default:
+ err = -EINVAL;
+ goto unpin;
+ }
+
+ map = host1x_bo_pin(dev, bo, direction, NULL);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto unpin;
+ }
+
+ /*
+ * host1x clients are generally not able to do scatter-gather themselves, so fail
+ * if the buffer is discontiguous and we fail to map its SG table to a single
+ * contiguous chunk of I/O virtual memory.
+ */
+ if (map->chunks > 1) {
+ err = -EINVAL;
+ goto unpin;
+ }
+
+ job->addr_phys[job->num_unpins] = map->phys;
+ job->unpins[job->num_unpins].map = map;
job->num_unpins++;
}
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
+ /*
+ * We will copy gathers BO content later, so there is no need to
+ * hold and pin them.
+ */
+ if (job->enable_firewall)
+ return 0;
+
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_bo_mapping *map;
size_t gather_size = 0;
struct scatterlist *sg;
- struct sg_table *sgt;
- dma_addr_t phys_addr;
unsigned long shift;
struct iova *alloc;
unsigned int j;
+ if (job->cmds[i].is_wait)
+ continue;
+
+ g = &job->cmds[i].gather;
+
g->bo = host1x_bo_get(g->bo);
if (!g->bo) {
err = -EINVAL;
goto unpin;
}
- phys_addr = host1x_bo_pin(g->bo, &sgt);
+ map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, NULL);
+ if (IS_ERR(map)) {
+ err = PTR_ERR(map);
+ goto unpin;
+ }
- if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) {
- for_each_sg(sgt->sgl, sg, sgt->nents, j)
+ if (host->domain) {
+ for_each_sgtable_sg(map->sgt, sg, j)
gather_size += sg->length;
+
gather_size = iova_align(&host->iova, gather_size);
shift = iova_shift(&host->iova);
@@ -162,34 +245,32 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job)
host->iova_end >> shift, true);
if (!alloc) {
err = -ENOMEM;
- goto unpin;
+ goto put;
}
- err = iommu_map_sg(host->domain,
- iova_dma_addr(&host->iova, alloc),
- sgt->sgl, sgt->nents, IOMMU_READ);
+ err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc),
+ map->sgt, IOMMU_READ);
if (err == 0) {
__free_iova(&host->iova, alloc);
err = -EINVAL;
- goto unpin;
+ goto put;
}
- job->addr_phys[job->num_unpins] =
- iova_dma_addr(&host->iova, alloc);
- job->unpins[job->num_unpins].size = gather_size;
- } else {
- job->addr_phys[job->num_unpins] = phys_addr;
+ map->phys = iova_dma_addr(&host->iova, alloc);
+ map->size = gather_size;
}
- job->gather_addr_phys[i] = job->addr_phys[job->num_unpins];
-
- job->unpins[job->num_unpins].bo = g->bo;
- job->unpins[job->num_unpins].sgt = sgt;
+ job->addr_phys[job->num_unpins] = map->phys;
+ job->unpins[job->num_unpins].map = map;
job->num_unpins++;
+
+ job->gather_addr_phys[i] = map->phys;
}
return 0;
+put:
+ host1x_bo_put(g->bo);
unpin:
host1x_job_unpin(job);
return err;
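
The reloc pinning above derives the DMA direction from the relocation's access flags before mapping the BO. A hypothetical helper, equivalent to that switch and not part of the patch, shown only to make the mapping explicit:

static int host1x_reloc_direction(unsigned long flags,
                                  enum dma_data_direction *direction)
{
        switch (flags & (HOST1X_RELOC_READ | HOST1X_RELOC_WRITE)) {
        case HOST1X_RELOC_READ:
                *direction = DMA_TO_DEVICE;     /* engine only reads the BO */
                return 0;
        case HOST1X_RELOC_WRITE:
                *direction = DMA_FROM_DEVICE;   /* engine only writes the BO */
                return 0;
        case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE:
                *direction = DMA_BIDIRECTIONAL;
                return 0;
        default:
                return -EINVAL;                 /* no access flags set */
        }
}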
@@ -197,8 +278,7 @@ unpin:
static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
{
- u32 last_page = ~0;
- void *cmdbuf_page_addr = NULL;
+ void *cmdbuf_addr = NULL;
struct host1x_bo *cmdbuf = g->bo;
unsigned int i;
@@ -213,35 +293,29 @@ static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g)
if (cmdbuf != reloc->cmdbuf.bo)
continue;
- if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
+ if (job->enable_firewall) {
target = (u32 *)job->gather_copy_mapped +
reloc->cmdbuf.offset / sizeof(u32) +
g->offset / sizeof(u32);
goto patch_reloc;
}
- if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) {
- if (cmdbuf_page_addr)
- host1x_bo_kunmap(cmdbuf, last_page,
- cmdbuf_page_addr);
+ if (!cmdbuf_addr) {
+ cmdbuf_addr = host1x_bo_mmap(cmdbuf);
- cmdbuf_page_addr = host1x_bo_kmap(cmdbuf,
- reloc->cmdbuf.offset >> PAGE_SHIFT);
- last_page = reloc->cmdbuf.offset >> PAGE_SHIFT;
-
- if (unlikely(!cmdbuf_page_addr)) {
+ if (unlikely(!cmdbuf_addr)) {
pr_err("Could not map cmdbuf for relocation\n");
return -ENOMEM;
}
}
- target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK);
+ target = cmdbuf_addr + reloc->cmdbuf.offset;
patch_reloc:
*target = reloc_addr;
}
- if (cmdbuf_page_addr)
- host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr);
+ if (cmdbuf_addr)
+ host1x_bo_munmap(cmdbuf, cmdbuf_addr);
return 0;
}
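
do_relocs() now maps the whole command buffer once with host1x_bo_mmap() and patches byte offsets directly, while the firewall path still patches the copied stream at u32 granularity. A hypothetical helper factoring out that target computation, not part of the patch:

static u32 *reloc_target(struct host1x_job *job, struct host1x_job_gather *g,
                         struct host1x_reloc *reloc, void *cmdbuf_addr)
{
        if (job->enable_firewall)
                /* patch the copied gather: both offsets become u32 indices */
                return (u32 *)job->gather_copy_mapped +
                       reloc->cmdbuf.offset / sizeof(u32) +
                       g->offset / sizeof(u32);

        /* patch the mapped BO directly at its byte offset */
        return cmdbuf_addr + reloc->cmdbuf.offset;
}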
@@ -447,7 +521,8 @@ out:
return err;
}
-static inline int copy_gathers(struct host1x_job *job, struct device *dev)
+static inline int copy_gathers(struct device *host, struct host1x_job *job,
+ struct device *dev)
{
struct host1x_firewall fw;
size_t size = 0;
@@ -460,8 +535,13 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
fw.num_relocs = job->num_relocs;
fw.class = job->class;
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_gather *g;
+
+ if (job->cmds[i].is_wait)
+ continue;
+
+ g = &job->cmds[i].gather;
size += g->words * sizeof(u32);
}
@@ -470,12 +550,12 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
* Try a non-blocking allocation from a higher priority pools first,
* as awaiting for the allocation here is a major performance hit.
*/
- job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy,
+ job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy,
GFP_NOWAIT);
/* the higher priority allocation failed, try the generic-blocking */
if (!job->gather_copy_mapped)
- job->gather_copy_mapped = dma_alloc_wc(dev, size,
+ job->gather_copy_mapped = dma_alloc_wc(host, size,
&job->gather_copy,
GFP_KERNEL);
if (!job->gather_copy_mapped)
@@ -483,10 +563,14 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev)
job->gather_copy_size = size;
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_gather *g;
void *gather;
+ if (job->cmds[i].is_wait)
+ continue;
+ g = &job->cmds[i].gather;
+
/* Copy the gather */
gather = host1x_bo_mmap(g->bo);
memcpy(job->gather_copy_mapped + offset, gather + g->offset,
@@ -522,28 +606,33 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev)
if (err)
goto out;
- if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) {
- err = copy_gathers(job, dev);
+ if (job->enable_firewall) {
+ err = copy_gathers(host->dev, job, dev);
if (err)
goto out;
}
/* patch gathers */
- for (i = 0; i < job->num_gathers; i++) {
- struct host1x_job_gather *g = &job->gathers[i];
+ for (i = 0; i < job->num_cmds; i++) {
+ struct host1x_job_gather *g;
+
+ if (job->cmds[i].is_wait)
+ continue;
+ g = &job->cmds[i].gather;
/* process each gather mem only once */
if (g->handled)
continue;
/* copy_gathers() sets gathers base if firewall is enabled */
- if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL))
+ if (!job->enable_firewall)
g->base = job->gather_addr_phys[i];
- for (j = i + 1; j < job->num_gathers; j++) {
- if (job->gathers[j].bo == g->bo) {
- job->gathers[j].handled = true;
- job->gathers[j].base = g->base;
+ for (j = i + 1; j < job->num_cmds; j++) {
+ if (!job->cmds[j].is_wait &&
+ job->cmds[j].gather.bo == g->bo) {
+ job->cmds[j].gather.handled = true;
+ job->cmds[j].gather.base = g->base;
}
}
@@ -567,24 +656,22 @@ void host1x_job_unpin(struct host1x_job *job)
unsigned int i;
for (i = 0; i < job->num_unpins; i++) {
- struct host1x_job_unpin_data *unpin = &job->unpins[i];
-
- if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) &&
- unpin->size && host->domain) {
- iommu_unmap(host->domain, job->addr_phys[i],
- unpin->size);
- free_iova(&host->iova,
- iova_pfn(&host->iova, job->addr_phys[i]));
+ struct host1x_bo_mapping *map = job->unpins[i].map;
+ struct host1x_bo *bo = map->bo;
+
+ if (!job->enable_firewall && map->size && host->domain) {
+ iommu_unmap(host->domain, job->addr_phys[i], map->size);
+ free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i]));
}
- host1x_bo_unpin(unpin->bo, unpin->sgt);
- host1x_bo_put(unpin->bo);
+ host1x_bo_unpin(map);
+ host1x_bo_put(bo);
}
job->num_unpins = 0;
if (job->gather_copy_size)
- dma_free_wc(job->channel->dev, job->gather_copy_size,
+ dma_free_wc(host->dev, job->gather_copy_size,
job->gather_copy_mapped, job->gather_copy);
}
EXPORT_SYMBOL(host1x_job_unpin);
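
host1x_job_unpin() reverses everything pin_job() set up, including the per-gather IOVA ranges mapped into host->domain. A hypothetical caller-side lifecycle sketch (the actual submit path lives outside this file and is only indicated by the comment):

static int example_run_job(struct host1x_job *job, struct device *dev)
{
        int err;

        err = host1x_job_pin(job, dev); /* pins BOs and patches gathers */
        if (err)
                goto put;

        /* ... hand the job to its channel and wait for completion ... */

        host1x_job_unpin(job);          /* undoes IOMMU/IOVA mappings and BO pins */
put:
        host1x_job_put(job);            /* drop the caller's reference */
        return err;
}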
@@ -594,7 +681,7 @@ EXPORT_SYMBOL(host1x_job_unpin);
*/
void host1x_job_dump(struct device *dev, struct host1x_job *job)
{
- dev_dbg(dev, " SYNCPT_ID %d\n", job->syncpt_id);
+ dev_dbg(dev, " SYNCPT_ID %d\n", job->syncpt->id);
dev_dbg(dev, " SYNCPT_VAL %d\n", job->syncpt_end);
dev_dbg(dev, " FIRST_GET 0x%x\n", job->first_get);
dev_dbg(dev, " TIMEOUT %d\n", job->timeout);