diff options
Diffstat (limited to 'drivers/gpu/host1x/job.c')
| -rw-r--r-- | drivers/gpu/host1x/job.c | 265 |
1 files changed, 176 insertions, 89 deletions
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index 527a1cddb14f..3ed49e1fd933 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -1,24 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Tegra host1x Job * * Copyright (c) 2010-2015, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/host1x.h> +#include <linux/iommu.h> #include <linux/kref.h> #include <linux/module.h> #include <linux/scatterlist.h> @@ -34,18 +24,25 @@ #define HOST1X_WAIT_SYNCPT_OFFSET 0x8 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, - u32 num_cmdbufs, u32 num_relocs) + u32 num_cmdbufs, u32 num_relocs, + bool skip_firewall) { struct host1x_job *job = NULL; - unsigned int num_unpins = num_cmdbufs + num_relocs; + unsigned int num_unpins = num_relocs; + bool enable_firewall; u64 total; void *mem; + enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall; + + if (!enable_firewall) + num_unpins += num_cmdbufs; + /* Check that we're not going to overflow */ total = sizeof(struct host1x_job) + (u64)num_relocs * sizeof(struct host1x_reloc) + (u64)num_unpins * sizeof(struct host1x_job_unpin_data) + - (u64)num_cmdbufs * sizeof(struct host1x_job_gather) + + (u64)num_cmdbufs * sizeof(struct host1x_job_cmd) + (u64)num_unpins * sizeof(dma_addr_t) + (u64)num_unpins * sizeof(u32 *); if (total > ULONG_MAX) @@ -55,6 +52,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, if (!job) return NULL; + job->enable_firewall = enable_firewall; + kref_init(&job->ref); job->channel = ch; @@ -64,8 +63,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, mem += num_relocs * sizeof(struct host1x_reloc); job->unpins = num_unpins ? mem : NULL; mem += num_unpins * sizeof(struct host1x_job_unpin_data); - job->gathers = num_cmdbufs ? mem : NULL; - mem += num_cmdbufs * sizeof(struct host1x_job_gather); + job->cmds = num_cmdbufs ? mem : NULL; + mem += num_cmdbufs * sizeof(struct host1x_job_cmd); job->addr_phys = num_unpins ? mem : NULL; job->reloc_addr_phys = job->addr_phys; @@ -86,6 +85,22 @@ static void job_free(struct kref *ref) { struct host1x_job *job = container_of(ref, struct host1x_job, ref); + if (job->release) + job->release(job); + + if (job->fence) { + /* + * remove_callback is atomic w.r.t. fence signaling, so + * after the call returns, we know that the callback is not + * in execution, and the fence can be safely freed. + */ + dma_fence_remove_callback(job->fence, &job->fence_cb); + dma_fence_put(job->fence); + } + + if (job->syncpt) + host1x_syncpt_put(job->syncpt); + kfree(job); } @@ -98,18 +113,37 @@ EXPORT_SYMBOL(host1x_job_put); void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, unsigned int words, unsigned int offset) { - struct host1x_job_gather *gather = &job->gathers[job->num_gathers]; + struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather; gather->words = words; gather->bo = bo; gather->offset = offset; - job->num_gathers++; + job->num_cmds++; } EXPORT_SYMBOL(host1x_job_add_gather); +void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh, + bool relative, u32 next_class) +{ + struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds]; + + cmd->is_wait = true; + cmd->wait.id = id; + cmd->wait.threshold = thresh; + cmd->wait.next_class = next_class; + cmd->wait.relative = relative; + + job->num_cmds++; +} +EXPORT_SYMBOL(host1x_job_add_wait); + static unsigned int pin_job(struct host1x *host, struct host1x_job *job) { + unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE; + struct host1x_client *client = job->client; + struct device *dev = client->dev; + struct host1x_job_gather *g; unsigned int i; int err; @@ -117,8 +151,9 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) for (i = 0; i < job->num_relocs; i++) { struct host1x_reloc *reloc = &job->relocs[i]; - struct sg_table *sgt; - dma_addr_t phys_addr; + enum dma_data_direction direction; + struct host1x_bo_mapping *map; + struct host1x_bo *bo; reloc->target.bo = host1x_bo_get(reloc->target.bo); if (!reloc->target.bo) { @@ -126,35 +161,83 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - phys_addr = host1x_bo_pin(reloc->target.bo, &sgt); + bo = reloc->target.bo; - job->addr_phys[job->num_unpins] = phys_addr; - job->unpins[job->num_unpins].bo = reloc->target.bo; - job->unpins[job->num_unpins].sgt = sgt; + switch (reloc->flags & mask) { + case HOST1X_RELOC_READ: + direction = DMA_TO_DEVICE; + break; + + case HOST1X_RELOC_WRITE: + direction = DMA_FROM_DEVICE; + break; + + case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE: + direction = DMA_BIDIRECTIONAL; + break; + + default: + err = -EINVAL; + goto unpin; + } + + map = host1x_bo_pin(dev, bo, direction, NULL); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto unpin; + } + + /* + * host1x clients are generally not able to do scatter-gather themselves, so fail + * if the buffer is discontiguous and we fail to map its SG table to a single + * contiguous chunk of I/O virtual memory. + */ + if (map->chunks > 1) { + err = -EINVAL; + goto unpin; + } + + job->addr_phys[job->num_unpins] = map->phys; + job->unpins[job->num_unpins].map = map; job->num_unpins++; } - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + /* + * We will copy gathers BO content later, so there is no need to + * hold and pin them. + */ + if (job->enable_firewall) + return 0; + + for (i = 0; i < job->num_cmds; i++) { + struct host1x_bo_mapping *map; size_t gather_size = 0; struct scatterlist *sg; - struct sg_table *sgt; - dma_addr_t phys_addr; unsigned long shift; struct iova *alloc; unsigned int j; + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; + g->bo = host1x_bo_get(g->bo); if (!g->bo) { err = -EINVAL; goto unpin; } - phys_addr = host1x_bo_pin(g->bo, &sgt); + map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, NULL); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto unpin; + } - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) { - for_each_sg(sgt->sgl, sg, sgt->nents, j) + if (host->domain) { + for_each_sgtable_sg(map->sgt, sg, j) gather_size += sg->length; + gather_size = iova_align(&host->iova, gather_size); shift = iova_shift(&host->iova); @@ -162,34 +245,32 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) host->iova_end >> shift, true); if (!alloc) { err = -ENOMEM; - goto unpin; + goto put; } - err = iommu_map_sg(host->domain, - iova_dma_addr(&host->iova, alloc), - sgt->sgl, sgt->nents, IOMMU_READ); + err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc), + map->sgt, IOMMU_READ); if (err == 0) { __free_iova(&host->iova, alloc); err = -EINVAL; - goto unpin; + goto put; } - job->addr_phys[job->num_unpins] = - iova_dma_addr(&host->iova, alloc); - job->unpins[job->num_unpins].size = gather_size; - } else { - job->addr_phys[job->num_unpins] = phys_addr; + map->phys = iova_dma_addr(&host->iova, alloc); + map->size = gather_size; } - job->gather_addr_phys[i] = job->addr_phys[job->num_unpins]; - - job->unpins[job->num_unpins].bo = g->bo; - job->unpins[job->num_unpins].sgt = sgt; + job->addr_phys[job->num_unpins] = map->phys; + job->unpins[job->num_unpins].map = map; job->num_unpins++; + + job->gather_addr_phys[i] = map->phys; } return 0; +put: + host1x_bo_put(g->bo); unpin: host1x_job_unpin(job); return err; @@ -197,8 +278,7 @@ unpin: static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g) { - u32 last_page = ~0; - void *cmdbuf_page_addr = NULL; + void *cmdbuf_addr = NULL; struct host1x_bo *cmdbuf = g->bo; unsigned int i; @@ -213,35 +293,29 @@ static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g) if (cmdbuf != reloc->cmdbuf.bo) continue; - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { + if (job->enable_firewall) { target = (u32 *)job->gather_copy_mapped + reloc->cmdbuf.offset / sizeof(u32) + g->offset / sizeof(u32); goto patch_reloc; } - if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) { - if (cmdbuf_page_addr) - host1x_bo_kunmap(cmdbuf, last_page, - cmdbuf_page_addr); + if (!cmdbuf_addr) { + cmdbuf_addr = host1x_bo_mmap(cmdbuf); - cmdbuf_page_addr = host1x_bo_kmap(cmdbuf, - reloc->cmdbuf.offset >> PAGE_SHIFT); - last_page = reloc->cmdbuf.offset >> PAGE_SHIFT; - - if (unlikely(!cmdbuf_page_addr)) { + if (unlikely(!cmdbuf_addr)) { pr_err("Could not map cmdbuf for relocation\n"); return -ENOMEM; } } - target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK); + target = cmdbuf_addr + reloc->cmdbuf.offset; patch_reloc: *target = reloc_addr; } - if (cmdbuf_page_addr) - host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr); + if (cmdbuf_addr) + host1x_bo_munmap(cmdbuf, cmdbuf_addr); return 0; } @@ -447,7 +521,8 @@ out: return err; } -static inline int copy_gathers(struct host1x_job *job, struct device *dev) +static inline int copy_gathers(struct device *host, struct host1x_job *job, + struct device *dev) { struct host1x_firewall fw; size_t size = 0; @@ -460,8 +535,13 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) fw.num_relocs = job->num_relocs; fw.class = job->class; - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; + + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; size += g->words * sizeof(u32); } @@ -470,12 +550,12 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) * Try a non-blocking allocation from a higher priority pools first, * as awaiting for the allocation here is a major performance hit. */ - job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy, + job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy, GFP_NOWAIT); /* the higher priority allocation failed, try the generic-blocking */ if (!job->gather_copy_mapped) - job->gather_copy_mapped = dma_alloc_wc(dev, size, + job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy, GFP_KERNEL); if (!job->gather_copy_mapped) @@ -483,10 +563,14 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) job->gather_copy_size = size; - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; void *gather; + if (job->cmds[i].is_wait) + continue; + g = &job->cmds[i].gather; + /* Copy the gather */ gather = host1x_bo_mmap(g->bo); memcpy(job->gather_copy_mapped + offset, gather + g->offset, @@ -522,28 +606,33 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) if (err) goto out; - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { - err = copy_gathers(job, dev); + if (job->enable_firewall) { + err = copy_gathers(host->dev, job, dev); if (err) goto out; } /* patch gathers */ - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; + + if (job->cmds[i].is_wait) + continue; + g = &job->cmds[i].gather; /* process each gather mem only once */ if (g->handled) continue; /* copy_gathers() sets gathers base if firewall is enabled */ - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) + if (!job->enable_firewall) g->base = job->gather_addr_phys[i]; - for (j = i + 1; j < job->num_gathers; j++) { - if (job->gathers[j].bo == g->bo) { - job->gathers[j].handled = true; - job->gathers[j].base = g->base; + for (j = i + 1; j < job->num_cmds; j++) { + if (!job->cmds[j].is_wait && + job->cmds[j].gather.bo == g->bo) { + job->cmds[j].gather.handled = true; + job->cmds[j].gather.base = g->base; } } @@ -567,24 +656,22 @@ void host1x_job_unpin(struct host1x_job *job) unsigned int i; for (i = 0; i < job->num_unpins; i++) { - struct host1x_job_unpin_data *unpin = &job->unpins[i]; - - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && - unpin->size && host->domain) { - iommu_unmap(host->domain, job->addr_phys[i], - unpin->size); - free_iova(&host->iova, - iova_pfn(&host->iova, job->addr_phys[i])); + struct host1x_bo_mapping *map = job->unpins[i].map; + struct host1x_bo *bo = map->bo; + + if (!job->enable_firewall && map->size && host->domain) { + iommu_unmap(host->domain, job->addr_phys[i], map->size); + free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i])); } - host1x_bo_unpin(unpin->bo, unpin->sgt); - host1x_bo_put(unpin->bo); + host1x_bo_unpin(map); + host1x_bo_put(bo); } job->num_unpins = 0; if (job->gather_copy_size) - dma_free_wc(job->channel->dev, job->gather_copy_size, + dma_free_wc(host->dev, job->gather_copy_size, job->gather_copy_mapped, job->gather_copy); } EXPORT_SYMBOL(host1x_job_unpin); @@ -594,7 +681,7 @@ EXPORT_SYMBOL(host1x_job_unpin); */ void host1x_job_dump(struct device *dev, struct host1x_job *job) { - dev_dbg(dev, " SYNCPT_ID %d\n", job->syncpt_id); + dev_dbg(dev, " SYNCPT_ID %d\n", job->syncpt->id); dev_dbg(dev, " SYNCPT_VAL %d\n", job->syncpt_end); dev_dbg(dev, " FIRST_GET 0x%x\n", job->first_get); dev_dbg(dev, " TIMEOUT %d\n", job->timeout); |
