diff options
Diffstat (limited to 'drivers/gpu/host1x/cdma.c')
| -rw-r--r-- | drivers/gpu/host1x/cdma.c | 392 |
1 files changed, 294 insertions, 98 deletions
diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index de72172d3b5f..ba2e572567c0 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -1,25 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Tegra host1x Command DMA * * Copyright (c) 2010-2013, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <asm/cacheflush.h> #include <linux/device.h> #include <linux/dma-mapping.h> +#include <linux/host1x.h> #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/kfifo.h> @@ -30,7 +20,6 @@ #include "channel.h" #include "dev.h" #include "debug.h" -#include "host1x_bo.h" #include "job.h" /* @@ -41,7 +30,17 @@ * means that the push buffer is full, not empty. */ -#define HOST1X_PUSHBUFFER_SLOTS 512 +/* + * Typically the commands written into the push buffer are a pair of words. We + * use slots to represent each of these pairs and to simplify things. Note the + * strange number of slots allocated here. 512 slots will fit exactly within a + * single memory page. We also need one additional word at the end of the push + * buffer for the RESTART opcode that will instruct the CDMA to jump back to + * the beginning of the push buffer. With 512 slots, this means that we'll use + * 2 memory pages and waste 4092 bytes of the second page that will never be + * used. + */ +#define HOST1X_PUSHBUFFER_SLOTS 511 /* * Clean up push buffer resources @@ -51,9 +50,15 @@ static void host1x_pushbuffer_destroy(struct push_buffer *pb) struct host1x_cdma *cdma = pb_to_cdma(pb); struct host1x *host1x = cdma_to_host1x(cdma); - if (pb->phys != 0) - dma_free_writecombine(host1x->dev, pb->size_bytes + 4, - pb->mapped, pb->phys); + if (!pb->mapped) + return; + + if (host1x->domain) { + iommu_unmap(host1x->domain, pb->dma, pb->alloc_size); + free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma)); + } + + dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys); pb->mapped = NULL; pb->phys = 0; @@ -66,28 +71,64 @@ static int host1x_pushbuffer_init(struct push_buffer *pb) { struct host1x_cdma *cdma = pb_to_cdma(pb); struct host1x *host1x = cdma_to_host1x(cdma); + struct iova *alloc; + u32 size; + int err; pb->mapped = NULL; pb->phys = 0; - pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8; + pb->size = HOST1X_PUSHBUFFER_SLOTS * 8; + + size = pb->size + 4; /* initialize buffer pointers */ - pb->fence = pb->size_bytes - 8; + pb->fence = pb->size - 8; pb->pos = 0; - /* allocate and map pushbuffer memory */ - pb->mapped = dma_alloc_writecombine(host1x->dev, pb->size_bytes + 4, - &pb->phys, GFP_KERNEL); - if (!pb->mapped) - goto fail; + if (host1x->domain) { + unsigned long shift; + + size = iova_align(&host1x->iova, size); + + pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys, + GFP_KERNEL); + if (!pb->mapped) + return -ENOMEM; + + shift = iova_shift(&host1x->iova); + alloc = alloc_iova(&host1x->iova, size >> shift, + host1x->iova_end >> shift, true); + if (!alloc) { + err = -ENOMEM; + goto iommu_free_mem; + } + + pb->dma = iova_dma_addr(&host1x->iova, alloc); + err = iommu_map(host1x->domain, pb->dma, pb->phys, size, + IOMMU_READ, GFP_KERNEL); + if (err) + goto iommu_free_iova; + } else { + pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys, + GFP_KERNEL); + if (!pb->mapped) + return -ENOMEM; + + pb->dma = pb->phys; + } + + pb->alloc_size = size; host1x_hw_pushbuffer_init(host1x, pb); return 0; -fail: - host1x_pushbuffer_destroy(pb); - return -ENOMEM; +iommu_free_iova: + __free_iova(&host1x->iova, alloc); +iommu_free_mem: + dma_free_wc(host1x->dev, size, pb->mapped, pb->phys); + + return err; } /* @@ -96,12 +137,15 @@ fail: */ static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2) { - u32 pos = pb->pos; - u32 *p = (u32 *)((u32)pb->mapped + pos); - WARN_ON(pos == pb->fence); + u32 *p = (u32 *)((void *)pb->mapped + pb->pos); + + WARN_ON(pb->pos == pb->fence); *(p++) = op1; *(p++) = op2; - pb->pos = (pos + 8) & (pb->size_bytes - 1); + pb->pos += 8; + + if (pb->pos >= pb->size) + pb->pos -= pb->size; } /* @@ -111,7 +155,10 @@ static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2) static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots) { /* Advance the next write position */ - pb->fence = (pb->fence + slots * 8) & (pb->size_bytes - 1); + pb->fence += slots * 8; + + if (pb->fence >= pb->size) + pb->fence -= pb->size; } /* @@ -119,7 +166,12 @@ static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots) */ static u32 host1x_pushbuffer_space(struct push_buffer *pb) { - return ((pb->fence - pb->pos) & (pb->size_bytes - 1)) / 8; + unsigned int fence = pb->fence; + + if (pb->fence < pb->pos) + fence += pb->size; + + return (fence - pb->pos) / 8; } /* @@ -134,14 +186,19 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma, enum cdma_event event) { for (;;) { + struct push_buffer *pb = &cdma->push_buffer; unsigned int space; - if (event == CDMA_EVENT_SYNC_QUEUE_EMPTY) + switch (event) { + case CDMA_EVENT_SYNC_QUEUE_EMPTY: space = list_empty(&cdma->sync_queue) ? 1 : 0; - else if (event == CDMA_EVENT_PUSH_BUFFER_SPACE) { - struct push_buffer *pb = &cdma->push_buffer; + break; + + case CDMA_EVENT_PUSH_BUFFER_SPACE: space = host1x_pushbuffer_space(pb); - } else { + break; + + default: WARN_ON(1); return -EINVAL; } @@ -159,31 +216,68 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma, mutex_lock(&cdma->lock); continue; } + cdma->event = event; mutex_unlock(&cdma->lock); - down(&cdma->sem); + wait_for_completion(&cdma->complete); mutex_lock(&cdma->lock); } + return 0; } /* + * Sleep (if necessary) until the push buffer has enough free space. + * + * Must be called with the cdma lock held. + */ +static int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x, + struct host1x_cdma *cdma, + unsigned int needed) +{ + while (true) { + struct push_buffer *pb = &cdma->push_buffer; + unsigned int space; + + space = host1x_pushbuffer_space(pb); + if (space >= needed) + break; + + trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev), + CDMA_EVENT_PUSH_BUFFER_SPACE); + + /* If somebody has managed to already start waiting, yield */ + if (cdma->event != CDMA_EVENT_NONE) { + mutex_unlock(&cdma->lock); + schedule(); + mutex_lock(&cdma->lock); + continue; + } + + cdma->event = CDMA_EVENT_PUSH_BUFFER_SPACE; + + mutex_unlock(&cdma->lock); + wait_for_completion(&cdma->complete); + mutex_lock(&cdma->lock); + } + + return 0; +} +/* * Start timer that tracks the time spent by the job. * Must be called with the cdma lock held. */ static void cdma_start_timer_locked(struct host1x_cdma *cdma, struct host1x_job *job) { - struct host1x *host = cdma_to_host1x(cdma); - if (cdma->timeout.client) { /* timer already started */ return; } cdma->timeout.client = job->client; - cdma->timeout.syncpt = host1x_syncpt_get(host, job->syncpt_id); + cdma->timeout.syncpt = job->syncpt; cdma->timeout.syncpt_val = job->syncpt_end; cdma->timeout.start_ktime = ktime_get(); @@ -198,7 +292,7 @@ static void cdma_start_timer_locked(struct host1x_cdma *cdma, static void stop_cdma_timer_locked(struct host1x_cdma *cdma) { cancel_delayed_work(&cdma->timeout.wq); - cdma->timeout.client = 0; + cdma->timeout.client = NULL; } /* @@ -214,26 +308,22 @@ static void stop_cdma_timer_locked(struct host1x_cdma *cdma) static void update_cdma_locked(struct host1x_cdma *cdma) { bool signal = false; - struct host1x *host1x = cdma_to_host1x(cdma); struct host1x_job *job, *n; - /* If CDMA is stopped, queue is cleared and we can return */ - if (!cdma->running) - return; - /* * Walk the sync queue, reading the sync point registers as necessary, * to consume as many sync queue entries as possible without blocking */ list_for_each_entry_safe(job, n, &cdma->sync_queue, list) { - struct host1x_syncpt *sp = - host1x_syncpt_get(host1x, job->syncpt_id); + struct host1x_syncpt *sp = job->syncpt; /* Check whether this syncpt has completed, and bail if not */ - if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) { + if (!host1x_syncpt_is_expired(sp, job->syncpt_end) && + !job->cancelled) { /* Start timer on next pending syncpt */ if (job->timeout) cdma_start_timer_locked(cdma, job); + break; } @@ -247,7 +337,9 @@ static void update_cdma_locked(struct host1x_cdma *cdma) /* Pop push buffer slots */ if (job->num_slots) { struct push_buffer *pb = &cdma->push_buffer; + host1x_pushbuffer_pop(pb, job->num_slots); + if (cdma->event == CDMA_EVENT_PUSH_BUFFER_SPACE) signal = true; } @@ -262,18 +354,16 @@ static void update_cdma_locked(struct host1x_cdma *cdma) if (signal) { cdma->event = CDMA_EVENT_NONE; - up(&cdma->sem); + complete(&cdma->complete); } } void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, struct device *dev) { - u32 restart_addr; - u32 syncpt_incrs; - struct host1x_job *job = NULL; - u32 syncpt_val; struct host1x *host1x = cdma_to_host1x(cdma); + u32 restart_addr, syncpt_incrs, syncpt_val; + struct host1x_job *job, *next_job = NULL; syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt); @@ -291,40 +381,40 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, __func__); list_for_each_entry(job, &cdma->sync_queue, list) { - if (syncpt_val < job->syncpt_end) - break; + if (syncpt_val < job->syncpt_end) { + + if (!list_is_last(&job->list, &cdma->sync_queue)) + next_job = list_next_entry(job, list); + + goto syncpt_incr; + } host1x_job_dump(dev, job); } + /* all jobs have been completed */ + job = NULL; + +syncpt_incr: + /* - * Walk the sync_queue, first incrementing with the CPU syncpts that - * are partially executed (the first buffer) or fully skipped while - * still in the current context (slots are also NOP-ed). + * Increment with CPU the remaining syncpts of a partially executed job. * - * At the point contexts are interleaved, syncpt increments must be - * done inline with the pushbuffer from a GATHER buffer to maintain - * the order (slots are modified to be a GATHER of syncpt incrs). - * - * Note: save in restart_addr the location where the timed out buffer - * started in the PB, so we can start the refetch from there (with the - * modified NOP-ed PB slots). This lets things appear to have completed - * properly for this buffer and resources are freed. + * CDMA will continue execution starting with the next job or will get + * into idle state. */ - - dev_dbg(dev, "%s: perform CPU incr on pending same ctx buffers\n", - __func__); - - if (!list_empty(&cdma->sync_queue)) - restart_addr = job->first_get; + if (next_job) + restart_addr = next_job->first_get; else restart_addr = cdma->last_pos; - /* do CPU increments as long as this context continues */ - list_for_each_entry_from(job, &cdma->sync_queue, list) { - /* different context, gets us out of this loop */ - if (job->client != cdma->timeout.client) - break; + if (!job) + goto resume; + + /* do CPU increments for the remaining syncpts */ + if (job->syncpt_recovery) { + dev_dbg(dev, "%s: perform CPU incr on pending buffers\n", + __func__); /* won't need a timeout when replayed */ job->timeout = 0; @@ -339,23 +429,74 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, syncpt_incrs, job->syncpt_end, job->num_slots); - syncpt_val += syncpt_incrs; - } + dev_dbg(dev, "%s: finished sync_queue modification\n", + __func__); + } else { + struct host1x_job *failed_job = job; + + host1x_job_dump(dev, job); + + host1x_syncpt_set_locked(job->syncpt); + failed_job->cancelled = true; + + list_for_each_entry_continue(job, &cdma->sync_queue, list) { + unsigned int i; + + if (job->syncpt != failed_job->syncpt) + continue; + + for (i = 0; i < job->num_slots; i++) { + unsigned int slot = (job->first_get/8 + i) % + HOST1X_PUSHBUFFER_SLOTS; + u32 *mapped = cdma->push_buffer.mapped; + + /* + * Overwrite opcodes with 0 word writes + * to offset 0xbad. This does nothing but + * has a easily detected signature in debug + * traces. + * + * On systems with MLOCK enforcement enabled, + * the above 0 word writes would fall foul of + * the enforcement. As such, in the first slot + * put a RESTART_W opcode to the beginning + * of the next job. We don't use this for older + * chips since those only support the RESTART + * opcode with inconvenient alignment requirements. + */ + if (i == 0 && host1x->info->has_wide_gather) { + unsigned int next_job = (job->first_get/8 + job->num_slots) + % HOST1X_PUSHBUFFER_SLOTS; + mapped[2*slot+0] = (0xd << 28) | (next_job * 2); + mapped[2*slot+1] = 0x0; + } else { + mapped[2*slot+0] = 0x1bad0000; + mapped[2*slot+1] = 0x1bad0000; + } + } - /* The following sumbits from the same client may be dependent on the - * failed submit and therefore they may fail. Force a small timeout - * to make the queue cleanup faster */ + job->cancelled = true; + } - list_for_each_entry_from(job, &cdma->sync_queue, list) - if (job->client == cdma->timeout.client) - job->timeout = min_t(unsigned int, job->timeout, 500); + wmb(); - dev_dbg(dev, "%s: finished sync_queue modification\n", __func__); + update_cdma_locked(cdma); + } +resume: /* roll back DMAGET and start up channel again */ host1x_hw_cdma_resume(host1x, cdma, restart_addr); } +static void cdma_update_work(struct work_struct *work) +{ + struct host1x_cdma *cdma = container_of(work, struct host1x_cdma, update_work); + + mutex_lock(&cdma->lock); + update_cdma_locked(cdma); + mutex_unlock(&cdma->lock); +} + /* * Create a cdma */ @@ -364,7 +505,8 @@ int host1x_cdma_init(struct host1x_cdma *cdma) int err; mutex_init(&cdma->lock); - sema_init(&cdma->sem, 0); + init_completion(&cdma->complete); + INIT_WORK(&cdma->update_work, cdma_update_work); INIT_LIST_HEAD(&cdma->sync_queue); @@ -375,6 +517,7 @@ int host1x_cdma_init(struct host1x_cdma *cdma) err = host1x_pushbuffer_init(&cdma->push_buffer); if (err) return err; + return 0; } @@ -406,18 +549,29 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job) mutex_lock(&cdma->lock); + /* + * Check if syncpoint was locked due to previous job timeout. + * This needs to be done within the cdma lock to avoid a race + * with the timeout handler. + */ + if (job->syncpt->locked) { + mutex_unlock(&cdma->lock); + return -EPERM; + } + if (job->timeout) { /* init state on first submit with timeout value */ if (!cdma->timeout.initialized) { int err; - err = host1x_hw_cdma_timeout_init(host1x, cdma, - job->syncpt_id); + + err = host1x_hw_cdma_timeout_init(host1x, cdma); if (err) { mutex_unlock(&cdma->lock); return err; } } } + if (!cdma->running) host1x_hw_cdma_start(host1x, cdma); @@ -435,7 +589,6 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job) */ void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2) { - struct host1x *host1x = cdma_to_host1x(cdma); struct push_buffer *pb = &cdma->push_buffer; u32 slots_free = cdma->slots_free; @@ -443,17 +596,62 @@ void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2) trace_host1x_cdma_push(dev_name(cdma_to_channel(cdma)->dev), op1, op2); - if (slots_free == 0) { - host1x_hw_cdma_flush(host1x, cdma); + if (slots_free == 0) slots_free = host1x_cdma_wait_locked(cdma, CDMA_EVENT_PUSH_BUFFER_SPACE); - } + cdma->slots_free = slots_free - 1; cdma->slots_used++; host1x_pushbuffer_push(pb, op1, op2); } /* + * Push four words into two consecutive push buffer slots. Note that extra + * care needs to be taken not to split the two slots across the end of the + * push buffer. Otherwise the RESTART opcode at the end of the push buffer + * that ensures processing will restart at the beginning will break up the + * four words. + * + * Blocks as necessary if the push buffer is full. + */ +void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2, + u32 op3, u32 op4) +{ + struct host1x_channel *channel = cdma_to_channel(cdma); + struct host1x *host1x = cdma_to_host1x(cdma); + struct push_buffer *pb = &cdma->push_buffer; + unsigned int space, needed = 2, extra = 0; + + if (host1x_debug_trace_cmdbuf) + trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2, + op3, op4); + + /* compute number of extra slots needed for padding */ + if (pb->pos + 16 > pb->size) { + extra = (pb->size - pb->pos) / 8; + needed += extra; + } + + host1x_cdma_wait_pushbuffer_space(host1x, cdma, needed); + space = host1x_pushbuffer_space(pb); + + cdma->slots_free = space - needed; + cdma->slots_used += needed; + + if (extra > 0) { + /* + * If there isn't enough space at the tail of the pushbuffer, + * insert a RESTART(0) here to go back to the beginning. + * The code above adjusted the indexes appropriately. + */ + host1x_pushbuffer_push(pb, (0x5 << 28), 0xdead0000); + } + + host1x_pushbuffer_push(pb, op1, op2); + host1x_pushbuffer_push(pb, op3, op4); +} + +/* * End a cdma submit * Kick off DMA, add job to the sync queue, and a number of slots to be freed * from the pushbuffer. The handles for a submit must all be pinned at the same @@ -485,7 +683,5 @@ void host1x_cdma_end(struct host1x_cdma *cdma, */ void host1x_cdma_update(struct host1x_cdma *cdma) { - mutex_lock(&cdma->lock); - update_cdma_locked(cdma); - mutex_unlock(&cdma->lock); + schedule_work(&cdma->update_work); } |
