diff options
Diffstat (limited to 'drivers/gpu/host1x')
51 files changed, 2317 insertions, 1688 deletions
diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig index 6dab94adf25e..e6c78ae2003a 100644 --- a/drivers/gpu/host1x/Kconfig +++ b/drivers/gpu/host1x/Kconfig @@ -1,7 +1,13 @@ # SPDX-License-Identifier: GPL-2.0-only + +config TEGRA_HOST1X_CONTEXT_BUS + bool + config TEGRA_HOST1X tristate "NVIDIA Tegra host1x driver" - depends on ARCH_TEGRA || (ARM && COMPILE_TEST) + depends on ARCH_TEGRA || COMPILE_TEST + select DMA_SHARED_BUFFER + select TEGRA_HOST1X_CONTEXT_BUS select IOMMU_IOVA help Driver for the NVIDIA Tegra host1x hardware. diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile index 096017b8789d..ee5286ffe08d 100644 --- a/drivers/gpu/host1x/Makefile +++ b/drivers/gpu/host1x/Makefile @@ -9,11 +9,17 @@ host1x-y = \ job.o \ debug.o \ mipi.o \ + fence.o \ hw/host1x01.o \ hw/host1x02.o \ hw/host1x04.o \ hw/host1x05.o \ hw/host1x06.o \ - hw/host1x07.o + hw/host1x07.o \ + hw/host1x08.o + +host1x-$(CONFIG_IOMMU_API) += \ + context.o obj-$(CONFIG_TEGRA_HOST1X) += host1x.o +obj-$(CONFIG_TEGRA_HOST1X_CONTEXT_BUS) += context_bus.o diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index 218e3718fd68..723a80895cd4 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -5,6 +5,7 @@ */ #include <linux/debugfs.h> +#include <linux/dma-mapping.h> #include <linux/host1x.h> #include <linux/of.h> #include <linux/seq_file.h> @@ -40,7 +41,6 @@ static int host1x_subdev_add(struct host1x_device *device, struct device_node *np) { struct host1x_subdev *subdev; - struct device_node *child; int err; subdev = kzalloc(sizeof(*subdev), GFP_KERNEL); @@ -55,13 +55,12 @@ static int host1x_subdev_add(struct host1x_device *device, mutex_unlock(&device->subdevs_lock); /* recursively add children */ - for_each_child_of_node(np, child) { + for_each_child_of_node_scoped(np, child) { if (of_match_node(driver->subdevs, child) && of_device_is_available(child)) { err = host1x_subdev_add(device, driver, child); if (err < 0) { /* XXX cleanup? 
*/ - of_node_put(child); return err; } } @@ -89,17 +88,14 @@ static void host1x_subdev_del(struct host1x_subdev *subdev) static int host1x_device_parse_dt(struct host1x_device *device, struct host1x_driver *driver) { - struct device_node *np; int err; - for_each_child_of_node(device->dev.parent->of_node, np) { + for_each_child_of_node_scoped(device->dev.parent->of_node, np) { if (of_match_node(driver->subdevs, np) && of_device_is_available(np)) { err = host1x_subdev_add(device, driver, np); - if (err < 0) { - of_node_put(np); + if (err < 0) return err; - } } } @@ -332,46 +328,24 @@ static int host1x_del_client(struct host1x *host1x, return -ENODEV; } -static int host1x_device_match(struct device *dev, struct device_driver *drv) +static int host1x_device_match(struct device *dev, const struct device_driver *drv) { return strcmp(dev_name(dev), drv->name) == 0; } -static int host1x_device_uevent(struct device *dev, +/* + * Note that this is really only needed for backwards compatibility + * with libdrm, which parses this information from sysfs and will + * fail if it can't find the OF_FULLNAME, specifically. + */ +static int host1x_device_uevent(const struct device *dev, struct kobj_uevent_env *env) { - struct device_node *np = dev->parent->of_node; - unsigned int count = 0; - struct property *p; - const char *compat; - - /* - * This duplicates most of of_device_uevent(), but the latter cannot - * be called from modules and operates on dev->of_node, which is not - * available in this case. - * - * Note that this is really only needed for backwards compatibility - * with libdrm, which parses this information from sysfs and will - * fail if it can't find the OF_FULLNAME, specifically. 
- */ - add_uevent_var(env, "OF_NAME=%pOFn", np); - add_uevent_var(env, "OF_FULLNAME=%pOF", np); - - of_property_for_each_string(np, "compatible", p, compat) { - add_uevent_var(env, "OF_COMPATIBLE_%u=%s", count, compat); - count++; - } - - add_uevent_var(env, "OF_COMPATIBLE_N=%u", count); + of_device_uevent(dev->parent, env); return 0; } -static int host1x_dma_configure(struct device *dev) -{ - return of_dma_configure(dev, dev->of_node, true); -} - static const struct dev_pm_ops host1x_device_pm_ops = { .suspend = pm_generic_suspend, .resume = pm_generic_resume, @@ -381,11 +355,10 @@ static const struct dev_pm_ops host1x_device_pm_ops = { .restore = pm_generic_restore, }; -struct bus_type host1x_bus_type = { +const struct bus_type host1x_bus_type = { .name = "host1x", .match = host1x_device_match, .uevent = host1x_device_uevent, - .dma_configure = host1x_dma_configure, .pm = &host1x_device_pm_ops, }; @@ -474,8 +447,6 @@ static int host1x_device_add(struct host1x *host1x, device->dev.bus = &host1x_bus_type; device->dev.parent = host1x->dev; - of_dma_configure(&device->dev, host1x->dev->of_node, true); - device->dev.dma_parms = &device->dma_parms; dma_set_max_seg_size(&device->dev, UINT_MAX); @@ -500,6 +471,18 @@ static int host1x_device_add(struct host1x *host1x, mutex_unlock(&clients_lock); + /* + * Add device even if there are no subdevs to ensure syncpoint functionality + * is available regardless of whether any engine subdevices are present + */ + if (list_empty(&device->subdevs)) { + err = device_add(&device->dev); + if (err < 0) + dev_err(&device->dev, "failed to add device: %d\n", err); + else + device->registered = true; + } + return 0; } @@ -742,6 +725,7 @@ EXPORT_SYMBOL(host1x_driver_unregister); */ void __host1x_client_init(struct host1x_client *client, struct lock_class_key *key) { + host1x_bo_cache_init(&client->cache); INIT_LIST_HEAD(&client->list); __mutex_init(&client->lock, "host1x client lock", key); client->usecount = 0; @@ -761,7 +745,6 @@ 
EXPORT_SYMBOL(host1x_client_exit); /** * __host1x_client_register() - register a host1x client * @client: host1x client - * @key: lock class key for the client-specific mutex * * Registers a host1x client with each host1x controller instance. Note that * each client will only match their parent host1x controller and will only be @@ -802,7 +785,7 @@ EXPORT_SYMBOL(__host1x_client_register); * Removes a host1x client from its host1x controller instance. If a logical * device has already been initialized, it will be torn down. */ -int host1x_client_unregister(struct host1x_client *client) +void host1x_client_unregister(struct host1x_client *client) { struct host1x_client *c; struct host1x *host1x; @@ -814,7 +797,7 @@ int host1x_client_unregister(struct host1x_client *client) err = host1x_del_client(host1x, client); if (!err) { mutex_unlock(&devices_lock); - return 0; + return; } } @@ -830,7 +813,7 @@ int host1x_client_unregister(struct host1x_client *client) mutex_unlock(&clients_lock); - return 0; + host1x_bo_cache_destroy(&client->cache); } EXPORT_SYMBOL(host1x_client_unregister); @@ -904,3 +887,78 @@ unlock: return err; } EXPORT_SYMBOL(host1x_client_resume); + +struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo, + enum dma_data_direction dir, + struct host1x_bo_cache *cache) +{ + struct host1x_bo_mapping *mapping; + + if (cache) { + mutex_lock(&cache->lock); + + list_for_each_entry(mapping, &cache->mappings, entry) { + if (mapping->bo == bo && mapping->direction == dir) { + kref_get(&mapping->ref); + goto unlock; + } + } + } + + mapping = bo->ops->pin(dev, bo, dir); + if (IS_ERR(mapping)) + goto unlock; + + spin_lock(&mapping->bo->lock); + list_add_tail(&mapping->list, &bo->mappings); + spin_unlock(&mapping->bo->lock); + + if (cache) { + INIT_LIST_HEAD(&mapping->entry); + mapping->cache = cache; + + list_add_tail(&mapping->entry, &cache->mappings); + + /* bump reference count to track the copy in the cache */ + kref_get(&mapping->ref); 
+ } + +unlock: + if (cache) + mutex_unlock(&cache->lock); + + return mapping; +} +EXPORT_SYMBOL(host1x_bo_pin); + +static void __host1x_bo_unpin(struct kref *ref) +{ + struct host1x_bo_mapping *mapping = to_host1x_bo_mapping(ref); + + /* + * When the last reference of the mapping goes away, make sure to remove the mapping from + * the cache. + */ + if (mapping->cache) + list_del(&mapping->entry); + + spin_lock(&mapping->bo->lock); + list_del(&mapping->list); + spin_unlock(&mapping->bo->lock); + + mapping->bo->ops->unpin(mapping); +} + +void host1x_bo_unpin(struct host1x_bo_mapping *mapping) +{ + struct host1x_bo_cache *cache = mapping->cache; + + if (cache) + mutex_lock(&cache->lock); + + kref_put(&mapping->ref, __host1x_bo_unpin); + + if (cache) + mutex_unlock(&cache->lock); +} +EXPORT_SYMBOL(host1x_bo_unpin); diff --git a/drivers/gpu/host1x/bus.h b/drivers/gpu/host1x/bus.h index a4adf9abc3b4..a80ceadfeb34 100644 --- a/drivers/gpu/host1x/bus.h +++ b/drivers/gpu/host1x/bus.h @@ -10,7 +10,7 @@ struct bus_type; struct host1x; -extern struct bus_type host1x_bus_type; +extern const struct bus_type host1x_bus_type; int host1x_register(struct host1x *host1x); int host1x_unregister(struct host1x *host1x); diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 6e6ca774f68d..ba2e572567c0 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -105,7 +105,7 @@ static int host1x_pushbuffer_init(struct push_buffer *pb) pb->dma = iova_dma_addr(&host1x->iova, alloc); err = iommu_map(host1x->domain, pb->dma, pb->phys, size, - IOMMU_READ); + IOMMU_READ, GFP_KERNEL); if (err) goto iommu_free_iova; } else { @@ -247,8 +247,6 @@ static int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x, trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev), CDMA_EVENT_PUSH_BUFFER_SPACE); - host1x_hw_cdma_flush(host1x, cdma); - /* If somebody has managed to already start waiting, yield */ if (cdma->event != CDMA_EVENT_NONE) { mutex_unlock(&cdma->lock); 
@@ -312,10 +310,6 @@ static void update_cdma_locked(struct host1x_cdma *cdma) bool signal = false; struct host1x_job *job, *n; - /* If CDMA is stopped, queue is cleared and we can return */ - if (!cdma->running) - return; - /* * Walk the sync queue, reading the sync point registers as necessary, * to consume as many sync queue entries as possible without blocking @@ -324,7 +318,8 @@ static void update_cdma_locked(struct host1x_cdma *cdma) struct host1x_syncpt *sp = job->syncpt; /* Check whether this syncpt has completed, and bail if not */ - if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) { + if (!host1x_syncpt_is_expired(sp, job->syncpt_end) && + !job->cancelled) { /* Start timer on next pending syncpt */ if (job->timeout) cdma_start_timer_locked(cdma, job); @@ -413,8 +408,11 @@ syncpt_incr: else restart_addr = cdma->last_pos; + if (!job) + goto resume; + /* do CPU increments for the remaining syncpts */ - if (job) { + if (job->syncpt_recovery) { dev_dbg(dev, "%s: perform CPU incr on pending buffers\n", __func__); @@ -433,12 +431,72 @@ syncpt_incr: dev_dbg(dev, "%s: finished sync_queue modification\n", __func__); + } else { + struct host1x_job *failed_job = job; + + host1x_job_dump(dev, job); + + host1x_syncpt_set_locked(job->syncpt); + failed_job->cancelled = true; + + list_for_each_entry_continue(job, &cdma->sync_queue, list) { + unsigned int i; + + if (job->syncpt != failed_job->syncpt) + continue; + + for (i = 0; i < job->num_slots; i++) { + unsigned int slot = (job->first_get/8 + i) % + HOST1X_PUSHBUFFER_SLOTS; + u32 *mapped = cdma->push_buffer.mapped; + + /* + * Overwrite opcodes with 0 word writes + * to offset 0xbad. This does nothing but + * has a easily detected signature in debug + * traces. + * + * On systems with MLOCK enforcement enabled, + * the above 0 word writes would fall foul of + * the enforcement. As such, in the first slot + * put a RESTART_W opcode to the beginning + * of the next job. 
We don't use this for older + * chips since those only support the RESTART + * opcode with inconvenient alignment requirements. + */ + if (i == 0 && host1x->info->has_wide_gather) { + unsigned int next_job = (job->first_get/8 + job->num_slots) + % HOST1X_PUSHBUFFER_SLOTS; + mapped[2*slot+0] = (0xd << 28) | (next_job * 2); + mapped[2*slot+1] = 0x0; + } else { + mapped[2*slot+0] = 0x1bad0000; + mapped[2*slot+1] = 0x1bad0000; + } + } + + job->cancelled = true; + } + + wmb(); + + update_cdma_locked(cdma); } +resume: /* roll back DMAGET and start up channel again */ host1x_hw_cdma_resume(host1x, cdma, restart_addr); } +static void cdma_update_work(struct work_struct *work) +{ + struct host1x_cdma *cdma = container_of(work, struct host1x_cdma, update_work); + + mutex_lock(&cdma->lock); + update_cdma_locked(cdma); + mutex_unlock(&cdma->lock); +} + /* * Create a cdma */ @@ -448,6 +506,7 @@ int host1x_cdma_init(struct host1x_cdma *cdma) mutex_init(&cdma->lock); init_completion(&cdma->complete); + INIT_WORK(&cdma->update_work, cdma_update_work); INIT_LIST_HEAD(&cdma->sync_queue); @@ -490,6 +549,16 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job) mutex_lock(&cdma->lock); + /* + * Check if syncpoint was locked due to previous job timeout. + * This needs to be done within the cdma lock to avoid a race + * with the timeout handler. 
+ */ + if (job->syncpt->locked) { + mutex_unlock(&cdma->lock); + return -EPERM; + } + if (job->timeout) { /* init state on first submit with timeout value */ if (!cdma->timeout.initialized) { @@ -520,7 +589,6 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job) */ void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2) { - struct host1x *host1x = cdma_to_host1x(cdma); struct push_buffer *pb = &cdma->push_buffer; u32 slots_free = cdma->slots_free; @@ -528,11 +596,9 @@ void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2) trace_host1x_cdma_push(dev_name(cdma_to_channel(cdma)->dev), op1, op2); - if (slots_free == 0) { - host1x_hw_cdma_flush(host1x, cdma); + if (slots_free == 0) slots_free = host1x_cdma_wait_locked(cdma, CDMA_EVENT_PUSH_BUFFER_SPACE); - } cdma->slots_free = slots_free - 1; cdma->slots_used++; @@ -554,8 +620,7 @@ void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2, struct host1x_channel *channel = cdma_to_channel(cdma); struct host1x *host1x = cdma_to_host1x(cdma); struct push_buffer *pb = &cdma->push_buffer; - unsigned int needed = 2, extra = 0, i; - unsigned int space = cdma->slots_free; + unsigned int space, needed = 2, extra = 0; if (host1x_debug_trace_cmdbuf) trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2, @@ -573,20 +638,14 @@ void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2, cdma->slots_free = space - needed; cdma->slots_used += needed; - /* - * Note that we rely on the fact that this is only used to submit wide - * gather opcodes, which consist of 3 words, and they are padded with - * a NOP to avoid having to deal with fractional slots (a slot always - * represents 2 words). The fourth opcode passed to this function will - * therefore always be a NOP. - * - * This works around a slight ambiguity when it comes to opcodes. 
For - * all current host1x incarnations the NOP opcode uses the exact same - * encoding (0x20000000), so we could hard-code the value here, but a - * new incarnation may change it and break that assumption. - */ - for (i = 0; i < extra; i++) - host1x_pushbuffer_push(pb, op4, op4); + if (extra > 0) { + /* + * If there isn't enough space at the tail of the pushbuffer, + * insert a RESTART(0) here to go back to the beginning. + * The code above adjusted the indexes appropriately. + */ + host1x_pushbuffer_push(pb, (0x5 << 28), 0xdead0000); + } host1x_pushbuffer_push(pb, op1, op2); host1x_pushbuffer_push(pb, op3, op4); @@ -624,7 +683,5 @@ void host1x_cdma_end(struct host1x_cdma *cdma, */ void host1x_cdma_update(struct host1x_cdma *cdma) { - mutex_lock(&cdma->lock); - update_cdma_locked(cdma); - mutex_unlock(&cdma->lock); + schedule_work(&cdma->update_work); } diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h index 12c4327c4df0..7fd8168af4f9 100644 --- a/drivers/gpu/host1x/cdma.h +++ b/drivers/gpu/host1x/cdma.h @@ -11,6 +11,7 @@ #include <linux/sched.h> #include <linux/completion.h> #include <linux/list.h> +#include <linux/workqueue.h> struct host1x_syncpt; struct host1x_userctx_timeout; @@ -69,6 +70,7 @@ struct host1x_cdma { struct buffer_timeout timeout; /* channel's timeout state/wq */ bool running; bool torndown; + struct work_struct update_work; }; #define cdma_to_channel(cdma) container_of(cdma, struct host1x_channel, cdma) diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c index 4cd212bb570d..08077afe4cde 100644 --- a/drivers/gpu/host1x/channel.c +++ b/drivers/gpu/host1x/channel.c @@ -21,22 +21,20 @@ int host1x_channel_list_init(struct host1x_channel_list *chlist, if (!chlist->channels) return -ENOMEM; - chlist->allocated_channels = - kcalloc(BITS_TO_LONGS(num_channels), sizeof(unsigned long), - GFP_KERNEL); + chlist->allocated_channels = bitmap_zalloc(num_channels, GFP_KERNEL); if (!chlist->allocated_channels) { 
kfree(chlist->channels); return -ENOMEM; } - bitmap_zero(chlist->allocated_channels, num_channels); + mutex_init(&chlist->lock); return 0; } void host1x_channel_list_free(struct host1x_channel_list *chlist) { - kfree(chlist->allocated_channels); + bitmap_free(chlist->allocated_channels); kfree(chlist->channels); } @@ -75,6 +73,33 @@ struct host1x_channel *host1x_channel_get_index(struct host1x *host, return ch; } +void host1x_channel_stop(struct host1x_channel *channel) +{ + struct host1x *host = dev_get_drvdata(channel->dev->parent); + + host1x_hw_cdma_stop(host, &channel->cdma); +} +EXPORT_SYMBOL(host1x_channel_stop); + +/** + * host1x_channel_stop_all() - disable CDMA on allocated channels + * @host: host1x instance + * + * Stop CDMA on allocated channels + */ +void host1x_channel_stop_all(struct host1x *host) +{ + struct host1x_channel_list *chlist = &host->channel_list; + int bit; + + mutex_lock(&chlist->lock); + + for_each_set_bit(bit, chlist->allocated_channels, host->info->nb_channels) + host1x_channel_stop(&chlist->channels[bit]); + + mutex_unlock(&chlist->lock); +} + static void release_channel(struct kref *kref) { struct host1x_channel *channel = @@ -100,8 +125,11 @@ static struct host1x_channel *acquire_unused_channel(struct host1x *host) unsigned int max_channels = host->info->nb_channels; unsigned int index; + mutex_lock(&chlist->lock); + index = find_first_zero_bit(chlist->allocated_channels, max_channels); if (index >= max_channels) { + mutex_unlock(&chlist->lock); dev_err(host->dev, "failed to find free channel\n"); return NULL; } @@ -110,6 +138,8 @@ static struct host1x_channel *acquire_unused_channel(struct host1x *host) set_bit(index, chlist->allocated_channels); + mutex_unlock(&chlist->lock); + return &chlist->channels[index]; } diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h index 39044ff6c3aa..d7aede204d83 100644 --- a/drivers/gpu/host1x/channel.h +++ b/drivers/gpu/host1x/channel.h @@ -10,6 +10,7 @@ #include 
<linux/io.h> #include <linux/kref.h> +#include <linux/mutex.h> #include "cdma.h" @@ -18,6 +19,8 @@ struct host1x_channel; struct host1x_channel_list { struct host1x_channel *channels; + + struct mutex lock; unsigned long *allocated_channels; }; @@ -37,5 +40,6 @@ int host1x_channel_list_init(struct host1x_channel_list *chlist, void host1x_channel_list_free(struct host1x_channel_list *chlist); struct host1x_channel *host1x_channel_get_index(struct host1x *host, unsigned int index); +void host1x_channel_stop_all(struct host1x *host); #endif diff --git a/drivers/gpu/host1x/context.c b/drivers/gpu/host1x/context.c new file mode 100644 index 000000000000..a6f6779662a3 --- /dev/null +++ b/drivers/gpu/host1x/context.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, NVIDIA Corporation. + */ + +#include <linux/device.h> +#include <linux/kref.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/pid.h> +#include <linux/slab.h> + +#include "context.h" +#include "dev.h" + +static void host1x_memory_context_release(struct device *dev) +{ + /* context device is freed in host1x_memory_context_list_free() */ +} + +int host1x_memory_context_list_init(struct host1x *host1x) +{ + struct host1x_memory_context_list *cdl = &host1x->context_list; + struct device_node *node = host1x->dev->of_node; + struct host1x_memory_context *ctx; + unsigned int i; + int err; + + cdl->devs = NULL; + cdl->len = 0; + mutex_init(&cdl->lock); + + err = of_property_count_u32_elems(node, "iommu-map"); + if (err < 0) + return 0; + + cdl->len = err / 4; + cdl->devs = kcalloc(cdl->len, sizeof(*cdl->devs), GFP_KERNEL); + if (!cdl->devs) + return -ENOMEM; + + for (i = 0; i < cdl->len; i++) { + ctx = &cdl->devs[i]; + + ctx->host = host1x; + + device_initialize(&ctx->dev); + + /* + * Due to an issue with T194 NVENC, only 38 bits can be used. + * Anyway, 256GiB of IOVA ought to be enough for anyone. 
+ */ + ctx->dma_mask = DMA_BIT_MASK(38); + ctx->dev.dma_mask = &ctx->dma_mask; + ctx->dev.coherent_dma_mask = ctx->dma_mask; + dev_set_name(&ctx->dev, "host1x-ctx.%d", i); + ctx->dev.bus = &host1x_context_device_bus_type; + ctx->dev.parent = host1x->dev; + ctx->dev.release = host1x_memory_context_release; + + ctx->dev.dma_parms = &ctx->dma_parms; + dma_set_max_seg_size(&ctx->dev, UINT_MAX); + + err = device_add(&ctx->dev); + if (err) { + dev_err(host1x->dev, "could not add context device %d: %d\n", i, err); + put_device(&ctx->dev); + goto unreg_devices; + } + + err = of_dma_configure_id(&ctx->dev, node, true, &i); + if (err) { + dev_err(host1x->dev, "IOMMU configuration failed for context device %d: %d\n", + i, err); + device_unregister(&ctx->dev); + goto unreg_devices; + } + + if (!tegra_dev_iommu_get_stream_id(&ctx->dev, &ctx->stream_id) || + !device_iommu_mapped(&ctx->dev)) { + dev_err(host1x->dev, "Context device %d has no IOMMU!\n", i); + device_unregister(&ctx->dev); + + /* + * This means that if IOMMU is disabled but context devices + * are defined in the device tree, Host1x will fail to probe. + * That's probably OK in this time and age. 
+ */ + err = -EINVAL; + + goto unreg_devices; + } + } + + return 0; + +unreg_devices: + while (i--) + device_unregister(&cdl->devs[i].dev); + + kfree(cdl->devs); + cdl->devs = NULL; + cdl->len = 0; + + return err; +} + +void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl) +{ + unsigned int i; + + for (i = 0; i < cdl->len; i++) + device_unregister(&cdl->devs[i].dev); + + kfree(cdl->devs); + cdl->len = 0; +} + +struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x, + struct device *dev, + struct pid *pid) +{ + struct host1x_memory_context_list *cdl = &host1x->context_list; + struct host1x_memory_context *free = NULL; + int i; + + if (!cdl->len) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&cdl->lock); + + for (i = 0; i < cdl->len; i++) { + struct host1x_memory_context *cd = &cdl->devs[i]; + + if (cd->dev.iommu->iommu_dev != dev->iommu->iommu_dev) + continue; + + if (cd->owner == pid) { + refcount_inc(&cd->ref); + mutex_unlock(&cdl->lock); + return cd; + } else if (!cd->owner && !free) { + free = cd; + } + } + + if (!free) { + mutex_unlock(&cdl->lock); + return ERR_PTR(-EBUSY); + } + + refcount_set(&free->ref, 1); + free->owner = get_pid(pid); + + mutex_unlock(&cdl->lock); + + return free; +} +EXPORT_SYMBOL_GPL(host1x_memory_context_alloc); + +void host1x_memory_context_get(struct host1x_memory_context *cd) +{ + refcount_inc(&cd->ref); +} +EXPORT_SYMBOL_GPL(host1x_memory_context_get); + +void host1x_memory_context_put(struct host1x_memory_context *cd) +{ + struct host1x_memory_context_list *cdl = &cd->host->context_list; + + if (refcount_dec_and_mutex_lock(&cd->ref, &cdl->lock)) { + put_pid(cd->owner); + cd->owner = NULL; + mutex_unlock(&cdl->lock); + } +} +EXPORT_SYMBOL_GPL(host1x_memory_context_put); diff --git a/drivers/gpu/host1x/context.h b/drivers/gpu/host1x/context.h new file mode 100644 index 000000000000..3e03bc1d3bac --- /dev/null +++ b/drivers/gpu/host1x/context.h @@ -0,0 +1,38 @@ +/* 
SPDX-License-Identifier: GPL-2.0-only */ +/* + * Host1x context devices + * + * Copyright (c) 2020, NVIDIA Corporation. + */ + +#ifndef __HOST1X_CONTEXT_H +#define __HOST1X_CONTEXT_H + +#include <linux/mutex.h> +#include <linux/refcount.h> + +struct host1x; + +extern struct bus_type host1x_context_device_bus_type; + +struct host1x_memory_context_list { + struct mutex lock; + struct host1x_memory_context *devs; + unsigned int len; +}; + +#ifdef CONFIG_IOMMU_API +int host1x_memory_context_list_init(struct host1x *host1x); +void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl); +#else +static inline int host1x_memory_context_list_init(struct host1x *host1x) +{ + return 0; +} + +static inline void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl) +{ +} +#endif + +#endif diff --git a/drivers/gpu/host1x/context_bus.c b/drivers/gpu/host1x/context_bus.c new file mode 100644 index 000000000000..7cd0e1a5edd1 --- /dev/null +++ b/drivers/gpu/host1x/context_bus.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, NVIDIA Corporation. 
+ */ + +#include <linux/device.h> +#include <linux/of.h> + +const struct bus_type host1x_context_device_bus_type = { + .name = "host1x-context", +}; +EXPORT_SYMBOL_GPL(host1x_context_device_bus_type); + +static int __init host1x_context_device_bus_init(void) +{ + int err; + + err = bus_register(&host1x_context_device_bus_type); + if (err < 0) { + pr_err("bus type registration failed: %d\n", err); + return err; + } + + return 0; +} +postcore_initcall(host1x_context_device_bus_init); diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c index 8a14880c61bb..6433c00d5d7e 100644 --- a/drivers/gpu/host1x/debug.c +++ b/drivers/gpu/host1x/debug.c @@ -7,6 +7,7 @@ */ #include <linux/debugfs.h> +#include <linux/pm_runtime.h> #include <linux/seq_file.h> #include <linux/uaccess.h> @@ -52,6 +53,11 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo) { struct host1x *m = dev_get_drvdata(ch->dev->parent); struct output *o = data; + int err; + + err = pm_runtime_resume_and_get(m->dev); + if (err < 0) + return err; mutex_lock(&ch->cdma.lock); mutex_lock(&debug_lock); @@ -64,27 +70,38 @@ static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo) mutex_unlock(&debug_lock); mutex_unlock(&ch->cdma.lock); + pm_runtime_put(m->dev); + return 0; } -static void show_syncpts(struct host1x *m, struct output *o) +static void show_syncpts(struct host1x *m, struct output *o, bool show_all) { + unsigned long irqflags; struct list_head *pos; unsigned int i; + int err; host1x_debug_output(o, "---- syncpts ----\n"); + err = pm_runtime_resume_and_get(m->dev); + if (err < 0) + return; + for (i = 0; i < host1x_syncpt_nb_pts(m); i++) { u32 max = host1x_syncpt_read_max(m->syncpt + i); u32 min = host1x_syncpt_load(m->syncpt + i); unsigned int waiters = 0; - spin_lock(&m->syncpt[i].intr.lock); - list_for_each(pos, &m->syncpt[i].intr.wait_head) + spin_lock_irqsave(&m->syncpt[i].fences.lock, irqflags); + list_for_each(pos, 
&m->syncpt[i].fences.list) waiters++; - spin_unlock(&m->syncpt[i].intr.lock); + spin_unlock_irqrestore(&m->syncpt[i].fences.lock, irqflags); - if (!min && !max && !waiters) + if (!kref_read(&m->syncpt[i].ref)) + continue; + + if (!show_all && !min && !max && !waiters) continue; host1x_debug_output(o, @@ -101,6 +118,8 @@ static void show_syncpts(struct host1x *m, struct output *o) base_val); } + pm_runtime_put(m->dev); + host1x_debug_output(o, "\n"); } @@ -109,7 +128,7 @@ static void show_all(struct host1x *m, struct output *o, bool show_fifo) unsigned int i; host1x_hw_show_mlocks(m, o); - show_syncpts(m, o); + show_syncpts(m, o, true); host1x_debug_output(o, "---- channels ----\n"); for (i = 0; i < m->info->nb_channels; ++i) { @@ -122,7 +141,7 @@ static void show_all(struct host1x *m, struct output *o, bool show_fifo) } } -static int host1x_debug_show_all(struct seq_file *s, void *unused) +static int host1x_debug_all_show(struct seq_file *s, void *unused) { struct output o = { .fn = write_to_seqfile, @@ -133,6 +152,7 @@ static int host1x_debug_show_all(struct seq_file *s, void *unused) return 0; } +DEFINE_SHOW_ATTRIBUTE(host1x_debug_all); static int host1x_debug_show(struct seq_file *s, void *unused) { @@ -145,30 +165,7 @@ static int host1x_debug_show(struct seq_file *s, void *unused) return 0; } - -static int host1x_debug_open_all(struct inode *inode, struct file *file) -{ - return single_open(file, host1x_debug_show_all, inode->i_private); -} - -static const struct file_operations host1x_debug_all_fops = { - .open = host1x_debug_open_all, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int host1x_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, host1x_debug_show, inode->i_private); -} - -static const struct file_operations host1x_debug_fops = { - .open = host1x_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(host1x_debug); static 
void host1x_debugfs_init(struct host1x *host1x) { @@ -219,12 +216,3 @@ void host1x_debug_dump(struct host1x *host1x) show_all(host1x, &o, true); } - -void host1x_debug_dump_syncpts(struct host1x *host1x) -{ - struct output o = { - .fn = write_to_printk - }; - - show_syncpts(host1x, &o); -} diff --git a/drivers/gpu/host1x/debug.h b/drivers/gpu/host1x/debug.h index 62bd8a091fa7..c43c61d876a9 100644 --- a/drivers/gpu/host1x/debug.h +++ b/drivers/gpu/host1x/debug.h @@ -41,6 +41,5 @@ extern unsigned int host1x_debug_trace_cmdbuf; void host1x_debug_init(struct host1x *host1x); void host1x_debug_deinit(struct host1x *host1x); void host1x_debug_dump(struct host1x *host1x); -void host1x_debug_dump_syncpts(struct host1x *host1x); #endif diff --git a/drivers/gpu/host1x/dev.c b/drivers/gpu/host1x/dev.c index fbb6447b8659..3f475f0e6545 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -6,20 +6,30 @@ */ #include <linux/clk.h> +#include <linux/delay.h> #include <linux/dma-mapping.h> #include <linux/io.h> #include <linux/list.h> #include <linux/module.h> -#include <linux/of_device.h> #include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include <linux/slab.h> +#include <soc/tegra/common.h> + #define CREATE_TRACE_POINTS #include <trace/events/host1x.h> #undef CREATE_TRACE_POINTS +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +#include <asm/dma-iommu.h> +#endif + #include "bus.h" #include "channel.h" +#include "context.h" #include "debug.h" #include "dev.h" #include "intr.h" @@ -30,6 +40,12 @@ #include "hw/host1x05.h" #include "hw/host1x06.h" #include "hw/host1x07.h" +#include "hw/host1x08.h" + +void host1x_common_writel(struct host1x *host1x, u32 v, u32 r) +{ + writel(v, host1x->common_regs + r); +} void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r) { @@ -55,6 +71,15 @@ u32 host1x_sync_readl(struct host1x *host1x, u32 r) return readl(sync_regs + r); } +#ifdef CONFIG_64BIT +u64 
host1x_sync_readq(struct host1x *host1x, u32 r) +{ + void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset; + + return readq(sync_regs + r); +} +#endif + void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r) { writel(v, ch->regs + r); @@ -126,12 +151,29 @@ static const struct host1x_info host1x05_info = { }; static const struct host1x_sid_entry tegra186_sid_table[] = { - { - /* VIC */ - .base = 0x1af0, - .offset = 0x30, - .limit = 0x34 - }, + { /* SE1 */ .base = 0x1ac8, .offset = 0x90, .limit = 0x90 }, + { /* SE2 */ .base = 0x1ad0, .offset = 0x90, .limit = 0x90 }, + { /* SE3 */ .base = 0x1ad8, .offset = 0x90, .limit = 0x90 }, + { /* SE4 */ .base = 0x1ae0, .offset = 0x90, .limit = 0x90 }, + { /* ISP */ .base = 0x1ae8, .offset = 0x50, .limit = 0x50 }, + { /* VIC */ .base = 0x1af0, .offset = 0x30, .limit = 0x34 }, + { /* NVENC */ .base = 0x1af8, .offset = 0x30, .limit = 0x34 }, + { /* NVDEC */ .base = 0x1b00, .offset = 0x30, .limit = 0x34 }, + { /* NVJPG */ .base = 0x1b08, .offset = 0x30, .limit = 0x34 }, + { /* TSEC */ .base = 0x1b10, .offset = 0x30, .limit = 0x34 }, + { /* TSECB */ .base = 0x1b18, .offset = 0x30, .limit = 0x34 }, + { /* VI 0 */ .base = 0x1b80, .offset = 0x10000, .limit = 0x10000 }, + { /* VI 1 */ .base = 0x1b88, .offset = 0x20000, .limit = 0x20000 }, + { /* VI 2 */ .base = 0x1b90, .offset = 0x30000, .limit = 0x30000 }, + { /* VI 3 */ .base = 0x1b98, .offset = 0x40000, .limit = 0x40000 }, + { /* VI 4 */ .base = 0x1ba0, .offset = 0x50000, .limit = 0x50000 }, + { /* VI 5 */ .base = 0x1ba8, .offset = 0x60000, .limit = 0x60000 }, + { /* VI 6 */ .base = 0x1bb0, .offset = 0x70000, .limit = 0x70000 }, + { /* VI 7 */ .base = 0x1bb8, .offset = 0x80000, .limit = 0x80000 }, + { /* VI 8 */ .base = 0x1bc0, .offset = 0x90000, .limit = 0x90000 }, + { /* VI 9 */ .base = 0x1bc8, .offset = 0xa0000, .limit = 0xa0000 }, + { /* VI 10 */ .base = 0x1bd0, .offset = 0xb0000, .limit = 0xb0000 }, + { /* VI 11 */ .base = 0x1bd8, .offset = 0xc0000, .limit = 
0xc0000 }, }; static const struct host1x_info host1x06_info = { @@ -147,15 +189,30 @@ static const struct host1x_info host1x06_info = { .num_sid_entries = ARRAY_SIZE(tegra186_sid_table), .sid_table = tegra186_sid_table, .reserve_vblank_syncpts = false, + .skip_reset_assert = true, }; static const struct host1x_sid_entry tegra194_sid_table[] = { - { - /* VIC */ - .base = 0x1af0, - .offset = 0x30, - .limit = 0x34 - }, + { /* SE1 */ .base = 0x1ac8, .offset = 0x90, .limit = 0x90 }, + { /* SE2 */ .base = 0x1ad0, .offset = 0x90, .limit = 0x90 }, + { /* SE3 */ .base = 0x1ad8, .offset = 0x90, .limit = 0x90 }, + { /* SE4 */ .base = 0x1ae0, .offset = 0x90, .limit = 0x90 }, + { /* ISP */ .base = 0x1ae8, .offset = 0x800, .limit = 0x800 }, + { /* VIC */ .base = 0x1af0, .offset = 0x30, .limit = 0x34 }, + { /* NVENC */ .base = 0x1af8, .offset = 0x30, .limit = 0x34 }, + { /* NVDEC */ .base = 0x1b00, .offset = 0x30, .limit = 0x34 }, + { /* NVJPG */ .base = 0x1b08, .offset = 0x30, .limit = 0x34 }, + { /* TSEC */ .base = 0x1b10, .offset = 0x30, .limit = 0x34 }, + { /* TSECB */ .base = 0x1b18, .offset = 0x30, .limit = 0x34 }, + { /* VI */ .base = 0x1b80, .offset = 0x800, .limit = 0x800 }, + { /* VI_THI */ .base = 0x1b88, .offset = 0x30, .limit = 0x34 }, + { /* ISP_THI */ .base = 0x1b90, .offset = 0x30, .limit = 0x34 }, + { /* PVA0_CLUSTER */ .base = 0x1b98, .offset = 0x0, .limit = 0x0 }, + { /* PVA0_CLUSTER */ .base = 0x1ba0, .offset = 0x0, .limit = 0x0 }, + { /* NVDLA0 */ .base = 0x1ba8, .offset = 0x30, .limit = 0x34 }, + { /* NVDLA1 */ .base = 0x1bb0, .offset = 0x30, .limit = 0x34 }, + { /* NVENC1 */ .base = 0x1bb8, .offset = 0x30, .limit = 0x34 }, + { /* NVDEC1 */ .base = 0x1bc0, .offset = 0x30, .limit = 0x34 }, }; static const struct host1x_info host1x07_info = { @@ -173,7 +230,65 @@ static const struct host1x_info host1x07_info = { .reserve_vblank_syncpts = false, }; +/* + * Tegra234 has two stream ID protection tables, one for setting stream IDs + * through the channel path via 
SETSTREAMID, and one for setting them via + * MMIO. We program each engine's data stream ID in the channel path table + * and firmware stream ID in the MMIO path table. + */ +static const struct host1x_sid_entry tegra234_sid_table[] = { + { /* SE1 MMIO */ .base = 0x1650, .offset = 0x90, .limit = 0x90 }, + { /* SE1 ch */ .base = 0x1730, .offset = 0x90, .limit = 0x90 }, + { /* SE2 MMIO */ .base = 0x1658, .offset = 0x90, .limit = 0x90 }, + { /* SE2 ch */ .base = 0x1738, .offset = 0x90, .limit = 0x90 }, + { /* SE4 MMIO */ .base = 0x1660, .offset = 0x90, .limit = 0x90 }, + { /* SE4 ch */ .base = 0x1740, .offset = 0x90, .limit = 0x90 }, + { /* ISP MMIO */ .base = 0x1680, .offset = 0x800, .limit = 0x800 }, + { /* VIC MMIO */ .base = 0x1688, .offset = 0x34, .limit = 0x34 }, + { /* VIC ch */ .base = 0x17b8, .offset = 0x30, .limit = 0x30 }, + { /* NVENC MMIO */ .base = 0x1690, .offset = 0x34, .limit = 0x34 }, + { /* NVENC ch */ .base = 0x17c0, .offset = 0x30, .limit = 0x30 }, + { /* NVDEC MMIO */ .base = 0x1698, .offset = 0x34, .limit = 0x34 }, + { /* NVDEC ch */ .base = 0x17c8, .offset = 0x30, .limit = 0x30 }, + { /* NVJPG MMIO */ .base = 0x16a0, .offset = 0x34, .limit = 0x34 }, + { /* NVJPG ch */ .base = 0x17d0, .offset = 0x30, .limit = 0x30 }, + { /* TSEC MMIO */ .base = 0x16a8, .offset = 0x30, .limit = 0x34 }, + { /* NVJPG1 MMIO */ .base = 0x16b0, .offset = 0x34, .limit = 0x34 }, + { /* NVJPG1 ch */ .base = 0x17a8, .offset = 0x30, .limit = 0x30 }, + { /* VI MMIO */ .base = 0x16b8, .offset = 0x800, .limit = 0x800 }, + { /* VI_THI MMIO */ .base = 0x16c0, .offset = 0x30, .limit = 0x34 }, + { /* ISP_THI MMIO */ .base = 0x16c8, .offset = 0x30, .limit = 0x34 }, + { /* NVDLA MMIO */ .base = 0x16d8, .offset = 0x30, .limit = 0x34 }, + { /* NVDLA ch */ .base = 0x17e0, .offset = 0x30, .limit = 0x34 }, + { /* NVDLA1 MMIO */ .base = 0x16e0, .offset = 0x30, .limit = 0x34 }, + { /* NVDLA1 ch */ .base = 0x17e8, .offset = 0x30, .limit = 0x34 }, + { /* OFA MMIO */ .base = 0x16e8, .offset 
= 0x34, .limit = 0x34 }, + { /* OFA ch */ .base = 0x1768, .offset = 0x30, .limit = 0x30 }, + { /* VI2 MMIO */ .base = 0x16f0, .offset = 0x800, .limit = 0x800 }, + { /* VI2_THI MMIO */ .base = 0x16f8, .offset = 0x30, .limit = 0x34 }, +}; + +static const struct host1x_info host1x08_info = { + .nb_channels = 63, + .nb_pts = 1024, + .nb_mlocks = 24, + .nb_bases = 0, + .init = host1x08_init, + .sync_offset = 0x0, + .dma_mask = DMA_BIT_MASK(40), + .has_wide_gather = true, + .has_hypervisor = true, + .has_common = true, + .num_sid_entries = ARRAY_SIZE(tegra234_sid_table), + .sid_table = tegra234_sid_table, + .streamid_vm_table = { 0x1004, 128 }, + .classid_vm_table = { 0x1404, 25 }, + .mmio_vm_table = { 0x1504, 25 }, + .reserve_vblank_syncpts = false, +}; + static const struct of_device_id host1x_of_match[] = { + { .compatible = "nvidia,tegra234-host1x", .data = &host1x08_info, }, { .compatible = "nvidia,tegra194-host1x", .data = &host1x07_info, }, { .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, }, { .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, }, @@ -185,21 +300,43 @@ static const struct of_device_id host1x_of_match[] = { }; MODULE_DEVICE_TABLE(of, host1x_of_match); -static void host1x_setup_sid_table(struct host1x *host) +static void host1x_setup_virtualization_tables(struct host1x *host) { const struct host1x_info *info = host->info; unsigned int i; + if (!info->has_hypervisor) + return; + for (i = 0; i < info->num_sid_entries; i++) { const struct host1x_sid_entry *entry = &info->sid_table[i]; host1x_hypervisor_writel(host, entry->offset, entry->base); host1x_hypervisor_writel(host, entry->limit, entry->base + 4); } + + for (i = 0; i < info->streamid_vm_table.count; i++) { + /* Allow access to all stream IDs to all VMs. */ + host1x_hypervisor_writel(host, 0xff, info->streamid_vm_table.base + 4 * i); + } + + for (i = 0; i < info->classid_vm_table.count; i++) { + /* Allow access to all classes to all VMs. 
*/ + host1x_hypervisor_writel(host, 0xff, info->classid_vm_table.base + 4 * i); + } + + for (i = 0; i < info->mmio_vm_table.count; i++) { + /* Use VM1 (that's us) as originator VMID for engine MMIO accesses. */ + host1x_hypervisor_writel(host, 0x1, info->mmio_vm_table.base + 4 * i); + } } static bool host1x_wants_iommu(struct host1x *host1x) { + /* Our IOMMU usage policy doesn't currently play well with GART */ + if (of_machine_is_compatible("nvidia,tegra20")) + return false; + /* * If we support addressing a maximum of 32 bits of physical memory * and if the host1x firewall is enabled, there's no need to enable @@ -233,11 +370,26 @@ static bool host1x_wants_iommu(struct host1x *host1x) return true; } +/* + * Returns ERR_PTR on failure, NULL if the translation is IDENTITY, otherwise a + * valid paging domain. + */ static struct iommu_domain *host1x_iommu_attach(struct host1x *host) { struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev); int err; +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) + if (host->dev->archdata.mapping) { + struct dma_iommu_mapping *mapping = + to_dma_iommu_mapping(host->dev); + arm_iommu_detach_device(host->dev); + arm_iommu_release_mapping(mapping); + + domain = iommu_get_domain_for_dev(host->dev); + } +#endif + /* * We may not always want to enable IOMMU support (for example if the * host1x firewall is already enabled and we don't support addressing @@ -246,6 +398,8 @@ static struct iommu_domain *host1x_iommu_attach(struct host1x *host) * Similarly, if host1x is already attached to an IOMMU (via the DMA * API), don't try to attach again. 
*/ + if (domain && domain->type == IOMMU_DOMAIN_IDENTITY) + domain = NULL; if (!host1x_wants_iommu(host) || domain) return domain; @@ -259,9 +413,10 @@ static struct iommu_domain *host1x_iommu_attach(struct host1x *host) if (err < 0) goto put_group; - host->domain = iommu_domain_alloc(&platform_bus_type); - if (!host->domain) { - err = -ENOMEM; + host->domain = iommu_paging_domain_alloc(host->dev); + if (IS_ERR(host->domain)) { + err = PTR_ERR(host->domain); + host->domain = NULL; goto put_cache; } @@ -347,12 +502,28 @@ static void host1x_iommu_exit(struct host1x *host) } } +static int host1x_get_resets(struct host1x *host) +{ + int err; + + host->resets[0].id = "mc"; + host->resets[1].id = "host1x"; + host->nresets = ARRAY_SIZE(host->resets); + + err = devm_reset_control_bulk_get_optional_exclusive_released( + host->dev, host->nresets, host->resets); + if (err) { + dev_err(host->dev, "failed to get reset: %d\n", err); + return err; + } + + return 0; +} + static int host1x_probe(struct platform_device *pdev) { struct host1x *host; - struct resource *regs, *hv_regs = NULL; - int syncpt_irq; - int err; + int err, i; host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL); if (!host) @@ -361,30 +532,49 @@ static int host1x_probe(struct platform_device *pdev) host->info = of_device_get_match_data(&pdev->dev); if (host->info->has_hypervisor) { - regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "vm"); - if (!regs) { - dev_err(&pdev->dev, "failed to get vm registers\n"); - return -ENXIO; - } + host->regs = devm_platform_ioremap_resource_byname(pdev, "vm"); + if (IS_ERR(host->regs)) + return PTR_ERR(host->regs); + + host->hv_regs = devm_platform_ioremap_resource_byname(pdev, "hypervisor"); + if (IS_ERR(host->hv_regs)) + return PTR_ERR(host->hv_regs); - hv_regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, - "hypervisor"); - if (!hv_regs) { - dev_err(&pdev->dev, - "failed to get hypervisor registers\n"); - return -ENXIO; + if (host->info->has_common) { + 
host->common_regs = devm_platform_ioremap_resource_byname(pdev, "common"); + if (IS_ERR(host->common_regs)) + return PTR_ERR(host->common_regs); } } else { - regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!regs) { - dev_err(&pdev->dev, "failed to get registers\n"); - return -ENXIO; - } + host->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(host->regs)) + return PTR_ERR(host->regs); + } + + for (i = 0; i < ARRAY_SIZE(host->syncpt_irqs); i++) { + char irq_name[] = "syncptX"; + + sprintf(irq_name, "syncpt%d", i); + + err = platform_get_irq_byname_optional(pdev, irq_name); + if (err == -ENXIO) + break; + if (err < 0) + return err; + + host->syncpt_irqs[i] = err; } - syncpt_irq = platform_get_irq(pdev, 0); - if (syncpt_irq < 0) - return syncpt_irq; + host->num_syncpt_irqs = i; + + /* Device tree without irq names */ + if (i == 0) { + host->syncpt_irqs[0] = platform_get_irq(pdev, 0); + if (host->syncpt_irqs[0] < 0) + return host->syncpt_irqs[0]; + + host->num_syncpt_irqs = 1; + } mutex_init(&host->devices_lock); INIT_LIST_HEAD(&host->devices); @@ -394,16 +584,6 @@ static int host1x_probe(struct platform_device *pdev) /* set common host1x device data */ platform_set_drvdata(pdev, host); - host->regs = devm_ioremap_resource(&pdev->dev, regs); - if (IS_ERR(host->regs)) - return PTR_ERR(host->regs); - - if (host->info->has_hypervisor) { - host->hv_regs = devm_ioremap_resource(&pdev->dev, hv_regs); - if (IS_ERR(host->hv_regs)) - return PTR_ERR(host->hv_regs); - } - host->dev->dma_parms = &host->dma_parms; dma_set_max_seg_size(host->dev, UINT_MAX); @@ -414,26 +594,19 @@ static int host1x_probe(struct platform_device *pdev) } host->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(host->clk)) { - err = PTR_ERR(host->clk); - - if (err != -EPROBE_DEFER) - dev_err(&pdev->dev, "failed to get clock: %d\n", err); + if (IS_ERR(host->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(host->clk), "failed to get clock\n"); + err = host1x_get_resets(host); + if 
(err) return err; - } - host->rst = devm_reset_control_get(&pdev->dev, "host1x"); - if (IS_ERR(host->rst)) { - err = PTR_ERR(host->rst); - dev_err(&pdev->dev, "failed to get reset: %d\n", err); - return err; - } + host1x_bo_cache_init(&host->cache); err = host1x_iommu_init(host); if (err < 0) { dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err); - return err; + goto destroy_cache; } err = host1x_channel_list_init(&host->channel_list, @@ -443,35 +616,39 @@ static int host1x_probe(struct platform_device *pdev) goto iommu_exit; } - err = clk_prepare_enable(host->clk); - if (err < 0) { - dev_err(&pdev->dev, "failed to enable clock\n"); + err = host1x_memory_context_list_init(host); + if (err) { + dev_err(&pdev->dev, "failed to initialize context list\n"); goto free_channels; } - err = reset_control_deassert(host->rst); - if (err < 0) { - dev_err(&pdev->dev, "failed to deassert reset: %d\n", err); - goto unprepare_disable; - } - err = host1x_syncpt_init(host); if (err) { dev_err(&pdev->dev, "failed to initialize syncpts\n"); - goto reset_assert; + goto free_contexts; } - err = host1x_intr_init(host, syncpt_irq); + mutex_init(&host->intr_mutex); + + pm_runtime_enable(&pdev->dev); + + err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); + if (err) + goto pm_disable; + + /* the driver's code isn't ready yet for the dynamic RPM */ + err = pm_runtime_resume_and_get(&pdev->dev); + if (err) + goto pm_disable; + + err = host1x_intr_init(host); if (err) { dev_err(&pdev->dev, "failed to initialize interrupts\n"); - goto deinit_syncpt; + goto pm_put; } host1x_debug_init(host); - if (host->info->has_hypervisor) - host1x_setup_sid_table(host); - err = host1x_register(host); if (err < 0) goto deinit_debugfs; @@ -487,39 +664,120 @@ unregister: deinit_debugfs: host1x_debug_deinit(host); host1x_intr_deinit(host); -deinit_syncpt: +pm_put: + pm_runtime_put_sync_suspend(&pdev->dev); +pm_disable: + pm_runtime_disable(&pdev->dev); host1x_syncpt_deinit(host); -reset_assert: - 
reset_control_assert(host->rst); -unprepare_disable: - clk_disable_unprepare(host->clk); +free_contexts: + host1x_memory_context_list_free(&host->context_list); free_channels: host1x_channel_list_free(&host->channel_list); iommu_exit: host1x_iommu_exit(host); +destroy_cache: + host1x_bo_cache_destroy(&host->cache); return err; } -static int host1x_remove(struct platform_device *pdev) +static void host1x_remove(struct platform_device *pdev) { struct host1x *host = platform_get_drvdata(pdev); host1x_unregister(host); host1x_debug_deinit(host); + + pm_runtime_force_suspend(&pdev->dev); + host1x_intr_deinit(host); host1x_syncpt_deinit(host); - reset_control_assert(host->rst); - clk_disable_unprepare(host->clk); + host1x_memory_context_list_free(&host->context_list); + host1x_channel_list_free(&host->channel_list); host1x_iommu_exit(host); + host1x_bo_cache_destroy(&host->cache); +} + +static int __maybe_unused host1x_runtime_suspend(struct device *dev) +{ + struct host1x *host = dev_get_drvdata(dev); + int err; + + host1x_channel_stop_all(host); + host1x_intr_stop(host); + host1x_syncpt_save(host); + + if (!host->info->skip_reset_assert) { + err = reset_control_bulk_assert(host->nresets, host->resets); + if (err) { + dev_err(dev, "failed to assert reset: %d\n", err); + goto resume_host1x; + } + + usleep_range(1000, 2000); + } + + clk_disable_unprepare(host->clk); + reset_control_bulk_release(host->nresets, host->resets); return 0; + +resume_host1x: + host1x_setup_virtualization_tables(host); + host1x_syncpt_restore(host); + host1x_intr_start(host); + + return err; } +static int __maybe_unused host1x_runtime_resume(struct device *dev) +{ + struct host1x *host = dev_get_drvdata(dev); + int err; + + err = reset_control_bulk_acquire(host->nresets, host->resets); + if (err) { + dev_err(dev, "failed to acquire reset: %d\n", err); + return err; + } + + err = clk_prepare_enable(host->clk); + if (err) { + dev_err(dev, "failed to enable clock: %d\n", err); + goto release_reset; 
+ } + + err = reset_control_bulk_deassert(host->nresets, host->resets); + if (err < 0) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; + } + + host1x_setup_virtualization_tables(host); + host1x_syncpt_restore(host); + host1x_intr_start(host); + + return 0; + +disable_clk: + clk_disable_unprepare(host->clk); +release_reset: + reset_control_bulk_release(host->nresets, host->resets); + + return err; +} + +static const struct dev_pm_ops host1x_pm_ops = { + SET_RUNTIME_PM_OPS(host1x_runtime_suspend, host1x_runtime_resume, + NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) +}; + static struct platform_driver tegra_host1x_driver = { .driver = { .name = "tegra-host1x", .of_match_table = host1x_of_match, + .pm = &host1x_pm_ops, }, .probe = host1x_probe, .remove = host1x_remove, @@ -566,6 +824,7 @@ u64 host1x_get_dma_mask(struct host1x *host1x) } EXPORT_SYMBOL(host1x_get_dma_mask); +MODULE_SOFTDEP("post: tegra-drm"); MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>"); MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>"); MODULE_DESCRIPTION("Host1x driver for Tegra products"); diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index fa6d4bc46e98..ef44618ed88a 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -9,11 +9,13 @@ #include <linux/device.h> #include <linux/iommu.h> #include <linux/iova.h> +#include <linux/irqreturn.h> #include <linux/platform_device.h> #include <linux/reset.h> #include "cdma.h" #include "channel.h" +#include "context.h" #include "intr.h" #include "job.h" #include "syncpt.h" @@ -73,14 +75,14 @@ struct host1x_syncpt_ops { }; struct host1x_intr_ops { - int (*init_host_sync)(struct host1x *host, u32 cpm, - void (*syncpt_thresh_work)(struct work_struct *work)); + int (*init_host_sync)(struct host1x *host, u32 cpm); void (*set_syncpt_threshold)( struct host1x *host, unsigned int id, u32 thresh); void (*enable_syncpt_intr)(struct host1x 
*host, unsigned int id); void (*disable_syncpt_intr)(struct host1x *host, unsigned int id); void (*disable_all_syncpt_intrs)(struct host1x *host); int (*free_syncpt_irq)(struct host1x *host); + irqreturn_t (*isr)(int irq, void *dev_id); }; struct host1x_sid_entry { @@ -89,6 +91,11 @@ struct host1x_sid_entry { unsigned int limit; }; +struct host1x_table_desc { + unsigned int base; + unsigned int count; +}; + struct host1x_info { unsigned int nb_channels; /* host1x: number of channels supported */ unsigned int nb_pts; /* host1x: number of syncpoints supported */ @@ -99,14 +106,24 @@ struct host1x_info { u64 dma_mask; /* mask of addressable memory */ bool has_wide_gather; /* supports GATHER_W opcode */ bool has_hypervisor; /* has hypervisor registers */ + bool has_common; /* has common registers separate from hypervisor */ unsigned int num_sid_entries; const struct host1x_sid_entry *sid_table; + struct host1x_table_desc streamid_vm_table; + struct host1x_table_desc classid_vm_table; + struct host1x_table_desc mmio_vm_table; /* * On T20-T148, the boot chain may setup DC to increment syncpoints * 26/27 on VBLANK. As such we cannot use these syncpoints until * the display driver disables VBLANK increments. */ bool reserve_vblank_syncpts; + /* + * On Tegra186, secure world applications may require access to + * host1x during suspend/resume. To allow this, we need to leave + * host1x not in reset. 
+ */ + bool skip_reset_assert; }; struct host1x { @@ -114,11 +131,15 @@ struct host1x { void __iomem *regs; void __iomem *hv_regs; /* hypervisor region */ + void __iomem *common_regs; + int syncpt_irqs[8]; + int num_syncpt_irqs; struct host1x_syncpt *syncpt; struct host1x_syncpt_base *bases; struct device *dev; struct clk *clk; - struct reset_control *rst; + struct reset_control_bulk_data resets[2]; + unsigned int nresets; struct iommu_group *group; struct iommu_domain *domain; @@ -126,7 +147,6 @@ struct host1x { dma_addr_t iova_end; struct mutex intr_mutex; - int intr_syncpt_irq; const struct host1x_syncpt_ops *syncpt_op; const struct host1x_intr_ops *intr_op; @@ -140,6 +160,7 @@ struct host1x { struct mutex syncpt_mutex; struct host1x_channel_list channel_list; + struct host1x_memory_context_list context_list; struct dentry *debugfs; @@ -149,13 +170,19 @@ struct host1x { struct list_head list; struct device_dma_parameters dma_parms; + + struct host1x_bo_cache cache; }; -void host1x_hypervisor_writel(struct host1x *host1x, u32 r, u32 v); +void host1x_common_writel(struct host1x *host1x, u32 v, u32 r); +void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r); u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r); -void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v); +void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r); u32 host1x_sync_readl(struct host1x *host1x, u32 r); -void host1x_ch_writel(struct host1x_channel *ch, u32 r, u32 v); +#ifdef CONFIG_64BIT +u64 host1x_sync_readq(struct host1x *host1x, u32 r); +#endif +void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r); u32 host1x_ch_readl(struct host1x_channel *ch, u32 r); static inline void host1x_hw_syncpt_restore(struct host1x *host, @@ -200,10 +227,9 @@ static inline void host1x_hw_syncpt_enable_protection(struct host1x *host) return host->syncpt_op->enable_protection(host); } -static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm, - void 
(*syncpt_thresh_work)(struct work_struct *)) +static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm) { - return host->intr_op->init_host_sync(host, cpm, syncpt_thresh_work); + return host->intr_op->init_host_sync(host, cpm); } static inline void host1x_hw_intr_set_syncpt_threshold(struct host1x *host, diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c new file mode 100644 index 000000000000..139ad1afd935 --- /dev/null +++ b/drivers/gpu/host1x/fence.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Syncpoint dma_fence implementation + * + * Copyright (c) 2020, NVIDIA Corporation. + */ + +#include <linux/dma-fence.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/sync_file.h> + +#include "fence.h" +#include "intr.h" +#include "syncpt.h" + +static const char *host1x_syncpt_fence_get_driver_name(struct dma_fence *f) +{ + return "host1x"; +} + +static const char *host1x_syncpt_fence_get_timeline_name(struct dma_fence *f) +{ + return "syncpoint"; +} + +static struct host1x_syncpt_fence *to_host1x_fence(struct dma_fence *f) +{ + return container_of(f, struct host1x_syncpt_fence, base); +} + +static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f) +{ + struct host1x_syncpt_fence *sf = to_host1x_fence(f); + + if (host1x_syncpt_is_expired(sf->sp, sf->threshold)) + return false; + + /* Reference for interrupt path. */ + dma_fence_get(f); + + /* + * The dma_fence framework requires the fence driver to keep a + * reference to any fences for which 'enable_signaling' has been + * called (and that have not been signalled). + * + * We cannot currently always guarantee that all fences get signalled + * or cancelled. As such, for such situations, set up a timeout, so + * that long-lasting fences will get reaped eventually. + */ + if (sf->timeout) { + /* Reference for timeout path. 
*/ + dma_fence_get(f); + schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000)); + } + + host1x_intr_add_fence_locked(sf->sp->host, sf); + + /* + * The fence may get signalled at any time after the above call, + * so we need to initialize all state used by signalling + * before it. + */ + + return true; +} + +static const struct dma_fence_ops host1x_syncpt_fence_ops = { + .get_driver_name = host1x_syncpt_fence_get_driver_name, + .get_timeline_name = host1x_syncpt_fence_get_timeline_name, + .enable_signaling = host1x_syncpt_fence_enable_signaling, +}; + +void host1x_fence_signal(struct host1x_syncpt_fence *f) +{ + if (atomic_xchg(&f->signaling, 1)) { + /* + * Already on timeout path, but we removed the fence before + * timeout path could, so drop interrupt path reference. + */ + dma_fence_put(&f->base); + return; + } + + if (f->timeout && cancel_delayed_work(&f->timeout_work)) { + /* + * We know that the timeout path will not be entered. + * Safe to drop the timeout path's reference now. + */ + dma_fence_put(&f->base); + } + + dma_fence_signal_locked(&f->base); + dma_fence_put(&f->base); +} + +static void do_fence_timeout(struct work_struct *work) +{ + struct delayed_work *dwork = (struct delayed_work *)work; + struct host1x_syncpt_fence *f = + container_of(dwork, struct host1x_syncpt_fence, timeout_work); + + if (atomic_xchg(&f->signaling, 1)) { + /* Already on interrupt path, drop timeout path reference if any. */ + if (f->timeout) + dma_fence_put(&f->base); + return; + } + + if (host1x_intr_remove_fence(f->sp->host, f)) { + /* + * Managed to remove fence from queue, so it's safe to drop + * the interrupt path's reference. 
+ */ + dma_fence_put(&f->base); + } + + dma_fence_set_error(&f->base, -ETIMEDOUT); + dma_fence_signal(&f->base); + if (f->timeout) + dma_fence_put(&f->base); +} + +struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold, + bool timeout) +{ + struct host1x_syncpt_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return ERR_PTR(-ENOMEM); + + fence->sp = sp; + fence->threshold = threshold; + fence->timeout = timeout; + + dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &sp->fences.lock, + dma_fence_context_alloc(1), 0); + + INIT_DELAYED_WORK(&fence->timeout_work, do_fence_timeout); + + return &fence->base; +} +EXPORT_SYMBOL(host1x_fence_create); + +void host1x_fence_cancel(struct dma_fence *f) +{ + struct host1x_syncpt_fence *sf = to_host1x_fence(f); + + schedule_delayed_work(&sf->timeout_work, 0); + flush_delayed_work(&sf->timeout_work); +} +EXPORT_SYMBOL(host1x_fence_cancel); diff --git a/drivers/gpu/host1x/fence.h b/drivers/gpu/host1x/fence.h new file mode 100644 index 000000000000..f3c644c73cad --- /dev/null +++ b/drivers/gpu/host1x/fence.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020, NVIDIA Corporation. 
+ */ + +#ifndef HOST1X_FENCE_H +#define HOST1X_FENCE_H + +struct host1x_syncpt_fence { + struct dma_fence base; + + atomic_t signaling; + + struct host1x_syncpt *sp; + u32 threshold; + bool timeout; + + struct delayed_work timeout_work; + + struct list_head list; +}; + +struct host1x_fence_list { + spinlock_t lock; + struct list_head list; +}; + +void host1x_fence_signal(struct host1x_syncpt_fence *fence); + +#endif diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c index e49cd5b8f735..3f3f0018eee0 100644 --- a/drivers/gpu/host1x/hw/cdma_hw.c +++ b/drivers/gpu/host1x/hw/cdma_hw.c @@ -238,6 +238,49 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 getptr) cdma_timeout_restart(cdma, getptr); } +static void timeout_release_mlock(struct host1x_cdma *cdma) +{ +#if HOST1X_HW >= 8 + /* Tegra186 and Tegra194 require a more complicated MLOCK release + * sequence. Furthermore, those chips by default don't enforce MLOCKs, + * so it turns out that if we don't /actually/ need MLOCKs, we can just + * ignore them. + * + * As such, for now just implement this on Tegra234 where things are + * stricter but also easy to implement. 
+ */ + struct host1x_channel *ch = cdma_to_channel(cdma); + struct host1x *host1x = cdma_to_host1x(cdma); + u32 offset; + + switch (ch->client->class) { + case HOST1X_CLASS_NVJPG1: + offset = HOST1X_COMMON_NVJPG1_MLOCK; + break; + case HOST1X_CLASS_NVENC: + offset = HOST1X_COMMON_NVENC_MLOCK; + break; + case HOST1X_CLASS_VIC: + offset = HOST1X_COMMON_VIC_MLOCK; + break; + case HOST1X_CLASS_NVJPG: + offset = HOST1X_COMMON_NVJPG_MLOCK; + break; + case HOST1X_CLASS_NVDEC: + offset = HOST1X_COMMON_NVDEC_MLOCK; + break; + case HOST1X_CLASS_OFA: + offset = HOST1X_COMMON_OFA_MLOCK; + break; + default: + WARN(1, "%s was not updated for class %u", __func__, ch->client->class); + return; + } + + host1x_common_writel(host1x, 0x0, offset); +#endif +} + /* * If this timeout fires, it indicates the current sync_queue entry has * exceeded its TTL and the userctx should be timed out and remaining @@ -288,6 +331,9 @@ static void cdma_timeout_handler(struct work_struct *work) /* stop HW, resetting channel/module */ host1x_hw_cdma_freeze(host1x, cdma); + /* release any held MLOCK */ + timeout_release_mlock(cdma); + host1x_cdma_update_sync_queue(cdma, ch->dev); mutex_unlock(&cdma->lock); } diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index d4c28faf27d1..2df6a16d484e 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -47,39 +47,113 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo, } } -static void submit_gathers(struct host1x_job *job) +static void submit_wait(struct host1x_job *job, u32 id, u32 threshold) +{ + struct host1x_cdma *cdma = &job->channel->cdma; + +#if HOST1X_HW >= 2 + host1x_cdma_push_wide(cdma, + host1x_opcode_setclass( + HOST1X_CLASS_HOST1X, + HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32, + /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */ + BIT(0) | BIT(2) + ), + threshold, + id, + HOST1X_OPCODE_NOP + ); +#else + /* TODO add waitchk or use waitbases or other mitigation 
*/ + host1x_cdma_push(cdma, + host1x_opcode_setclass( + HOST1X_CLASS_HOST1X, + host1x_uclass_wait_syncpt_r(), + BIT(0) + ), + host1x_class_host_wait_syncpt(id, threshold) + ); +#endif +} + +static void submit_setclass(struct host1x_job *job, u32 next_class) +{ + struct host1x_cdma *cdma = &job->channel->cdma; + +#if HOST1X_HW >= 6 + u32 stream_id; + + /* + * If a memory context has been set, use it. Otherwise + * (if context isolation is disabled) use the engine's + * firmware stream ID. + */ + if (job->memory_context) + stream_id = job->memory_context->stream_id; + else + stream_id = job->engine_fallback_streamid; + + host1x_cdma_push_wide(cdma, + host1x_opcode_setclass(next_class, 0, 0), + host1x_opcode_setpayload(stream_id), + host1x_opcode_setstreamid(job->engine_streamid_offset / 4), + HOST1X_OPCODE_NOP); +#else + host1x_cdma_push(cdma, + host1x_opcode_setclass(next_class, 0, 0), + HOST1X_OPCODE_NOP + ); +#endif +} + +static void submit_gathers(struct host1x_job *job, struct host1x_job_cmd *cmds, u32 num_cmds, + u32 job_syncpt_base) { struct host1x_cdma *cdma = &job->channel->cdma; #if HOST1X_HW < 6 struct device *dev = job->channel->dev; #endif unsigned int i; + u32 threshold; - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; - dma_addr_t addr = g->base + g->offset; - u32 op2, op3; + for (i = 0; i < num_cmds; i++) { + struct host1x_job_cmd *cmd = &cmds[i]; - op2 = lower_32_bits(addr); - op3 = upper_32_bits(addr); + if (cmd->is_wait) { + if (cmd->wait.relative) + threshold = job_syncpt_base + cmd->wait.threshold; + else + threshold = cmd->wait.threshold; + + submit_wait(job, cmd->wait.id, threshold); + submit_setclass(job, cmd->wait.next_class); + } else { + struct host1x_job_gather *g = &cmd->gather; - trace_write_gather(cdma, g->bo, g->offset, g->words); + dma_addr_t addr = g->base + g->offset; + u32 op2, op3; - if (op3 != 0) { + op2 = lower_32_bits(addr); + op3 = upper_32_bits(addr); + + trace_write_gather(cdma, 
g->bo, g->offset, g->words); + + if (op3 != 0) { #if HOST1X_HW >= 6 - u32 op1 = host1x_opcode_gather_wide(g->words); - u32 op4 = HOST1X_OPCODE_NOP; + u32 op1 = host1x_opcode_gather_wide(g->words); + u32 op4 = HOST1X_OPCODE_NOP; - host1x_cdma_push_wide(cdma, op1, op2, op3, op4); + host1x_cdma_push_wide(cdma, op1, op2, op3, op4); #else - dev_err(dev, "invalid gather for push buffer %pad\n", - &addr); - continue; + dev_err(dev, "invalid gather for push buffer %pad\n", + &addr); + continue; #endif - } else { - u32 op1 = host1x_opcode_gather(g->words); + } else { + u32 op1 = host1x_opcode_gather(g->words); - host1x_cdma_push(cdma, op1, op2); + host1x_cdma_push(cdma, op1, op2); + } } } } @@ -103,62 +177,109 @@ static inline void synchronize_syncpt_base(struct host1x_job *job) static void host1x_channel_set_streamid(struct host1x_channel *channel) { #if HOST1X_HW >= 6 - u32 sid = 0x7f; -#ifdef CONFIG_IOMMU_API - struct iommu_fwspec *spec = dev_iommu_fwspec_get(channel->dev->parent); - if (spec) - sid = spec->ids[0] & 0xffff; + u32 stream_id; + + if (!tegra_dev_iommu_get_stream_id(channel->dev->parent, &stream_id)) + stream_id = TEGRA_STREAM_ID_BYPASS; + + host1x_ch_writel(channel, stream_id, HOST1X_CHANNEL_SMMU_STREAMID); #endif +} + +static void host1x_enable_gather_filter(struct host1x_channel *ch) +{ +#if HOST1X_HW >= 6 + struct host1x *host = dev_get_drvdata(ch->dev->parent); + u32 val; + + if (!host->hv_regs) + return; - host1x_ch_writel(channel, sid, HOST1X_CHANNEL_SMMU_STREAMID); + val = host1x_hypervisor_readl( + host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); + val |= BIT(ch->id % 32); + host1x_hypervisor_writel( + host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); +#elif HOST1X_HW >= 4 + host1x_ch_writel(ch, + HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1), + HOST1X_CHANNEL_CHANNELCTRL); #endif } -static int channel_submit(struct host1x_job *job) +static void channel_program_cdma(struct host1x_job *job) { - struct host1x_channel *ch = 
job->channel; + struct host1x_cdma *cdma = &job->channel->cdma; struct host1x_syncpt *sp = job->syncpt; - u32 user_syncpt_incrs = job->syncpt_incrs; - u32 prev_max = 0; - u32 syncval; - int err; - struct host1x_waitlist *completed_waiter = NULL; - struct host1x *host = dev_get_drvdata(ch->dev->parent); - trace_host1x_channel_submit(dev_name(ch->dev), - job->num_gathers, job->num_relocs, - job->syncpt->id, job->syncpt_incrs); +#if HOST1X_HW >= 6 + u32 fence; + int i = 0; - /* before error checks, return current max */ - prev_max = job->syncpt_end = host1x_syncpt_read_max(sp); + if (job->num_cmds == 0) + goto prefences_done; + if (!job->cmds[0].is_wait || job->cmds[0].wait.relative) + goto prefences_done; - /* get submit lock */ - err = mutex_lock_interruptible(&ch->submitlock); - if (err) - goto error; + /* Enter host1x class with invalid stream ID for prefence waits. */ + host1x_cdma_push_wide(cdma, + host1x_opcode_acquire_mlock(1), + host1x_opcode_setclass(1, 0, 0), + host1x_opcode_setpayload(0), + host1x_opcode_setstreamid(0x1fffff)); - completed_waiter = kzalloc(sizeof(*completed_waiter), GFP_KERNEL); - if (!completed_waiter) { - mutex_unlock(&ch->submitlock); - err = -ENOMEM; - goto error; - } + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_cmd *cmd = &job->cmds[i]; - host1x_channel_set_streamid(ch); + if (!cmd->is_wait || cmd->wait.relative) + break; - /* begin a CDMA submit */ - err = host1x_cdma_begin(&ch->cdma, job); - if (err) { - mutex_unlock(&ch->submitlock); - goto error; + submit_wait(job, cmd->wait.id, cmd->wait.threshold); } + host1x_cdma_push(cdma, + HOST1X_OPCODE_NOP, + host1x_opcode_release_mlock(1)); + +prefences_done: + /* Enter engine class with invalid stream ID. 
*/ + host1x_cdma_push_wide(cdma, + host1x_opcode_acquire_mlock(job->class), + host1x_opcode_setclass(job->class, 0, 0), + host1x_opcode_setpayload(0), + host1x_opcode_setstreamid(job->engine_streamid_offset / 4)); + + /* Before switching stream ID to real stream ID, ensure engine is idle. */ + fence = host1x_syncpt_incr_max(sp, 1); + host1x_cdma_push(&job->channel->cdma, + host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), + HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | + HOST1X_UCLASS_INCR_SYNCPT_COND_F(4)); + submit_wait(job, job->syncpt->id, fence); + submit_setclass(job, job->class); + + /* Submit work. */ + job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs); + submit_gathers(job, job->cmds + i, job->num_cmds - i, job->syncpt_end - job->syncpt_incrs); + + /* Before releasing MLOCK, ensure engine is idle again. */ + fence = host1x_syncpt_incr_max(sp, 1); + host1x_cdma_push(&job->channel->cdma, + host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), + HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | + HOST1X_UCLASS_INCR_SYNCPT_COND_F(4)); + submit_wait(job, job->syncpt->id, fence); + + /* Release MLOCK. */ + host1x_cdma_push(cdma, + HOST1X_OPCODE_NOP, host1x_opcode_release_mlock(job->class)); +#else if (job->serialize) { /* * Force serialization by inserting a host wait for the * previous job to finish before this one can commence. 
*/ - host1x_cdma_push(&ch->cdma, + host1x_cdma_push(cdma, host1x_opcode_setclass(HOST1X_CLASS_HOST1X, host1x_uclass_wait_syncpt_r(), 1), host1x_class_host_wait_syncpt(job->syncpt->id, @@ -169,60 +290,86 @@ static int channel_submit(struct host1x_job *job) if (sp->base) synchronize_syncpt_base(job); - syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs); - - host1x_hw_syncpt_assign_to_channel(host, sp, ch); - - job->syncpt_end = syncval; - /* add a setclass for modules that require it */ if (job->class) - host1x_cdma_push(&ch->cdma, + host1x_cdma_push(cdma, host1x_opcode_setclass(job->class, 0, 0), HOST1X_OPCODE_NOP); - submit_gathers(job); + job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs); - /* end CDMA submit & stash pinned hMems into sync queue */ - host1x_cdma_end(&ch->cdma, job); + submit_gathers(job, job->cmds, job->num_cmds, job->syncpt_end - job->syncpt_incrs); +#endif +} - trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval); +static void job_complete_callback(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct host1x_job *job = container_of(cb, struct host1x_job, fence_cb); - /* schedule a submit complete interrupt */ - err = host1x_intr_add_action(host, sp, syncval, - HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch, - completed_waiter, NULL); - completed_waiter = NULL; - WARN(err, "Failed to set submit complete interrupt"); + /* Schedules CDMA update. 
*/ + host1x_cdma_update(&job->channel->cdma); +} - mutex_unlock(&ch->submitlock); +static int channel_submit(struct host1x_job *job) +{ + struct host1x_channel *ch = job->channel; + struct host1x_syncpt *sp = job->syncpt; + u32 prev_max = 0; + u32 syncval; + int err; + struct host1x *host = dev_get_drvdata(ch->dev->parent); - return 0; + trace_host1x_channel_submit(dev_name(ch->dev), + job->num_cmds, job->num_relocs, + job->syncpt->id, job->syncpt_incrs); -error: - kfree(completed_waiter); - return err; -} + /* before error checks, return current max */ + prev_max = job->syncpt_end = host1x_syncpt_read_max(sp); -static void enable_gather_filter(struct host1x *host, - struct host1x_channel *ch) -{ -#if HOST1X_HW >= 6 - u32 val; + /* get submit lock */ + err = mutex_lock_interruptible(&ch->submitlock); + if (err) + return err; - if (!host->hv_regs) - return; + host1x_channel_set_streamid(ch); + host1x_enable_gather_filter(ch); + host1x_hw_syncpt_assign_to_channel(host, sp, ch); - val = host1x_hypervisor_readl( - host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); - val |= BIT(ch->id % 32); - host1x_hypervisor_writel( - host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); -#elif HOST1X_HW >= 4 - host1x_ch_writel(ch, - HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1), - HOST1X_CHANNEL_CHANNELCTRL); -#endif + /* begin a CDMA submit */ + err = host1x_cdma_begin(&ch->cdma, job); + if (err) { + mutex_unlock(&ch->submitlock); + return err; + } + + channel_program_cdma(job); + syncval = host1x_syncpt_read_max(sp); + + /* + * Create fence before submitting job to HW to avoid job completing + * before the fence is set up. 
+ */ + job->fence = host1x_fence_create(sp, syncval, true); + if (WARN(IS_ERR(job->fence), "Failed to create submit complete fence")) { + job->fence = NULL; + } else { + err = dma_fence_add_callback(job->fence, &job->fence_cb, + job_complete_callback); + } + + /* end CDMA submit & stash pinned hMems into sync queue */ + host1x_cdma_end(&ch->cdma, job); + + trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval); + + mutex_unlock(&ch->submitlock); + + if (err == -ENOENT) + host1x_cdma_update(&ch->cdma); + else + WARN(err, "Failed to set submit complete interrupt"); + + return 0; } static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev, @@ -233,7 +380,6 @@ static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev, #else ch->regs = dev->regs + index * 0x100; #endif - enable_gather_filter(dev, ch); return 0; } diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c index ceb48229d14b..4c32aa1b95e8 100644 --- a/drivers/gpu/host1x/hw/debug_hw.c +++ b/drivers/gpu/host1x/hw/debug_hw.c @@ -156,9 +156,9 @@ static unsigned int show_channel_command(struct output *o, u32 val, } } -static void show_gather(struct output *o, phys_addr_t phys_addr, +static void show_gather(struct output *o, dma_addr_t phys_addr, unsigned int words, struct host1x_cdma *cdma, - phys_addr_t pin_addr, u32 *map_addr) + dma_addr_t pin_addr, u32 *map_addr) { /* Map dmaget cursor to corresponding mem handle */ u32 offset = phys_addr - pin_addr; @@ -176,11 +176,20 @@ static void show_gather(struct output *o, phys_addr_t phys_addr, } for (i = 0; i < words; i++) { - u32 addr = phys_addr + i * 4; - u32 val = *(map_addr + offset / 4 + i); + dma_addr_t addr = phys_addr + i * 4; + u32 voffset = offset + i * 4; + u32 val; + + /* If we reach the RESTART opcode, continue at the beginning of pushbuffer */ + if (cdma && voffset >= cdma->push_buffer.size) { + addr -= cdma->push_buffer.size; + voffset -= cdma->push_buffer.size; + } + + val = 
*(map_addr + voffset / 4); if (!data_count) { - host1x_debug_output(o, "%08x: %08x: ", addr, val); + host1x_debug_output(o, " %pad: %08x: ", &addr, val); data_count = show_channel_command(o, val, &payload); } else { host1x_debug_cont(o, "%08x%s", val, @@ -195,23 +204,25 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma) struct push_buffer *pb = &cdma->push_buffer; struct host1x_job *job; - host1x_debug_output(o, "PUSHBUF at %pad, %u words\n", - &pb->dma, pb->size / 4); - - show_gather(o, pb->dma, pb->size / 4, cdma, pb->dma, pb->mapped); - list_for_each_entry(job, &cdma->sync_queue, list) { unsigned int i; - host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n", - job, job->syncpt->id, job->syncpt_end, - job->first_get, job->timeout, + host1x_debug_output(o, "JOB, syncpt %u: %u timeout: %u num_slots: %u num_handles: %u\n", + job->syncpt->id, job->syncpt_end, job->timeout, job->num_slots, job->num_unpins); - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + show_gather(o, pb->dma + job->first_get, job->num_slots * 2, cdma, + pb->dma, pb->mapped); + + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; u32 *mapped; + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; + if (job->gather_copy_mapped) mapped = (u32 *)job->gather_copy_mapped; else @@ -222,10 +233,10 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma) continue; } - host1x_debug_output(o, " GATHER at %pad+%#x, %d words\n", + host1x_debug_output(o, " GATHER at %pad+%#x, %d words\n", &g->base, g->offset, g->words); - show_gather(o, g->base + g->offset, g->words, cdma, + show_gather(o, g->base + g->offset, g->words, NULL, g->base, mapped); if (!job->gather_copy_mapped) diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c b/drivers/gpu/host1x/hw/debug_hw_1x01.c index 02a93305ac7b..85242a59fa6a 100644 --- 
a/drivers/gpu/host1x/hw/debug_hw_1x01.c +++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c @@ -16,10 +16,13 @@ static void host1x_debug_show_channel_cdma(struct host1x *host, struct output *o) { struct host1x_cdma *cdma = &ch->cdma; + dma_addr_t dmastart, dmaend; u32 dmaput, dmaget, dmactrl; u32 cbstat, cbread; u32 val, base, baseval; + dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART); + dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND); dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT); dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET); dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL); @@ -56,9 +59,10 @@ static void host1x_debug_show_channel_cdma(struct host1x *host, HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat), cbread); - host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n", + host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend); + host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n", dmaput, dmaget, dmactrl); - host1x_debug_output(o, "CBREAD %08x, CBSTAT %08x\n", cbread, cbstat); + host1x_debug_output(o, "CBREAD %08x CBSTAT %08x\n", cbread, cbstat); show_channel_gathers(o, cdma); host1x_debug_output(o, "\n"); diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c b/drivers/gpu/host1x/hw/debug_hw_1x06.c index 6d1b583aa90f..9d0667879a19 100644 --- a/drivers/gpu/host1x/hw/debug_hw_1x06.c +++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c @@ -16,10 +16,23 @@ static void host1x_debug_show_channel_cdma(struct host1x *host, struct output *o) { struct host1x_cdma *cdma = &ch->cdma; + dma_addr_t dmastart = 0, dmaend = 0; u32 dmaput, dmaget, dmactrl; u32 offset, class; u32 ch_stat; +#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6 + dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART_HI); + dmastart <<= 32; +#endif + dmastart |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART); + +#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6 + dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND_HI); + dmaend <<= 32; +#endif + 
dmaend |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND); + dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT); dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET); dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL); @@ -41,7 +54,8 @@ static void host1x_debug_show_channel_cdma(struct host1x *host, host1x_debug_output(o, "active class %02x, offset %04x\n", class, offset); - host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n", + host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend); + host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n", dmaput, dmaget, dmactrl); host1x_debug_output(o, "CHANNELSTAT %02x\n", ch_stat); diff --git a/drivers/gpu/host1x/hw/host1x01_hardware.h b/drivers/gpu/host1x/hw/host1x01_hardware.h index fe59df1d3dc3..cb93d7c1808c 100644 --- a/drivers/gpu/host1x/hw/host1x01_hardware.h +++ b/drivers/gpu/host1x/hw/host1x01_hardware.h @@ -15,118 +15,6 @@ #include "hw_host1x01_sync.h" #include "hw_host1x01_uclass.h" -static inline u32 host1x_class_host_wait_syncpt( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_wait_syncpt_indx_f(indx) - | host1x_uclass_wait_syncpt_thresh_f(threshold); -} - -static inline u32 host1x_class_host_load_syncpt_base( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_load_syncpt_base_base_indx_f(indx) - | host1x_uclass_load_syncpt_base_value_f(threshold); -} - -static inline u32 host1x_class_host_wait_syncpt_base( - unsigned indx, unsigned base_indx, unsigned offset) -{ - return host1x_uclass_wait_syncpt_base_indx_f(indx) - | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_wait_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt_base( - unsigned base_indx, unsigned offset) -{ - return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_incr_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt( - unsigned cond, unsigned indx) -{ - return 
host1x_uclass_incr_syncpt_cond_f(cond) - | host1x_uclass_incr_syncpt_indx_f(indx); -} - -static inline u32 host1x_class_host_indoff_reg_write( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indbe_f(0xf) - | host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -static inline u32 host1x_class_host_indoff_reg_read( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset) - | host1x_uclass_indoff_rwn_read_v(); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - - -/* cdma opcodes */ -static inline u32 host1x_opcode_setclass( - unsigned class_id, unsigned offset, unsigned mask) -{ - return (0 << 28) | (offset << 16) | (class_id << 6) | mask; -} - -static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) -{ - return (1 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) -{ - return (2 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) -{ - return (3 << 28) | (offset << 16) | mask; -} - -static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) -{ - return (4 << 28) | (offset << 16) | value; -} - -static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) -{ - return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), - host1x_class_host_incr_syncpt(cond, indx)); -} - -static inline u32 host1x_opcode_restart(unsigned address) -{ - return (5 << 28) | (address >> 4); -} - -static inline u32 host1x_opcode_gather(unsigned count) -{ - return (6 << 28) | count; -} - -static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | count; -} - -static inline u32 host1x_opcode_gather_incr(unsigned 
offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; -} - -#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) +#include "opcodes.h" #endif diff --git a/drivers/gpu/host1x/hw/host1x02_hardware.h b/drivers/gpu/host1x/hw/host1x02_hardware.h index af60d7fb016d..2d1282b9bc33 100644 --- a/drivers/gpu/host1x/hw/host1x02_hardware.h +++ b/drivers/gpu/host1x/hw/host1x02_hardware.h @@ -15,117 +15,6 @@ #include "hw_host1x02_sync.h" #include "hw_host1x02_uclass.h" -static inline u32 host1x_class_host_wait_syncpt( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_wait_syncpt_indx_f(indx) - | host1x_uclass_wait_syncpt_thresh_f(threshold); -} - -static inline u32 host1x_class_host_load_syncpt_base( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_load_syncpt_base_base_indx_f(indx) - | host1x_uclass_load_syncpt_base_value_f(threshold); -} - -static inline u32 host1x_class_host_wait_syncpt_base( - unsigned indx, unsigned base_indx, unsigned offset) -{ - return host1x_uclass_wait_syncpt_base_indx_f(indx) - | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_wait_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt_base( - unsigned base_indx, unsigned offset) -{ - return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_incr_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt( - unsigned cond, unsigned indx) -{ - return host1x_uclass_incr_syncpt_cond_f(cond) - | host1x_uclass_incr_syncpt_indx_f(indx); -} - -static inline u32 host1x_class_host_indoff_reg_write( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indbe_f(0xf) - | host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -static inline u32 host1x_class_host_indoff_reg_read( - unsigned mod_id, unsigned offset, bool 
auto_inc) -{ - u32 v = host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset) - | host1x_uclass_indoff_rwn_read_v(); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -/* cdma opcodes */ -static inline u32 host1x_opcode_setclass( - unsigned class_id, unsigned offset, unsigned mask) -{ - return (0 << 28) | (offset << 16) | (class_id << 6) | mask; -} - -static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) -{ - return (1 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) -{ - return (2 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) -{ - return (3 << 28) | (offset << 16) | mask; -} - -static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) -{ - return (4 << 28) | (offset << 16) | value; -} - -static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) -{ - return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), - host1x_class_host_incr_syncpt(cond, indx)); -} - -static inline u32 host1x_opcode_restart(unsigned address) -{ - return (5 << 28) | (address >> 4); -} - -static inline u32 host1x_opcode_gather(unsigned count) -{ - return (6 << 28) | count; -} - -static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | count; -} - -static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; -} - -#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) +#include "opcodes.h" #endif diff --git a/drivers/gpu/host1x/hw/host1x04_hardware.h b/drivers/gpu/host1x/hw/host1x04_hardware.h index 4f9bcddf27e3..84d244e8af30 100644 --- a/drivers/gpu/host1x/hw/host1x04_hardware.h +++ b/drivers/gpu/host1x/hw/host1x04_hardware.h @@ -15,117 +15,6 @@ #include "hw_host1x04_sync.h" #include "hw_host1x04_uclass.h" -static 
inline u32 host1x_class_host_wait_syncpt( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_wait_syncpt_indx_f(indx) - | host1x_uclass_wait_syncpt_thresh_f(threshold); -} - -static inline u32 host1x_class_host_load_syncpt_base( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_load_syncpt_base_base_indx_f(indx) - | host1x_uclass_load_syncpt_base_value_f(threshold); -} - -static inline u32 host1x_class_host_wait_syncpt_base( - unsigned indx, unsigned base_indx, unsigned offset) -{ - return host1x_uclass_wait_syncpt_base_indx_f(indx) - | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_wait_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt_base( - unsigned base_indx, unsigned offset) -{ - return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_incr_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt( - unsigned cond, unsigned indx) -{ - return host1x_uclass_incr_syncpt_cond_f(cond) - | host1x_uclass_incr_syncpt_indx_f(indx); -} - -static inline u32 host1x_class_host_indoff_reg_write( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indbe_f(0xf) - | host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -static inline u32 host1x_class_host_indoff_reg_read( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset) - | host1x_uclass_indoff_rwn_read_v(); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -/* cdma opcodes */ -static inline u32 host1x_opcode_setclass( - unsigned class_id, unsigned offset, unsigned mask) -{ - return (0 << 28) | (offset << 16) | (class_id << 6) | mask; -} - -static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) -{ - return (1 << 28) | 
(offset << 16) | count; -} - -static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) -{ - return (2 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) -{ - return (3 << 28) | (offset << 16) | mask; -} - -static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) -{ - return (4 << 28) | (offset << 16) | value; -} - -static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) -{ - return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), - host1x_class_host_incr_syncpt(cond, indx)); -} - -static inline u32 host1x_opcode_restart(unsigned address) -{ - return (5 << 28) | (address >> 4); -} - -static inline u32 host1x_opcode_gather(unsigned count) -{ - return (6 << 28) | count; -} - -static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | count; -} - -static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; -} - -#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) +#include "opcodes.h" #endif diff --git a/drivers/gpu/host1x/hw/host1x05_hardware.h b/drivers/gpu/host1x/hw/host1x05_hardware.h index af3ab4b7f010..1dcde6ec7909 100644 --- a/drivers/gpu/host1x/hw/host1x05_hardware.h +++ b/drivers/gpu/host1x/hw/host1x05_hardware.h @@ -15,117 +15,6 @@ #include "hw_host1x05_sync.h" #include "hw_host1x05_uclass.h" -static inline u32 host1x_class_host_wait_syncpt( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_wait_syncpt_indx_f(indx) - | host1x_uclass_wait_syncpt_thresh_f(threshold); -} - -static inline u32 host1x_class_host_load_syncpt_base( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_load_syncpt_base_base_indx_f(indx) - | host1x_uclass_load_syncpt_base_value_f(threshold); -} - -static inline u32 host1x_class_host_wait_syncpt_base( - unsigned indx, unsigned base_indx, unsigned offset) 
-{ - return host1x_uclass_wait_syncpt_base_indx_f(indx) - | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_wait_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt_base( - unsigned base_indx, unsigned offset) -{ - return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_incr_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt( - unsigned cond, unsigned indx) -{ - return host1x_uclass_incr_syncpt_cond_f(cond) - | host1x_uclass_incr_syncpt_indx_f(indx); -} - -static inline u32 host1x_class_host_indoff_reg_write( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indbe_f(0xf) - | host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -static inline u32 host1x_class_host_indoff_reg_read( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset) - | host1x_uclass_indoff_rwn_read_v(); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -/* cdma opcodes */ -static inline u32 host1x_opcode_setclass( - unsigned class_id, unsigned offset, unsigned mask) -{ - return (0 << 28) | (offset << 16) | (class_id << 6) | mask; -} - -static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) -{ - return (1 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) -{ - return (2 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) -{ - return (3 << 28) | (offset << 16) | mask; -} - -static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) -{ - return (4 << 28) | (offset << 16) | value; -} - -static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) -{ - return 
host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), - host1x_class_host_incr_syncpt(cond, indx)); -} - -static inline u32 host1x_opcode_restart(unsigned address) -{ - return (5 << 28) | (address >> 4); -} - -static inline u32 host1x_opcode_gather(unsigned count) -{ - return (6 << 28) | count; -} - -static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | count; -} - -static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; -} - -#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) +#include "opcodes.h" #endif diff --git a/drivers/gpu/host1x/hw/host1x06_hardware.h b/drivers/gpu/host1x/hw/host1x06_hardware.h index 01a142a09800..c05cfa7e3090 100644 --- a/drivers/gpu/host1x/hw/host1x06_hardware.h +++ b/drivers/gpu/host1x/hw/host1x06_hardware.h @@ -16,122 +16,6 @@ #include "hw_host1x06_vm.h" #include "hw_host1x06_hypervisor.h" -static inline u32 host1x_class_host_wait_syncpt( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_wait_syncpt_indx_f(indx) - | host1x_uclass_wait_syncpt_thresh_f(threshold); -} - -static inline u32 host1x_class_host_load_syncpt_base( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_load_syncpt_base_base_indx_f(indx) - | host1x_uclass_load_syncpt_base_value_f(threshold); -} - -static inline u32 host1x_class_host_wait_syncpt_base( - unsigned indx, unsigned base_indx, unsigned offset) -{ - return host1x_uclass_wait_syncpt_base_indx_f(indx) - | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_wait_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt_base( - unsigned base_indx, unsigned offset) -{ - return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_incr_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt( - unsigned cond, unsigned indx) -{ - return 
host1x_uclass_incr_syncpt_cond_f(cond) - | host1x_uclass_incr_syncpt_indx_f(indx); -} - -static inline u32 host1x_class_host_indoff_reg_write( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indbe_f(0xf) - | host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -static inline u32 host1x_class_host_indoff_reg_read( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset) - | host1x_uclass_indoff_rwn_read_v(); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -/* cdma opcodes */ -static inline u32 host1x_opcode_setclass( - unsigned class_id, unsigned offset, unsigned mask) -{ - return (0 << 28) | (offset << 16) | (class_id << 6) | mask; -} - -static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) -{ - return (1 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) -{ - return (2 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) -{ - return (3 << 28) | (offset << 16) | mask; -} - -static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) -{ - return (4 << 28) | (offset << 16) | value; -} - -static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) -{ - return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), - host1x_class_host_incr_syncpt(cond, indx)); -} - -static inline u32 host1x_opcode_restart(unsigned address) -{ - return (5 << 28) | (address >> 4); -} - -static inline u32 host1x_opcode_gather(unsigned count) -{ - return (6 << 28) | count; -} - -static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | count; -} - -static inline u32 host1x_opcode_gather_incr(unsigned offset, 
unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; -} - -static inline u32 host1x_opcode_gather_wide(unsigned count) -{ - return (12 << 28) | count; -} - -#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) +#include "opcodes.h" #endif diff --git a/drivers/gpu/host1x/hw/host1x07_hardware.h b/drivers/gpu/host1x/hw/host1x07_hardware.h index e6582172ebfd..d67364e03956 100644 --- a/drivers/gpu/host1x/hw/host1x07_hardware.h +++ b/drivers/gpu/host1x/hw/host1x07_hardware.h @@ -16,122 +16,6 @@ #include "hw_host1x07_vm.h" #include "hw_host1x07_hypervisor.h" -static inline u32 host1x_class_host_wait_syncpt( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_wait_syncpt_indx_f(indx) - | host1x_uclass_wait_syncpt_thresh_f(threshold); -} - -static inline u32 host1x_class_host_load_syncpt_base( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_load_syncpt_base_base_indx_f(indx) - | host1x_uclass_load_syncpt_base_value_f(threshold); -} - -static inline u32 host1x_class_host_wait_syncpt_base( - unsigned indx, unsigned base_indx, unsigned offset) -{ - return host1x_uclass_wait_syncpt_base_indx_f(indx) - | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_wait_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt_base( - unsigned base_indx, unsigned offset) -{ - return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_incr_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt( - unsigned cond, unsigned indx) -{ - return host1x_uclass_incr_syncpt_cond_f(cond) - | host1x_uclass_incr_syncpt_indx_f(indx); -} - -static inline u32 host1x_class_host_indoff_reg_write( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indbe_f(0xf) - | host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - 
-static inline u32 host1x_class_host_indoff_reg_read( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset) - | host1x_uclass_indoff_rwn_read_v(); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -/* cdma opcodes */ -static inline u32 host1x_opcode_setclass( - unsigned class_id, unsigned offset, unsigned mask) -{ - return (0 << 28) | (offset << 16) | (class_id << 6) | mask; -} - -static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) -{ - return (1 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) -{ - return (2 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) -{ - return (3 << 28) | (offset << 16) | mask; -} - -static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) -{ - return (4 << 28) | (offset << 16) | value; -} - -static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) -{ - return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), - host1x_class_host_incr_syncpt(cond, indx)); -} - -static inline u32 host1x_opcode_restart(unsigned address) -{ - return (5 << 28) | (address >> 4); -} - -static inline u32 host1x_opcode_gather(unsigned count) -{ - return (6 << 28) | count; -} - -static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | count; -} - -static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; -} - -static inline u32 host1x_opcode_gather_wide(unsigned count) -{ - return (12 << 28) | count; -} - -#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) +#include "opcodes.h" #endif diff --git a/drivers/gpu/host1x/hw/host1x08.c b/drivers/gpu/host1x/hw/host1x08.c new file mode 100644 index 000000000000..754890c34c74 
--- /dev/null +++ b/drivers/gpu/host1x/hw/host1x08.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Host1x init for Tegra234 SoCs + * + * Copyright (c) 2022 NVIDIA Corporation. + */ + +/* include hw specification */ +#include "host1x08.h" +#include "host1x08_hardware.h" + +/* include code */ +#define HOST1X_HW 8 + +#include "cdma_hw.c" +#include "channel_hw.c" +#include "debug_hw.c" +#include "intr_hw.c" +#include "syncpt_hw.c" + +#include "../dev.h" + +int host1x08_init(struct host1x *host) +{ + host->channel_op = &host1x_channel_ops; + host->cdma_op = &host1x_cdma_ops; + host->cdma_pb_op = &host1x_pushbuffer_ops; + host->syncpt_op = &host1x_syncpt_ops; + host->intr_op = &host1x_intr_ops; + host->debug_op = &host1x_debug_ops; + + return 0; +} diff --git a/drivers/gpu/host1x/hw/host1x08.h b/drivers/gpu/host1x/hw/host1x08.h new file mode 100644 index 000000000000..a6bad56e44cf --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x08.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Host1x init for Tegra234 SoCs + * + * Copyright (c) 2018 NVIDIA Corporation. + */ + +#ifndef HOST1X_HOST1X08_H +#define HOST1X_HOST1X08_H + +struct host1x; + +int host1x08_init(struct host1x *host); + +#endif diff --git a/drivers/gpu/host1x/hw/host1x08_hardware.h b/drivers/gpu/host1x/hw/host1x08_hardware.h new file mode 100644 index 000000000000..936243060bff --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x08_hardware.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Register Offsets for Tegra234 + * + * Copyright (c) 2022 NVIDIA Corporation. 
+ */ + +#ifndef __HOST1X_HOST1X08_HARDWARE_H +#define __HOST1X_HOST1X08_HARDWARE_H + +#include <linux/types.h> +#include <linux/bitops.h> + +#include "hw_host1x08_uclass.h" +#include "hw_host1x08_vm.h" +#include "hw_host1x08_hypervisor.h" +#include "hw_host1x08_common.h" + +#include "opcodes.h" + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h index 4fc51f70496b..0a2ab8f1da6f 100644 --- a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h @@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h index 9e84a4adca9f..60c692b92955 100644 --- a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h @@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h index aee5a4e32877..2fcc9a2ad3ef 100644 --- a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h +++ 
b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h @@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h index c4bacdb7155f..50c32de452fb 100644 --- a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h @@ -53,7 +53,7 @@ static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) host1x_uclass_incr_syncpt_cond_f(v) static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) { - return (v & 0xff) << 0; + return (v & 0x3ff) << 0; } #define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ host1x_uclass_incr_syncpt_indx_f(v) @@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h index c74070f3f203..887b878f92f7 100644 --- a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h @@ -53,7 +53,7 @@ static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) host1x_uclass_incr_syncpt_cond_f(v) static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) { 
- return (v & 0xff) << 0; + return (v & 0x3ff) << 0; } #define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ host1x_uclass_incr_syncpt_indx_f(v) @@ -165,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x08_channel.h b/drivers/gpu/host1x/hw/hw_host1x08_channel.h new file mode 100644 index 000000000000..c9272d2ab14a --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_channel.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#ifndef HOST1X_HW_HOST1X08_CHANNEL_H +#define HOST1X_HW_HOST1X08_CHANNEL_H + +#define HOST1X_CHANNEL_SMMU_STREAMID 0x084 + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x08_common.h b/drivers/gpu/host1x/hw/hw_host1x08_common.h new file mode 100644 index 000000000000..8e0c99150ec2 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_common.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 NVIDIA Corporation. 
+ */ + +#define HOST1X_COMMON_OFA_MLOCK 0x4050 +#define HOST1X_COMMON_NVJPG1_MLOCK 0x4070 +#define HOST1X_COMMON_VIC_MLOCK 0x4078 +#define HOST1X_COMMON_NVENC_MLOCK 0x407c +#define HOST1X_COMMON_NVDEC_MLOCK 0x4080 +#define HOST1X_COMMON_NVJPG_MLOCK 0x4084 diff --git a/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h new file mode 100644 index 000000000000..22964324c914 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#define HOST1X_HV_SYNCPT_PROT_EN 0x1724 +#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN BIT(1) +#define HOST1X_HV_CH_MLOCK_EN(x) (0x1700 + (x * 4)) +#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x) (0x1710 + (x * 4)) diff --git a/drivers/gpu/host1x/hw/hw_host1x08_uclass.h b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h new file mode 100644 index 000000000000..4fb1d090edae --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. 
This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef HOST1X_HW_HOST1X08_UCLASS_H +#define HOST1X_HW_HOST1X08_UCLASS_H + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} +#define HOST1X_UCLASS_INCR_SYNCPT \ + host1x_uclass_incr_syncpt_r() +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 10; +} +#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \ + host1x_uclass_incr_syncpt_cond_f(v) +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0x3ff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ + host1x_uclass_incr_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} +#define HOST1X_UCLASS_WAIT_SYNCPT \ + host1x_uclass_wait_syncpt_r() +static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \ + host1x_uclass_wait_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \ + host1x_uclass_wait_syncpt_thresh_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_r(void) +{ + return 0x9; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \ + host1x_uclass_wait_syncpt_base_r() +static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \ + 
host1x_uclass_wait_syncpt_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 16; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_wait_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_load_syncpt_base_r(void) +{ + return 0xb; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \ + host1x_uclass_load_syncpt_base_r() +static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_load_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \ + host1x_uclass_load_syncpt_base_value_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_incr_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_incr_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_indoff_r(void) +{ + return 0x2d; +} +#define HOST1X_UCLASS_INDOFF \ + host1x_uclass_indoff_r() +static inline u32 host1x_uclass_indoff_indbe_f(u32 v) +{ + return (v & 0xf) << 28; +} +#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \ + host1x_uclass_indoff_indbe_f(v) +static inline u32 host1x_uclass_indoff_autoinc_f(u32 v) +{ + return (v & 0x1) << 27; +} +#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \ + host1x_uclass_indoff_autoinc_f(v) +static inline u32 host1x_uclass_indoff_indmodid_f(u32 v) +{ + return (v & 0xff) << 18; +} +#define 
HOST1X_UCLASS_INDOFF_INDMODID_F(v) \ + host1x_uclass_indoff_indmodid_f(v) +static inline u32 host1x_uclass_indoff_indroffset_f(u32 v) +{ + return (v & 0xffff) << 2; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_indoff_rwn_read_v(void) +{ + return 1; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x08_vm.h b/drivers/gpu/host1x/hw/hw_host1x08_vm.h new file mode 100644 index 000000000000..1455a4670bf8 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_vm.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 NVIDIA Corporation. 
+ */ + +#define HOST1X_CHANNEL_DMASTART 0x0000 +#define HOST1X_CHANNEL_DMASTART_HI 0x0004 +#define HOST1X_CHANNEL_DMAPUT 0x0008 +#define HOST1X_CHANNEL_DMAPUT_HI 0x000c +#define HOST1X_CHANNEL_DMAGET 0x0010 +#define HOST1X_CHANNEL_DMAGET_HI 0x0014 +#define HOST1X_CHANNEL_DMAEND 0x0018 +#define HOST1X_CHANNEL_DMAEND_HI 0x001c +#define HOST1X_CHANNEL_DMACTRL 0x0020 +#define HOST1X_CHANNEL_DMACTRL_DMASTOP BIT(0) +#define HOST1X_CHANNEL_DMACTRL_DMAGETRST BIT(1) +#define HOST1X_CHANNEL_DMACTRL_DMAINITGET BIT(2) +#define HOST1X_CHANNEL_CMDFIFO_STAT 0x0024 +#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY BIT(13) +#define HOST1X_CHANNEL_CMDFIFO_RDATA 0x0028 +#define HOST1X_CHANNEL_CMDP_OFFSET 0x0030 +#define HOST1X_CHANNEL_CMDP_CLASS 0x0034 +#define HOST1X_CHANNEL_CHANNELSTAT 0x0038 +#define HOST1X_CHANNEL_CMDPROC_STOP 0x0048 +#define HOST1X_CHANNEL_TEARDOWN 0x004c +#define HOST1X_CHANNEL_SMMU_STREAMID 0x0084 + +#define HOST1X_SYNC_SYNCPT_CPU_INCR(x) (0x6400 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x) (0x6600 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_INTR_DEST(x) (0x6684 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x) (0x770c + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x) (0x7790 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT(x) (0x8080 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_INT_THRESH(x) (0xa088 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_CH_APP(x) (0xb090 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v) (((v) & 0x3f) << 8) diff --git a/drivers/gpu/host1x/hw/intr_hw.c b/drivers/gpu/host1x/hw/intr_hw.c index f56375ee6e71..bd5b5ef62f35 100644 --- a/drivers/gpu/host1x/hw/intr_hw.c +++ b/drivers/gpu/host1x/hw/intr_hw.c @@ -6,50 +6,74 @@ * Copyright (c) 2010-2013, NVIDIA Corporation. 
*/ -#include <linux/interrupt.h> -#include <linux/irq.h> #include <linux/io.h> #include "../intr.h" #include "../dev.h" -/* - * Sync point threshold interrupt service function - * Handles sync point threshold triggers, in interrupt context - */ -static void host1x_intr_syncpt_handle(struct host1x_syncpt *syncpt) +static void process_32_syncpts(struct host1x *host, unsigned long val, u32 reg_offset) { - unsigned int id = syncpt->id; - struct host1x *host = syncpt->host; + unsigned int id; - host1x_sync_writel(host, BIT(id % 32), - HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id / 32)); - host1x_sync_writel(host, BIT(id % 32), - HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id / 32)); + if (!val) + return; + + host1x_sync_writel(host, val, HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(reg_offset)); + host1x_sync_writel(host, val, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(reg_offset)); - schedule_work(&syncpt->intr.work); + for_each_set_bit(id, &val, 32) + host1x_intr_handle_interrupt(host, reg_offset * 32 + id); } static irqreturn_t syncpt_thresh_isr(int irq, void *dev_id) { - struct host1x *host = dev_id; + struct host1x_intr_irq_data *irq_data = dev_id; + struct host1x *host = irq_data->host; unsigned long reg; - unsigned int i, id; + unsigned int i; - for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) { +#if !defined(CONFIG_64BIT) + for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 32); + i += host->num_syncpt_irqs) { reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); - for_each_set_bit(id, &reg, 32) { - struct host1x_syncpt *syncpt = - host->syncpt + (i * 32 + id); - host1x_intr_syncpt_handle(syncpt); - } + + process_32_syncpts(host, reg, i); + } +#elif HOST1X_HW == 6 || HOST1X_HW == 7 + /* + * Tegra186 and Tegra194 have the first INT_STATUS register not 64-bit aligned, + * and only have one interrupt line.
+ */ + reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(0)); + process_32_syncpts(host, reg, 0); + + for (i = 1; i < (host->info->nb_pts / 32) - 1; i += 2) { + reg = host1x_sync_readq(host, + HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); + + process_32_syncpts(host, lower_32_bits(reg), i); + process_32_syncpts(host, upper_32_bits(reg), i + 1); } + reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); + process_32_syncpts(host, reg, i); +#else + /* All 64-bit capable SoCs have number of syncpoints divisible by 64 */ + for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 64); + i += host->num_syncpt_irqs) { + reg = host1x_sync_readq(host, + HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i * 2)); + + process_32_syncpts(host, lower_32_bits(reg), i * 2 + 0); + process_32_syncpts(host, upper_32_bits(reg), i * 2 + 1); + } +#endif + return IRQ_HANDLED; } -static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host) +static void host1x_intr_disable_all_syncpt_intrs(struct host1x *host) { unsigned int i; @@ -61,7 +85,8 @@ static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host) } } -static void intr_hw_init(struct host1x *host, u32 cpm) +static int +host1x_intr_init_host_sync(struct host1x *host, u32 cpm) { #if HOST1X_HW < 6 /* disable the ip_busy_timeout. this prevents write drops */ @@ -76,48 +101,41 @@ static void intr_hw_init(struct host1x *host, u32 cpm) /* update host clocks per usec */ host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK); #endif -} - -static int -_host1x_intr_init_host_sync(struct host1x *host, u32 cpm, - void (*syncpt_thresh_work)(struct work_struct *)) -{ - unsigned int i; - int err; +#if HOST1X_HW >= 8 + u32 id; - host1x_hw_intr_disable_all_syncpt_intrs(host); - - for (i = 0; i < host->info->nb_pts; i++) - INIT_WORK(&host->syncpt[i].intr.work, syncpt_thresh_work); + /* + * Program threshold interrupt destination among 8 lines per VM, + * per syncpoint. 
For each group of 64 syncpoints (corresponding to two + * interrupt status registers), direct to one interrupt line, going + * around in a round robin fashion. + */ + for (id = 0; id < host->info->nb_pts; id++) { + u32 reg_offset = id / 64; + u32 irq_index = reg_offset % host->num_syncpt_irqs; - err = devm_request_irq(host->dev, host->intr_syncpt_irq, - syncpt_thresh_isr, IRQF_SHARED, - "host1x_syncpt", host); - if (err < 0) { - WARN_ON(1); - return err; + host1x_sync_writel(host, irq_index, HOST1X_SYNC_SYNCPT_INTR_DEST(id)); } - - intr_hw_init(host, cpm); +#endif return 0; } -static void _host1x_intr_set_syncpt_threshold(struct host1x *host, +static void host1x_intr_set_syncpt_threshold(struct host1x *host, unsigned int id, u32 thresh) { host1x_sync_writel(host, thresh, HOST1X_SYNC_SYNCPT_INT_THRESH(id)); } -static void _host1x_intr_enable_syncpt_intr(struct host1x *host, +static void host1x_intr_enable_syncpt_intr(struct host1x *host, unsigned int id) { host1x_sync_writel(host, BIT(id % 32), HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id / 32)); } -static void _host1x_intr_disable_syncpt_intr(struct host1x *host, +static void host1x_intr_disable_syncpt_intr(struct host1x *host, unsigned int id) { host1x_sync_writel(host, BIT(id % 32), @@ -126,23 +144,11 @@ static void _host1x_intr_disable_syncpt_intr(struct host1x *host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id / 32)); } -static int _host1x_free_syncpt_irq(struct host1x *host) -{ - unsigned int i; - - devm_free_irq(host->dev, host->intr_syncpt_irq, host); - - for (i = 0; i < host->info->nb_pts; i++) - cancel_work_sync(&host->syncpt[i].intr.work); - - return 0; -} - static const struct host1x_intr_ops host1x_intr_ops = { - .init_host_sync = _host1x_intr_init_host_sync, - .set_syncpt_threshold = _host1x_intr_set_syncpt_threshold, - .enable_syncpt_intr = _host1x_intr_enable_syncpt_intr, - .disable_syncpt_intr = _host1x_intr_disable_syncpt_intr, - .disable_all_syncpt_intrs = _host1x_intr_disable_all_syncpt_intrs, - 
.free_syncpt_irq = _host1x_free_syncpt_irq, + .init_host_sync = host1x_intr_init_host_sync, + .set_syncpt_threshold = host1x_intr_set_syncpt_threshold, + .enable_syncpt_intr = host1x_intr_enable_syncpt_intr, + .disable_syncpt_intr = host1x_intr_disable_syncpt_intr, + .disable_all_syncpt_intrs = host1x_intr_disable_all_syncpt_intrs, + .isr = syncpt_thresh_isr, }; diff --git a/drivers/gpu/host1x/hw/opcodes.h b/drivers/gpu/host1x/hw/opcodes.h new file mode 100644 index 000000000000..649614499b04 --- /dev/null +++ b/drivers/gpu/host1x/hw/opcodes.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x opcodes + * + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#ifndef __HOST1X_OPCODES_H +#define __HOST1X_OPCODES_H + +#include <linux/types.h> + +static inline u32 host1x_class_host_wait_syncpt( + unsigned indx, unsigned threshold) +{ + return host1x_uclass_wait_syncpt_indx_f(indx) + | host1x_uclass_wait_syncpt_thresh_f(threshold); +} + +static inline u32 host1x_class_host_load_syncpt_base( + unsigned indx, unsigned threshold) +{ + return host1x_uclass_load_syncpt_base_base_indx_f(indx) + | host1x_uclass_load_syncpt_base_value_f(threshold); +} + +static inline u32 host1x_class_host_wait_syncpt_base( + unsigned indx, unsigned base_indx, unsigned offset) +{ + return host1x_uclass_wait_syncpt_base_indx_f(indx) + | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) + | host1x_uclass_wait_syncpt_base_offset_f(offset); +} + +static inline u32 host1x_class_host_incr_syncpt_base( + unsigned base_indx, unsigned offset) +{ + return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) + | host1x_uclass_incr_syncpt_base_offset_f(offset); +} + +static inline u32 host1x_class_host_incr_syncpt( + unsigned cond, unsigned indx) +{ + return host1x_uclass_incr_syncpt_cond_f(cond) + | host1x_uclass_incr_syncpt_indx_f(indx); +} + +static inline u32 host1x_class_host_indoff_reg_write( + unsigned mod_id, unsigned offset, bool auto_inc) +{ + u32 v = 
host1x_uclass_indoff_indbe_f(0xf) + | host1x_uclass_indoff_indmodid_f(mod_id) + | host1x_uclass_indoff_indroffset_f(offset); + if (auto_inc) + v |= host1x_uclass_indoff_autoinc_f(1); + return v; +} + +static inline u32 host1x_class_host_indoff_reg_read( + unsigned mod_id, unsigned offset, bool auto_inc) +{ + u32 v = host1x_uclass_indoff_indmodid_f(mod_id) + | host1x_uclass_indoff_indroffset_f(offset) + | host1x_uclass_indoff_rwn_read_v(); + if (auto_inc) + v |= host1x_uclass_indoff_autoinc_f(1); + return v; +} + +static inline u32 host1x_opcode_setclass( + unsigned class_id, unsigned offset, unsigned mask) +{ + return (0 << 28) | (offset << 16) | (class_id << 6) | mask; +} + +static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) +{ + return (1 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) +{ + return (2 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) +{ + return (3 << 28) | (offset << 16) | mask; +} + +static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) +{ + return (4 << 28) | (offset << 16) | value; +} + +static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) +{ + return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), + host1x_class_host_incr_syncpt(cond, indx)); +} + +static inline u32 host1x_opcode_restart(unsigned address) +{ + return (5 << 28) | (address >> 4); +} + +static inline u32 host1x_opcode_gather(unsigned count) +{ + return (6 << 28) | count; +} + +static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) +{ + return (6 << 28) | (offset << 16) | BIT(15) | count; +} + +static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) +{ + return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; +} + +static inline u32 host1x_opcode_setstreamid(unsigned streamid) +{ + return (7 << 28) | streamid; +} + +static inline u32 
host1x_opcode_setpayload(unsigned payload) +{ + return (9 << 28) | payload; +} + +static inline u32 host1x_opcode_gather_wide(unsigned count) +{ + return (12 << 28) | count; +} + +static inline u32 host1x_opcode_acquire_mlock(unsigned mlock) +{ + return (14 << 28) | (0 << 24) | mlock; +} + +static inline u32 host1x_opcode_release_mlock(unsigned mlock) +{ + return (14 << 28) | (1 << 24) | mlock; +} + +#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) + +#endif diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c index dd39d67ccec3..8cf35b2eff3d 100644 --- a/drivers/gpu/host1x/hw/syncpt_hw.c +++ b/drivers/gpu/host1x/hw/syncpt_hw.c @@ -106,9 +106,6 @@ static void syncpt_assign_to_channel(struct host1x_syncpt *sp, #if HOST1X_HW >= 6 struct host1x *host = sp->host; - if (!host->hv_regs) - return; - host1x_sync_writel(host, HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff), HOST1X_SYNC_SYNCPT_CH_APP(sp->id)); diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index 6d1f3c0fdbe7..f77a678949e9 100644 --- a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -2,300 +2,137 @@ /* * Tegra host1x Interrupt Management * - * Copyright (c) 2010-2013, NVIDIA Corporation. + * Copyright (c) 2010-2021, NVIDIA Corporation. 
*/ #include <linux/clk.h> #include <linux/interrupt.h> -#include <linux/slab.h> -#include <linux/irq.h> - -#include <trace/events/host1x.h> -#include "channel.h" #include "dev.h" +#include "fence.h" #include "intr.h" -/* Wait list management */ - -enum waitlist_state { - WLS_PENDING, - WLS_REMOVED, - WLS_CANCELLED, - WLS_HANDLED -}; - -static void waiter_release(struct kref *kref) -{ - kfree(container_of(kref, struct host1x_waitlist, refcount)); -} - -/* - * add a waiter to a waiter queue, sorted by threshold - * returns true if it was added at the head of the queue - */ -static bool add_waiter_to_queue(struct host1x_waitlist *waiter, - struct list_head *queue) +static void host1x_intr_add_fence_to_list(struct host1x_fence_list *list, + struct host1x_syncpt_fence *fence) { - struct host1x_waitlist *pos; - u32 thresh = waiter->thresh; + struct host1x_syncpt_fence *fence_in_list; - list_for_each_entry_reverse(pos, queue, list) - if ((s32)(pos->thresh - thresh) <= 0) { - list_add(&waiter->list, &pos->list); - return false; + list_for_each_entry_reverse(fence_in_list, &list->list, list) { + if ((s32)(fence_in_list->threshold - fence->threshold) <= 0) { + /* Fence in list is before us, we can insert here */ + list_add(&fence->list, &fence_in_list->list); + return; } + } - list_add(&waiter->list, queue); - return true; + /* Add as first in list */ + list_add(&fence->list, &list->list); } -/* - * run through a waiter queue for a single sync point ID - * and gather all completed waiters into lists by actions - */ -static void remove_completed_waiters(struct list_head *head, u32 sync, - struct list_head completed[HOST1X_INTR_ACTION_COUNT]) +static void host1x_intr_update_hw_state(struct host1x *host, struct host1x_syncpt *sp) { - struct list_head *dest; - struct host1x_waitlist *waiter, *next, *prev; + struct host1x_syncpt_fence *fence; - list_for_each_entry_safe(waiter, next, head, list) { - if ((s32)(waiter->thresh - sync) > 0) - break; + if 
(!list_empty(&sp->fences.list)) { + fence = list_first_entry(&sp->fences.list, struct host1x_syncpt_fence, list); - dest = completed + waiter->action; - - /* consolidate submit cleanups */ - if (waiter->action == HOST1X_INTR_ACTION_SUBMIT_COMPLETE && - !list_empty(dest)) { - prev = list_entry(dest->prev, - struct host1x_waitlist, list); - if (prev->data == waiter->data) { - prev->count++; - dest = NULL; - } - } - - /* PENDING->REMOVED or CANCELLED->HANDLED */ - if (atomic_inc_return(&waiter->state) == WLS_HANDLED || !dest) { - list_del(&waiter->list); - kref_put(&waiter->refcount, waiter_release); - } else - list_move_tail(&waiter->list, dest); + host1x_hw_intr_set_syncpt_threshold(host, sp->id, fence->threshold); + host1x_hw_intr_enable_syncpt_intr(host, sp->id); + } else { + host1x_hw_intr_disable_syncpt_intr(host, sp->id); } } -static void reset_threshold_interrupt(struct host1x *host, - struct list_head *head, - unsigned int id) -{ - u32 thresh = - list_first_entry(head, struct host1x_waitlist, list)->thresh; - - host1x_hw_intr_set_syncpt_threshold(host, id, thresh); - host1x_hw_intr_enable_syncpt_intr(host, id); -} - -static void action_submit_complete(struct host1x_waitlist *waiter) +void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence) { - struct host1x_channel *channel = waiter->data; - - host1x_cdma_update(&channel->cdma); + struct host1x_fence_list *fence_list = &fence->sp->fences; - /* Add nr_completed to trace */ - trace_host1x_channel_submit_complete(dev_name(channel->dev), - waiter->count, waiter->thresh); -} - -static void action_wakeup(struct host1x_waitlist *waiter) -{ - wait_queue_head_t *wq = waiter->data; + INIT_LIST_HEAD(&fence->list); - wake_up(wq); + host1x_intr_add_fence_to_list(fence_list, fence); + host1x_intr_update_hw_state(host, fence->sp); } -static void action_wakeup_interruptible(struct host1x_waitlist *waiter) +bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *fence) { 
- wait_queue_head_t *wq = waiter->data; + struct host1x_fence_list *fence_list = &fence->sp->fences; + unsigned long irqflags; - wake_up_interruptible(wq); -} - -typedef void (*action_handler)(struct host1x_waitlist *waiter); - -static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = { - action_submit_complete, - action_wakeup, - action_wakeup_interruptible, -}; + spin_lock_irqsave(&fence_list->lock, irqflags); -static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT]) -{ - struct list_head *head = completed; - unsigned int i; - - for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i, ++head) { - action_handler handler = action_handlers[i]; - struct host1x_waitlist *waiter, *next; - - list_for_each_entry_safe(waiter, next, head, list) { - list_del(&waiter->list); - handler(waiter); - WARN_ON(atomic_xchg(&waiter->state, WLS_HANDLED) != - WLS_REMOVED); - kref_put(&waiter->refcount, waiter_release); - } + if (list_empty(&fence->list)) { + spin_unlock_irqrestore(&fence_list->lock, irqflags); + return false; } -} - -/* - * Remove & handle all waiters that have completed for the given syncpt - */ -static int process_wait_list(struct host1x *host, - struct host1x_syncpt *syncpt, - u32 threshold) -{ - struct list_head completed[HOST1X_INTR_ACTION_COUNT]; - unsigned int i; - int empty; - - for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i) - INIT_LIST_HEAD(completed + i); - - spin_lock(&syncpt->intr.lock); - - remove_completed_waiters(&syncpt->intr.wait_head, threshold, - completed); - - empty = list_empty(&syncpt->intr.wait_head); - if (empty) - host1x_hw_intr_disable_syncpt_intr(host, syncpt->id); - else - reset_threshold_interrupt(host, &syncpt->intr.wait_head, - syncpt->id); - spin_unlock(&syncpt->intr.lock); + list_del_init(&fence->list); + host1x_intr_update_hw_state(host, fence->sp); - run_handlers(completed); + spin_unlock_irqrestore(&fence_list->lock, irqflags); - return empty; -} - -/* - * Sync point threshold interrupt service thread 
function - * Handles sync point threshold triggers, in thread context - */ - -static void syncpt_thresh_work(struct work_struct *work) -{ - struct host1x_syncpt_intr *syncpt_intr = - container_of(work, struct host1x_syncpt_intr, work); - struct host1x_syncpt *syncpt = - container_of(syncpt_intr, struct host1x_syncpt, intr); - unsigned int id = syncpt->id; - struct host1x *host = syncpt->host; - - (void)process_wait_list(host, syncpt, - host1x_syncpt_load(host->syncpt + id)); + return true; } -int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt, - u32 thresh, enum host1x_intr_action action, - void *data, struct host1x_waitlist *waiter, - void **ref) +void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id) { - int queue_was_empty; - - if (waiter == NULL) { - pr_warn("%s: NULL waiter\n", __func__); - return -EINVAL; - } - - /* initialize a new waiter */ - INIT_LIST_HEAD(&waiter->list); - kref_init(&waiter->refcount); - if (ref) - kref_get(&waiter->refcount); - waiter->thresh = thresh; - waiter->action = action; - atomic_set(&waiter->state, WLS_PENDING); - waiter->data = data; - waiter->count = 1; + struct host1x_syncpt *sp = &host->syncpt[id]; + struct host1x_syncpt_fence *fence, *tmp; + unsigned int value; - spin_lock(&syncpt->intr.lock); + value = host1x_syncpt_load(sp); - queue_was_empty = list_empty(&syncpt->intr.wait_head); + spin_lock(&sp->fences.lock); - if (add_waiter_to_queue(waiter, &syncpt->intr.wait_head)) { - /* added at head of list - new threshold value */ - host1x_hw_intr_set_syncpt_threshold(host, syncpt->id, thresh); + list_for_each_entry_safe(fence, tmp, &sp->fences.list, list) { + if (((value - fence->threshold) & 0x80000000U) != 0U) { + /* Fence is not yet expired, we are done */ + break; + } - /* added as first waiter - enable interrupt */ - if (queue_was_empty) - host1x_hw_intr_enable_syncpt_intr(host, syncpt->id); + list_del_init(&fence->list); + host1x_fence_signal(fence); } - if (ref) - *ref = waiter; 
- - spin_unlock(&syncpt->intr.lock); + /* Re-enable interrupt if necessary */ + host1x_intr_update_hw_state(host, sp); - return 0; + spin_unlock(&sp->fences.lock); } -void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref, - bool flush) +int host1x_intr_init(struct host1x *host) { - struct host1x_waitlist *waiter = ref; - struct host1x_syncpt *syncpt; - - atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED); - - syncpt = host->syncpt + id; + struct host1x_intr_irq_data *irq_data; + unsigned int id; + int i, err; - spin_lock(&syncpt->intr.lock); - if (atomic_cmpxchg(&waiter->state, WLS_CANCELLED, WLS_HANDLED) == - WLS_CANCELLED) { - list_del(&waiter->list); - kref_put(&waiter->refcount, waiter_release); - } - spin_unlock(&syncpt->intr.lock); + for (id = 0; id < host1x_syncpt_nb_pts(host); ++id) { + struct host1x_syncpt *syncpt = &host->syncpt[id]; - if (flush) { - /* Wait until any concurrently executing handler has finished. */ - while (atomic_read(&waiter->state) != WLS_HANDLED) - schedule(); + spin_lock_init(&syncpt->fences.lock); + INIT_LIST_HEAD(&syncpt->fences.list); } - kref_put(&waiter->refcount, waiter_release); -} - -int host1x_intr_init(struct host1x *host, unsigned int irq_sync) -{ - unsigned int id; - u32 nb_pts = host1x_syncpt_nb_pts(host); + irq_data = devm_kcalloc(host->dev, host->num_syncpt_irqs, sizeof(irq_data[0]), GFP_KERNEL); + if (!irq_data) + return -ENOMEM; - mutex_init(&host->intr_mutex); - host->intr_syncpt_irq = irq_sync; + host1x_hw_intr_disable_all_syncpt_intrs(host); - for (id = 0; id < nb_pts; ++id) { - struct host1x_syncpt *syncpt = host->syncpt + id; + for (i = 0; i < host->num_syncpt_irqs; i++) { + irq_data[i].host = host; + irq_data[i].offset = i; - spin_lock_init(&syncpt->intr.lock); - INIT_LIST_HEAD(&syncpt->intr.wait_head); - snprintf(syncpt->intr.thresh_irq_name, - sizeof(syncpt->intr.thresh_irq_name), - "host1x_sp_%02u", id); + err = devm_request_irq(host->dev, host->syncpt_irqs[i], + 
host->intr_op->isr, IRQF_SHARED, + "host1x_syncpt", &irq_data[i]); + if (err < 0) + return err; } - host1x_intr_start(host); - return 0; } void host1x_intr_deinit(struct host1x *host) { - host1x_intr_stop(host); } void host1x_intr_start(struct host1x *host) @@ -304,8 +141,7 @@ void host1x_intr_start(struct host1x *host) int err; mutex_lock(&host->intr_mutex); - err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000), - syncpt_thresh_work); + err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000)); if (err) { mutex_unlock(&host->intr_mutex); return; @@ -315,36 +151,5 @@ void host1x_intr_start(struct host1x *host) void host1x_intr_stop(struct host1x *host) { - unsigned int id; - struct host1x_syncpt *syncpt = host->syncpt; - u32 nb_pts = host1x_syncpt_nb_pts(host); - - mutex_lock(&host->intr_mutex); - host1x_hw_intr_disable_all_syncpt_intrs(host); - - for (id = 0; id < nb_pts; ++id) { - struct host1x_waitlist *waiter, *next; - - list_for_each_entry_safe(waiter, next, - &syncpt[id].intr.wait_head, list) { - if (atomic_cmpxchg(&waiter->state, - WLS_CANCELLED, WLS_HANDLED) == WLS_CANCELLED) { - list_del(&waiter->list); - kref_put(&waiter->refcount, waiter_release); - } - } - - if (!list_empty(&syncpt[id].intr.wait_head)) { - /* output diagnostics */ - mutex_unlock(&host->intr_mutex); - pr_warn("%s cannot stop syncpt intr id=%u\n", - __func__, id); - return; - } - } - - host1x_hw_intr_free_syncpt_irq(host); - - mutex_unlock(&host->intr_mutex); } diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h index 6ea55e615e3a..11cdf13e32fe 100644 --- a/drivers/gpu/host1x/intr.h +++ b/drivers/gpu/host1x/intr.h @@ -2,85 +2,22 @@ /* * Tegra host1x Interrupt Management * - * Copyright (c) 2010-2013, NVIDIA Corporation. + * Copyright (c) 2010-2021, NVIDIA Corporation. 
*/ #ifndef __HOST1X_INTR_H #define __HOST1X_INTR_H -#include <linux/interrupt.h> -#include <linux/workqueue.h> - -struct host1x_syncpt; struct host1x; +struct host1x_syncpt_fence; -enum host1x_intr_action { - /* - * Perform cleanup after a submit has completed. - * 'data' points to a channel - */ - HOST1X_INTR_ACTION_SUBMIT_COMPLETE = 0, - - /* - * Wake up a task. - * 'data' points to a wait_queue_head_t - */ - HOST1X_INTR_ACTION_WAKEUP, - - /* - * Wake up a interruptible task. - * 'data' points to a wait_queue_head_t - */ - HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE, - - HOST1X_INTR_ACTION_COUNT -}; - -struct host1x_syncpt_intr { - spinlock_t lock; - struct list_head wait_head; - char thresh_irq_name[12]; - struct work_struct work; -}; - -struct host1x_waitlist { - struct list_head list; - struct kref refcount; - u32 thresh; - enum host1x_intr_action action; - atomic_t state; - void *data; - int count; +struct host1x_intr_irq_data { + struct host1x *host; + u32 offset; }; -/* - * Schedule an action to be taken when a sync point reaches the given threshold. - * - * @id the sync point - * @thresh the threshold - * @action the action to take - * @data a pointer to extra data depending on action, see above - * @waiter waiter structure - assumes ownership - * @ref must be passed if cancellation is possible, else NULL - * - * This is a non-blocking api. - */ -int host1x_intr_add_action(struct host1x *host, struct host1x_syncpt *syncpt, - u32 thresh, enum host1x_intr_action action, - void *data, struct host1x_waitlist *waiter, - void **ref); - -/* - * Unreference an action submitted to host1x_intr_add_action(). - * You must call this if you passed non-NULL as ref. - * @ref the ref returned from host1x_intr_add_action() - * @flush wait until any pending handlers have completed before returning. 
- */ -void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref, - bool flush); - /* Initialize host1x sync point interrupt */ -int host1x_intr_init(struct host1x *host, unsigned int irq_sync); +int host1x_intr_init(struct host1x *host); /* Deinitialize host1x sync point interrupt */ void host1x_intr_deinit(struct host1x *host); @@ -91,5 +28,10 @@ void host1x_intr_start(struct host1x *host); /* Disable host1x sync point interrupt */ void host1x_intr_stop(struct host1x *host); -irqreturn_t host1x_syncpt_thresh_fn(void *dev_id); +void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id); + +void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence); + +bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *fence); + #endif diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index adbdc225de8d..3ed49e1fd933 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -24,21 +24,25 @@ #define HOST1X_WAIT_SYNCPT_OFFSET 0x8 struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, - u32 num_cmdbufs, u32 num_relocs) + u32 num_cmdbufs, u32 num_relocs, + bool skip_firewall) { struct host1x_job *job = NULL; unsigned int num_unpins = num_relocs; + bool enable_firewall; u64 total; void *mem; - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) + enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall; + + if (!enable_firewall) num_unpins += num_cmdbufs; /* Check that we're not going to overflow */ total = sizeof(struct host1x_job) + (u64)num_relocs * sizeof(struct host1x_reloc) + (u64)num_unpins * sizeof(struct host1x_job_unpin_data) + - (u64)num_cmdbufs * sizeof(struct host1x_job_gather) + + (u64)num_cmdbufs * sizeof(struct host1x_job_cmd) + (u64)num_unpins * sizeof(dma_addr_t) + (u64)num_unpins * sizeof(u32 *); if (total > ULONG_MAX) @@ -48,6 +52,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, if (!job) return NULL; + 
job->enable_firewall = enable_firewall; + kref_init(&job->ref); job->channel = ch; @@ -57,8 +63,8 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, mem += num_relocs * sizeof(struct host1x_reloc); job->unpins = num_unpins ? mem : NULL; mem += num_unpins * sizeof(struct host1x_job_unpin_data); - job->gathers = num_cmdbufs ? mem : NULL; - mem += num_cmdbufs * sizeof(struct host1x_job_gather); + job->cmds = num_cmdbufs ? mem : NULL; + mem += num_cmdbufs * sizeof(struct host1x_job_cmd); job->addr_phys = num_unpins ? mem : NULL; job->reloc_addr_phys = job->addr_phys; @@ -79,6 +85,19 @@ static void job_free(struct kref *ref) { struct host1x_job *job = container_of(ref, struct host1x_job, ref); + if (job->release) + job->release(job); + + if (job->fence) { + /* + * remove_callback is atomic w.r.t. fence signaling, so + * after the call returns, we know that the callback is not + * in execution, and the fence can be safely freed. + */ + dma_fence_remove_callback(job->fence, &job->fence_cb); + dma_fence_put(job->fence); + } + if (job->syncpt) host1x_syncpt_put(job->syncpt); @@ -94,32 +113,47 @@ EXPORT_SYMBOL(host1x_job_put); void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, unsigned int words, unsigned int offset) { - struct host1x_job_gather *gather = &job->gathers[job->num_gathers]; + struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather; gather->words = words; gather->bo = bo; gather->offset = offset; - job->num_gathers++; + job->num_cmds++; } EXPORT_SYMBOL(host1x_job_add_gather); +void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh, + bool relative, u32 next_class) +{ + struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds]; + + cmd->is_wait = true; + cmd->wait.id = id; + cmd->wait.threshold = thresh; + cmd->wait.next_class = next_class; + cmd->wait.relative = relative; + + job->num_cmds++; +} +EXPORT_SYMBOL(host1x_job_add_wait); + static unsigned int pin_job(struct host1x *host, struct host1x_job 
*job) { + unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE; struct host1x_client *client = job->client; struct device *dev = client->dev; struct host1x_job_gather *g; - struct iommu_domain *domain; unsigned int i; int err; - domain = iommu_get_domain_for_dev(dev); job->num_unpins = 0; for (i = 0; i < job->num_relocs; i++) { struct host1x_reloc *reloc = &job->relocs[i]; - dma_addr_t phys_addr, *phys; - struct sg_table *sgt; + enum dma_data_direction direction; + struct host1x_bo_mapping *map; + struct host1x_bo *bo; reloc->target.bo = host1x_bo_get(reloc->target.bo); if (!reloc->target.bo) { @@ -127,64 +161,44 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - /* - * If the client device is not attached to an IOMMU, the - * physical address of the buffer object can be used. - * - * Similarly, when an IOMMU domain is shared between all - * host1x clients, the IOVA is already available, so no - * need to map the buffer object again. - * - * XXX Note that this isn't always safe to do because it - * relies on an assumption that no cache maintenance is - * needed on the buffer objects. 
- */ - if (!domain || client->group) - phys = &phys_addr; - else - phys = NULL; - - sgt = host1x_bo_pin(dev, reloc->target.bo, phys); - if (IS_ERR(sgt)) { - err = PTR_ERR(sgt); - goto unpin; - } + bo = reloc->target.bo; - if (sgt) { - unsigned long mask = HOST1X_RELOC_READ | - HOST1X_RELOC_WRITE; - enum dma_data_direction dir; - - switch (reloc->flags & mask) { - case HOST1X_RELOC_READ: - dir = DMA_TO_DEVICE; - break; + switch (reloc->flags & mask) { + case HOST1X_RELOC_READ: + direction = DMA_TO_DEVICE; + break; - case HOST1X_RELOC_WRITE: - dir = DMA_FROM_DEVICE; - break; + case HOST1X_RELOC_WRITE: + direction = DMA_FROM_DEVICE; + break; - case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE: - dir = DMA_BIDIRECTIONAL; - break; + case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE: + direction = DMA_BIDIRECTIONAL; + break; - default: - err = -EINVAL; - goto unpin; - } + default: + err = -EINVAL; + goto unpin; + } - err = dma_map_sgtable(dev, sgt, dir, 0); - if (err) - goto unpin; + map = host1x_bo_pin(dev, bo, direction, NULL); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto unpin; + } - job->unpins[job->num_unpins].dev = dev; - job->unpins[job->num_unpins].dir = dir; - phys_addr = sg_dma_address(sgt->sgl); + /* + * host1x clients are generally not able to do scatter-gather themselves, so fail + * if the buffer is discontiguous and we fail to map its SG table to a single + * contiguous chunk of I/O virtual memory. + */ + if (map->chunks > 1) { + err = -EINVAL; + goto unpin; } - job->addr_phys[job->num_unpins] = phys_addr; - job->unpins[job->num_unpins].bo = reloc->target.bo; - job->unpins[job->num_unpins].sgt = sgt; + job->addr_phys[job->num_unpins] = map->phys; + job->unpins[job->num_unpins].map = map; job->num_unpins++; } @@ -192,45 +206,38 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) * We will copy gathers BO content later, so there is no need to * hold and pin them. 
*/ - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) + if (job->enable_firewall) return 0; - for (i = 0; i < job->num_gathers; i++) { + for (i = 0; i < job->num_cmds; i++) { + struct host1x_bo_mapping *map; size_t gather_size = 0; struct scatterlist *sg; - struct sg_table *sgt; - dma_addr_t phys_addr; unsigned long shift; struct iova *alloc; - dma_addr_t *phys; unsigned int j; - g = &job->gathers[i]; + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; + g->bo = host1x_bo_get(g->bo); if (!g->bo) { err = -EINVAL; goto unpin; } - /** - * If the host1x is not attached to an IOMMU, there is no need - * to map the buffer object for the host1x, since the physical - * address can simply be used. - */ - if (!iommu_get_domain_for_dev(host->dev)) - phys = &phys_addr; - else - phys = NULL; - - sgt = host1x_bo_pin(host->dev, g->bo, phys); - if (IS_ERR(sgt)) { - err = PTR_ERR(sgt); - goto put; + map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, NULL); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto unpin; } if (host->domain) { - for_each_sgtable_sg(sgt, sg, j) + for_each_sgtable_sg(map->sgt, sg, j) gather_size += sg->length; + gather_size = iova_align(&host->iova, gather_size); shift = iova_shift(&host->iova); @@ -241,33 +248,23 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto put; } - err = iommu_map_sgtable(host->domain, - iova_dma_addr(&host->iova, alloc), - sgt, IOMMU_READ); + err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc), + map->sgt, IOMMU_READ); if (err == 0) { __free_iova(&host->iova, alloc); err = -EINVAL; goto put; } - job->unpins[job->num_unpins].size = gather_size; - phys_addr = iova_dma_addr(&host->iova, alloc); - } else if (sgt) { - err = dma_map_sgtable(host->dev, sgt, DMA_TO_DEVICE, 0); - if (err) - goto put; - - job->unpins[job->num_unpins].dir = DMA_TO_DEVICE; - job->unpins[job->num_unpins].dev = host->dev; - phys_addr = sg_dma_address(sgt->sgl); + map->phys = 
iova_dma_addr(&host->iova, alloc); + map->size = gather_size; } - job->addr_phys[job->num_unpins] = phys_addr; - job->gather_addr_phys[i] = phys_addr; - - job->unpins[job->num_unpins].bo = g->bo; - job->unpins[job->num_unpins].sgt = sgt; + job->addr_phys[job->num_unpins] = map->phys; + job->unpins[job->num_unpins].map = map; job->num_unpins++; + + job->gather_addr_phys[i] = map->phys; } return 0; @@ -296,7 +293,7 @@ static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g) if (cmdbuf != reloc->cmdbuf.bo) continue; - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { + if (job->enable_firewall) { target = (u32 *)job->gather_copy_mapped + reloc->cmdbuf.offset / sizeof(u32) + g->offset / sizeof(u32); @@ -538,8 +535,13 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job, fw.num_relocs = job->num_relocs; fw.class = job->class; - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; + + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; size += g->words * sizeof(u32); } @@ -561,10 +563,14 @@ static inline int copy_gathers(struct device *host, struct host1x_job *job, job->gather_copy_size = size; - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; void *gather; + if (job->cmds[i].is_wait) + continue; + g = &job->cmds[i].gather; + /* Copy the gather */ gather = host1x_bo_mmap(g->bo); memcpy(job->gather_copy_mapped + offset, gather + g->offset, @@ -600,28 +606,33 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) if (err) goto out; - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { + if (job->enable_firewall) { err = copy_gathers(host->dev, job, dev); if (err) goto out; } /* patch gathers */ - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + for 
(i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; + + if (job->cmds[i].is_wait) + continue; + g = &job->cmds[i].gather; /* process each gather mem only once */ if (g->handled) continue; /* copy_gathers() sets gathers base if firewall is enabled */ - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) + if (!job->enable_firewall) g->base = job->gather_addr_phys[i]; - for (j = i + 1; j < job->num_gathers; j++) { - if (job->gathers[j].bo == g->bo) { - job->gathers[j].handled = true; - job->gathers[j].base = g->base; + for (j = i + 1; j < job->num_cmds; j++) { + if (!job->cmds[j].is_wait && + job->cmds[j].gather.bo == g->bo) { + job->cmds[j].gather.handled = true; + job->cmds[j].gather.base = g->base; } } @@ -645,23 +656,16 @@ void host1x_job_unpin(struct host1x_job *job) unsigned int i; for (i = 0; i < job->num_unpins; i++) { - struct host1x_job_unpin_data *unpin = &job->unpins[i]; - struct device *dev = unpin->dev ?: host->dev; - struct sg_table *sgt = unpin->sgt; - - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && - unpin->size && host->domain) { - iommu_unmap(host->domain, job->addr_phys[i], - unpin->size); - free_iova(&host->iova, - iova_pfn(&host->iova, job->addr_phys[i])); - } + struct host1x_bo_mapping *map = job->unpins[i].map; + struct host1x_bo *bo = map->bo; - if (unpin->dev && sgt) - dma_unmap_sgtable(unpin->dev, sgt, unpin->dir, 0); + if (!job->enable_firewall && map->size && host->domain) { + iommu_unmap(host->domain, job->addr_phys[i], map->size); + free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i])); + } - host1x_bo_unpin(dev, unpin->bo, sgt); - host1x_bo_put(unpin->bo); + host1x_bo_unpin(map); + host1x_bo_put(bo); } job->num_unpins = 0; diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h index 94bc2e4ae241..dad5a1946693 100644 --- a/drivers/gpu/host1x/job.h +++ b/drivers/gpu/host1x/job.h @@ -18,12 +18,24 @@ struct host1x_job_gather { bool handled; }; +struct host1x_job_wait { + u32 id; + u32 threshold; + u32 
next_class; + bool relative; +}; + +struct host1x_job_cmd { + bool is_wait; + + union { + struct host1x_job_gather gather; + struct host1x_job_wait wait; + }; +}; + struct host1x_job_unpin_data { - struct host1x_bo *bo; - struct sg_table *sgt; - struct device *dev; - size_t size; - enum dma_data_direction dir; + struct host1x_bo_mapping *map; }; /* diff --git a/drivers/gpu/host1x/mipi.c b/drivers/gpu/host1x/mipi.c index 2efe12dde8bc..e51b43dd15a3 100644 --- a/drivers/gpu/host1x/mipi.c +++ b/drivers/gpu/host1x/mipi.c @@ -501,8 +501,6 @@ static int tegra_mipi_probe(struct platform_device *pdev) { const struct of_device_id *match; struct tegra_mipi *mipi; - struct resource *res; - int err; match = of_match_node(tegra_mipi_of_match, pdev->dev.of_node); if (!match) @@ -515,42 +513,27 @@ static int tegra_mipi_probe(struct platform_device *pdev) mipi->soc = match->data; mipi->dev = &pdev->dev; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - mipi->regs = devm_ioremap_resource(&pdev->dev, res); + mipi->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); if (IS_ERR(mipi->regs)) return PTR_ERR(mipi->regs); mutex_init(&mipi->lock); - mipi->clk = devm_clk_get(&pdev->dev, NULL); + mipi->clk = devm_clk_get_prepared(&pdev->dev, NULL); if (IS_ERR(mipi->clk)) { dev_err(&pdev->dev, "failed to get clock\n"); return PTR_ERR(mipi->clk); } - err = clk_prepare(mipi->clk); - if (err < 0) - return err; - platform_set_drvdata(pdev, mipi); return 0; } -static int tegra_mipi_remove(struct platform_device *pdev) -{ - struct tegra_mipi *mipi = platform_get_drvdata(pdev); - - clk_unprepare(mipi->clk); - - return 0; -} - struct platform_driver tegra_mipi_driver = { .driver = { .name = "tegra-mipi", .of_match_table = tegra_mipi_of_match, }, .probe = tegra_mipi_probe, - .remove = tegra_mipi_remove, }; diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index e648ebbb2027..acc7d82e0585 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ 
-7,6 +7,7 @@ #include <linux/module.h> #include <linux/device.h> +#include <linux/dma-fence.h> #include <linux/slab.h> #include <trace/events/host1x.h> @@ -137,12 +138,21 @@ void host1x_syncpt_restore(struct host1x *host) struct host1x_syncpt *sp_base = host->syncpt; unsigned int i; - for (i = 0; i < host1x_syncpt_nb_pts(host); i++) + for (i = 0; i < host1x_syncpt_nb_pts(host); i++) { + /* + * Unassign syncpt from channels for purposes of Tegra186 + * syncpoint protection. This prevents any channel from + * accessing it until it is reassigned. + */ + host1x_hw_syncpt_assign_to_channel(host, sp_base + i, NULL); host1x_hw_syncpt_restore(host, sp_base + i); + } for (i = 0; i < host1x_syncpt_nb_bases(host); i++) host1x_hw_syncpt_restore_wait_base(host, sp_base + i); + host1x_hw_syncpt_enable_protection(host); + wmb(); } @@ -200,17 +210,6 @@ int host1x_syncpt_incr(struct host1x_syncpt *sp) } EXPORT_SYMBOL(host1x_syncpt_incr); -/* - * Updated sync point form hardware, and returns true if syncpoint is expired, - * false if we may need to wait - */ -static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh) -{ - host1x_hw_syncpt_load(sp->host, sp); - - return host1x_syncpt_is_expired(sp, thresh); -} - /** * host1x_syncpt_wait() - wait for a syncpoint to reach a given value * @sp: host1x syncpoint @@ -221,99 +220,46 @@ static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh) int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, u32 *value) { - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); - void *ref; - struct host1x_waitlist *waiter; - int err = 0, check_count = 0; - u32 val; + struct dma_fence *fence; + long wait_err; - if (value) - *value = 0; + host1x_hw_syncpt_load(sp->host, sp); - /* first check cache */ - if (host1x_syncpt_is_expired(sp, thresh)) { - if (value) - *value = host1x_syncpt_load(sp); + if (value) + *value = host1x_syncpt_load(sp); + if (host1x_syncpt_is_expired(sp, thresh)) return 0; - } - - /* try to read 
from register */ - val = host1x_hw_syncpt_load(sp->host, sp); - if (host1x_syncpt_is_expired(sp, thresh)) { - if (value) - *value = val; - - goto done; - } - - if (!timeout) { - err = -EAGAIN; - goto done; - } - /* allocate a waiter */ - waiter = kzalloc(sizeof(*waiter), GFP_KERNEL); - if (!waiter) { - err = -ENOMEM; - goto done; - } - - /* schedule a wakeup when the syncpoint value is reached */ - err = host1x_intr_add_action(sp->host, sp, thresh, - HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE, - &wq, waiter, &ref); - if (err) - goto done; - - err = -EAGAIN; - /* Caller-specified timeout may be impractically low */ if (timeout < 0) timeout = LONG_MAX; + else if (timeout == 0) + return -EAGAIN; - /* wait for the syncpoint, or timeout, or signal */ - while (timeout) { - long check = min_t(long, SYNCPT_CHECK_PERIOD, timeout); - int remain; - - remain = wait_event_interruptible_timeout(wq, - syncpt_load_min_is_expired(sp, thresh), - check); - if (remain > 0 || host1x_syncpt_is_expired(sp, thresh)) { - if (value) - *value = host1x_syncpt_load(sp); - - err = 0; - - break; - } - - if (remain < 0) { - err = remain; - break; - } - - timeout -= check; - - if (timeout && check_count <= MAX_STUCK_CHECK_COUNT) { - dev_warn(sp->host->dev, - "%s: syncpoint id %u (%s) stuck waiting %d, timeout=%ld\n", - current->comm, sp->id, sp->name, - thresh, timeout); - - host1x_debug_dump_syncpts(sp->host); - - if (check_count == MAX_STUCK_CHECK_COUNT) - host1x_debug_dump(sp->host); + fence = host1x_fence_create(sp, thresh, false); + if (IS_ERR(fence)) + return PTR_ERR(fence); - check_count++; - } - } + wait_err = dma_fence_wait_timeout(fence, true, timeout); + if (wait_err == 0) + host1x_fence_cancel(fence); + dma_fence_put(fence); - host1x_intr_put_ref(sp->host, sp->id, ref, true); + if (value) + *value = host1x_syncpt_load(sp); -done: - return err; + /* + * Don't rely on dma_fence_wait_timeout return value, + * since it returns zero both on timeout and if the + * wait completed with 0 jiffies 
left. + */ + host1x_hw_syncpt_load(sp->host, sp); + if (wait_err == 0 && !host1x_syncpt_is_expired(sp, thresh)) + return -EAGAIN; + else if (wait_err < 0) + return wait_err; + else + return 0; } EXPORT_SYMBOL(host1x_syncpt_wait); @@ -350,13 +296,6 @@ int host1x_syncpt_init(struct host1x *host) for (i = 0; i < host->info->nb_pts; i++) { syncpt[i].id = i; syncpt[i].host = host; - - /* - * Unassign syncpt from channels for purposes of Tegra186 - * syncpoint protection. This prevents any channel from - * accessing it until it is reassigned. - */ - host1x_hw_syncpt_assign_to_channel(host, &syncpt[i], NULL); } for (i = 0; i < host->info->nb_bases; i++) @@ -366,9 +305,6 @@ int host1x_syncpt_init(struct host1x *host) host->syncpt = syncpt; host->bases = bases; - host1x_syncpt_restore(host); - host1x_hw_syncpt_enable_protection(host); - /* Allocate sync point to use for clearing waits for expired fences */ host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop"); if (!host->nop_sp) @@ -407,7 +343,7 @@ static void syncpt_release(struct kref *ref) atomic_set(&sp->max_val, host1x_syncpt_read(sp)); - mutex_lock(&sp->host->syncpt_mutex); + sp->locked = false; host1x_syncpt_base_free(sp->base); kfree(sp->name); @@ -431,7 +367,7 @@ void host1x_syncpt_put(struct host1x_syncpt *sp) if (!sp) return; - kref_put(&sp->ref, syncpt_release); + kref_put_mutex(&sp->ref, syncpt_release, &sp->host->syncpt_mutex); } EXPORT_SYMBOL(host1x_syncpt_put); diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h index a6766f8d55ee..4c3f3b2f0e9c 100644 --- a/drivers/gpu/host1x/syncpt.h +++ b/drivers/gpu/host1x/syncpt.h @@ -14,6 +14,7 @@ #include <linux/kref.h> #include <linux/sched.h> +#include "fence.h" #include "intr.h" struct host1x; @@ -39,7 +40,14 @@ struct host1x_syncpt { struct host1x_syncpt_base *base; /* interrupt data */ - struct host1x_syncpt_intr intr; + struct host1x_fence_list fences; + + /* + * If a submission incrementing this syncpoint fails, lock it so that + * 
further submission cannot be made until application has handled the + * failure. + */ + bool locked; }; /* Initialize sync point array */ @@ -115,4 +123,9 @@ static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp) return sp->id < host1x_syncpt_nb_pts(sp->host); } +static inline void host1x_syncpt_set_locked(struct host1x_syncpt *sp) +{ + sp->locked = true; +} + #endif |
