diff options
Diffstat (limited to 'drivers/gpu/host1x')
78 files changed, 4386 insertions, 2369 deletions
diff --git a/drivers/gpu/host1x/Kconfig b/drivers/gpu/host1x/Kconfig index 91916326957f..e6c78ae2003a 100644 --- a/drivers/gpu/host1x/Kconfig +++ b/drivers/gpu/host1x/Kconfig @@ -1,7 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0-only + +config TEGRA_HOST1X_CONTEXT_BUS + bool + config TEGRA_HOST1X tristate "NVIDIA Tegra host1x driver" - depends on ARCH_TEGRA || (ARM && COMPILE_TEST) - select IOMMU_IOVA if IOMMU_SUPPORT + depends on ARCH_TEGRA || COMPILE_TEST + select DMA_SHARED_BUFFER + select TEGRA_HOST1X_CONTEXT_BUS + select IOMMU_IOVA help Driver for the NVIDIA Tegra host1x hardware. diff --git a/drivers/gpu/host1x/Makefile b/drivers/gpu/host1x/Makefile index a1d9974cfcb5..ee5286ffe08d 100644 --- a/drivers/gpu/host1x/Makefile +++ b/drivers/gpu/host1x/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 host1x-y = \ bus.o \ syncpt.o \ @@ -8,9 +9,17 @@ host1x-y = \ job.o \ debug.o \ mipi.o \ + fence.o \ hw/host1x01.o \ hw/host1x02.o \ hw/host1x04.o \ - hw/host1x05.o + hw/host1x05.o \ + hw/host1x06.o \ + hw/host1x07.o \ + hw/host1x08.o + +host1x-$(CONFIG_IOMMU_API) += \ + context.o obj-$(CONFIG_TEGRA_HOST1X) += host1x.o +obj-$(CONFIG_TEGRA_HOST1X_CONTEXT_BUS) += context_bus.o diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c index a048e3ac523d..723a80895cd4 100644 --- a/drivers/gpu/host1x/bus.c +++ b/drivers/gpu/host1x/bus.c @@ -1,22 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2012 Avionic Design GmbH * Copyright (C) 2012-2013, NVIDIA Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/debugfs.h> +#include <linux/dma-mapping.h> #include <linux/host1x.h> #include <linux/of.h> +#include <linux/seq_file.h> #include <linux/slab.h> #include <linux/of_device.h> @@ -41,13 +33,15 @@ struct host1x_subdev { /** * host1x_subdev_add() - add a new subdevice with an associated device node * @device: host1x device to add the subdevice to - * @driver: host1x driver + * @driver: host1x driver containing the subdevices * @np: device node */ static int host1x_subdev_add(struct host1x_device *device, + struct host1x_driver *driver, struct device_node *np) { struct host1x_subdev *subdev; + int err; subdev = kzalloc(sizeof(*subdev), GFP_KERNEL); if (!subdev) @@ -60,6 +54,18 @@ static int host1x_subdev_add(struct host1x_device *device, list_add_tail(&subdev->list, &device->subdevs); mutex_unlock(&device->subdevs_lock); + /* recursively add children */ + for_each_child_of_node_scoped(np, child) { + if (of_match_node(driver->subdevs, child) && + of_device_is_available(child)) { + err = host1x_subdev_add(device, driver, child); + if (err < 0) { + /* XXX cleanup? 
*/ + return err; + } + } + } + return 0; } @@ -82,17 +88,14 @@ static void host1x_subdev_del(struct host1x_subdev *subdev) static int host1x_device_parse_dt(struct host1x_device *device, struct host1x_driver *driver) { - struct device_node *np; int err; - for_each_child_of_node(device->dev.parent->of_node, np) { + for_each_child_of_node_scoped(device->dev.parent->of_node, np) { if (of_match_node(driver->subdevs, np) && of_device_is_available(np)) { - err = host1x_subdev_add(device, np); - if (err < 0) { - of_node_put(np); + err = host1x_subdev_add(device, driver, np); + if (err < 0) return err; - } } } @@ -114,7 +117,7 @@ static void host1x_subdev_register(struct host1x_device *device, mutex_lock(&device->clients_lock); list_move_tail(&client->list, &device->clients); list_move_tail(&subdev->list, &device->active); - client->parent = &device->dev; + client->host = &device->dev; subdev->client = client; mutex_unlock(&device->clients_lock); mutex_unlock(&device->subdevs_lock); @@ -150,7 +153,7 @@ static void __host1x_subdev_unregister(struct host1x_device *device, */ mutex_lock(&device->clients_lock); subdev->client = NULL; - client->parent = NULL; + client->host = NULL; list_move_tail(&subdev->list, &device->subdevs); /* * XXX: Perhaps don't do this here, but rather explicitly remove it @@ -190,14 +193,24 @@ int host1x_device_init(struct host1x_device *device) mutex_lock(&device->clients_lock); list_for_each_entry(client, &device->clients, list) { + if (client->ops && client->ops->early_init) { + err = client->ops->early_init(client); + if (err < 0) { + dev_err(&device->dev, "failed to early initialize %s: %d\n", + dev_name(client->dev), err); + goto teardown_late; + } + } + } + + list_for_each_entry(client, &device->clients, list) { if (client->ops && client->ops->init) { err = client->ops->init(client); if (err < 0) { dev_err(&device->dev, "failed to initialize %s: %d\n", dev_name(client->dev), err); - mutex_unlock(&device->clients_lock); - return err; + goto 
teardown; } } } @@ -205,6 +218,22 @@ int host1x_device_init(struct host1x_device *device) mutex_unlock(&device->clients_lock); return 0; + +teardown: + list_for_each_entry_continue_reverse(client, &device->clients, list) + if (client->ops->exit) + client->ops->exit(client); + + /* reset client to end of list for late teardown */ + client = list_entry(&device->clients, struct host1x_client, list); + +teardown_late: + list_for_each_entry_continue_reverse(client, &device->clients, list) + if (client->ops->late_exit) + client->ops->late_exit(client); + + mutex_unlock(&device->clients_lock); + return err; } EXPORT_SYMBOL(host1x_device_init); @@ -237,6 +266,18 @@ int host1x_device_exit(struct host1x_device *device) } } + list_for_each_entry_reverse(client, &device->clients, list) { + if (client->ops && client->ops->late_exit) { + err = client->ops->late_exit(client); + if (err < 0) { + dev_err(&device->dev, "failed to late cleanup %s: %d\n", + dev_name(client->dev), err); + mutex_unlock(&device->clients_lock); + return err; + } + } + } + mutex_unlock(&device->clients_lock); return 0; @@ -287,11 +328,24 @@ static int host1x_del_client(struct host1x *host1x, return -ENODEV; } -static int host1x_device_match(struct device *dev, struct device_driver *drv) +static int host1x_device_match(struct device *dev, const struct device_driver *drv) { return strcmp(dev_name(dev), drv->name) == 0; } +/* + * Note that this is really only needed for backwards compatibility + * with libdrm, which parses this information from sysfs and will + * fail if it can't find the OF_FULLNAME, specifically. 
+ */ +static int host1x_device_uevent(const struct device *dev, + struct kobj_uevent_env *env) +{ + of_device_uevent(dev->parent, env); + + return 0; +} + static const struct dev_pm_ops host1x_device_pm_ops = { .suspend = pm_generic_suspend, .resume = pm_generic_resume, @@ -301,9 +355,10 @@ static const struct dev_pm_ops host1x_device_pm_ops = { .restore = pm_generic_restore, }; -struct bus_type host1x_bus_type = { +const struct bus_type host1x_bus_type = { .name = "host1x", .match = host1x_device_match, + .uevent = host1x_device_uevent, .pm = &host1x_device_pm_ops, }; @@ -388,12 +443,13 @@ static int host1x_device_add(struct host1x *host1x, device->dev.coherent_dma_mask = host1x->dev->coherent_dma_mask; device->dev.dma_mask = &device->dev.coherent_dma_mask; dev_set_name(&device->dev, "%s", driver->driver.name); - of_dma_configure(&device->dev, host1x->dev->of_node); device->dev.release = host1x_device_release; - device->dev.of_node = host1x->dev->of_node; device->dev.bus = &host1x_bus_type; device->dev.parent = host1x->dev; + device->dev.dma_parms = &device->dma_parms; + dma_set_max_seg_size(&device->dev, UINT_MAX); + err = host1x_device_parse_dt(device, driver); if (err < 0) { kfree(device); @@ -415,6 +471,18 @@ static int host1x_device_add(struct host1x *host1x, mutex_unlock(&clients_lock); + /* + * Add device even if there are no subdevs to ensure syncpoint functionality + * is available regardless of whether any engine subdevices are present + */ + if (list_empty(&device->subdevs)) { + err = device_add(&device->dev); + if (err < 0) + dev_err(&device->dev, "failed to add device: %d\n", err); + else + device->registered = true; + } + return 0; } @@ -471,6 +539,36 @@ static void host1x_detach_driver(struct host1x *host1x, mutex_unlock(&host1x->devices_lock); } +static int host1x_devices_show(struct seq_file *s, void *data) +{ + struct host1x *host1x = s->private; + struct host1x_device *device; + + mutex_lock(&host1x->devices_lock); + + 
list_for_each_entry(device, &host1x->devices, list) { + struct host1x_subdev *subdev; + + seq_printf(s, "%s\n", dev_name(&device->dev)); + + mutex_lock(&device->subdevs_lock); + + list_for_each_entry(subdev, &device->active, list) + seq_printf(s, " %pOFf: %s\n", subdev->np, + dev_name(subdev->client->dev)); + + list_for_each_entry(subdev, &device->subdevs, list) + seq_printf(s, " %pOFf:\n", subdev->np); + + mutex_unlock(&device->subdevs_lock); + } + + mutex_unlock(&host1x->devices_lock); + + return 0; +} +DEFINE_SHOW_ATTRIBUTE(host1x_devices); + /** * host1x_register() - register a host1x controller * @host1x: host1x controller @@ -494,6 +592,9 @@ int host1x_register(struct host1x *host1x) mutex_unlock(&drivers_lock); + debugfs_create_file("devices", S_IRUGO, host1x->debugfs, host1x, + &host1x_devices_fops); + return 0; } @@ -600,8 +701,17 @@ EXPORT_SYMBOL(host1x_driver_register_full); */ void host1x_driver_unregister(struct host1x_driver *driver) { + struct host1x *host1x; + driver_unregister(&driver->driver); + mutex_lock(&devices_lock); + + list_for_each_entry(host1x, &devices, list) + host1x_detach_driver(host1x, driver); + + mutex_unlock(&devices_lock); + mutex_lock(&drivers_lock); list_del_init(&driver->list); mutex_unlock(&drivers_lock); @@ -609,7 +719,31 @@ void host1x_driver_unregister(struct host1x_driver *driver) EXPORT_SYMBOL(host1x_driver_unregister); /** - * host1x_client_register() - register a host1x client + * __host1x_client_init() - initialize a host1x client + * @client: host1x client + * @key: lock class key for the client-specific mutex + */ +void __host1x_client_init(struct host1x_client *client, struct lock_class_key *key) +{ + host1x_bo_cache_init(&client->cache); + INIT_LIST_HEAD(&client->list); + __mutex_init(&client->lock, "host1x client lock", key); + client->usecount = 0; +} +EXPORT_SYMBOL(__host1x_client_init); + +/** + * host1x_client_exit() - uninitialize a host1x client + * @client: host1x client + */ +void 
host1x_client_exit(struct host1x_client *client) +{ + mutex_destroy(&client->lock); +} +EXPORT_SYMBOL(host1x_client_exit); + +/** + * __host1x_client_register() - register a host1x client * @client: host1x client * * Registers a host1x client with each host1x controller instance. Note that @@ -619,7 +753,7 @@ EXPORT_SYMBOL(host1x_driver_unregister); * device and call host1x_device_init(), which will in turn call each client's * &host1x_client_ops.init implementation. */ -int host1x_client_register(struct host1x_client *client) +int __host1x_client_register(struct host1x_client *client) { struct host1x *host1x; int err; @@ -642,7 +776,7 @@ int host1x_client_register(struct host1x_client *client) return 0; } -EXPORT_SYMBOL(host1x_client_register); +EXPORT_SYMBOL(__host1x_client_register); /** * host1x_client_unregister() - unregister a host1x client @@ -651,7 +785,7 @@ EXPORT_SYMBOL(host1x_client_register); * Removes a host1x client from its host1x controller instance. If a logical * device has already been initialized, it will be torn down. 
*/ -int host1x_client_unregister(struct host1x_client *client) +void host1x_client_unregister(struct host1x_client *client) { struct host1x_client *c; struct host1x *host1x; @@ -663,7 +797,7 @@ int host1x_client_unregister(struct host1x_client *client) err = host1x_del_client(host1x, client); if (!err) { mutex_unlock(&devices_lock); - return 0; + return; } } @@ -679,6 +813,152 @@ int host1x_client_unregister(struct host1x_client *client) mutex_unlock(&clients_lock); - return 0; + host1x_bo_cache_destroy(&client->cache); } EXPORT_SYMBOL(host1x_client_unregister); + +int host1x_client_suspend(struct host1x_client *client) +{ + int err = 0; + + mutex_lock(&client->lock); + + if (client->usecount == 1) { + if (client->ops && client->ops->suspend) { + err = client->ops->suspend(client); + if (err < 0) + goto unlock; + } + } + + client->usecount--; + dev_dbg(client->dev, "use count: %u\n", client->usecount); + + if (client->parent) { + err = host1x_client_suspend(client->parent); + if (err < 0) + goto resume; + } + + goto unlock; + +resume: + if (client->usecount == 0) + if (client->ops && client->ops->resume) + client->ops->resume(client); + + client->usecount++; +unlock: + mutex_unlock(&client->lock); + return err; +} +EXPORT_SYMBOL(host1x_client_suspend); + +int host1x_client_resume(struct host1x_client *client) +{ + int err = 0; + + mutex_lock(&client->lock); + + if (client->parent) { + err = host1x_client_resume(client->parent); + if (err < 0) + goto unlock; + } + + if (client->usecount == 0) { + if (client->ops && client->ops->resume) { + err = client->ops->resume(client); + if (err < 0) + goto suspend; + } + } + + client->usecount++; + dev_dbg(client->dev, "use count: %u\n", client->usecount); + + goto unlock; + +suspend: + if (client->parent) + host1x_client_suspend(client->parent); +unlock: + mutex_unlock(&client->lock); + return err; +} +EXPORT_SYMBOL(host1x_client_resume); + +struct host1x_bo_mapping *host1x_bo_pin(struct device *dev, struct host1x_bo *bo, + 
enum dma_data_direction dir, + struct host1x_bo_cache *cache) +{ + struct host1x_bo_mapping *mapping; + + if (cache) { + mutex_lock(&cache->lock); + + list_for_each_entry(mapping, &cache->mappings, entry) { + if (mapping->bo == bo && mapping->direction == dir) { + kref_get(&mapping->ref); + goto unlock; + } + } + } + + mapping = bo->ops->pin(dev, bo, dir); + if (IS_ERR(mapping)) + goto unlock; + + spin_lock(&mapping->bo->lock); + list_add_tail(&mapping->list, &bo->mappings); + spin_unlock(&mapping->bo->lock); + + if (cache) { + INIT_LIST_HEAD(&mapping->entry); + mapping->cache = cache; + + list_add_tail(&mapping->entry, &cache->mappings); + + /* bump reference count to track the copy in the cache */ + kref_get(&mapping->ref); + } + +unlock: + if (cache) + mutex_unlock(&cache->lock); + + return mapping; +} +EXPORT_SYMBOL(host1x_bo_pin); + +static void __host1x_bo_unpin(struct kref *ref) +{ + struct host1x_bo_mapping *mapping = to_host1x_bo_mapping(ref); + + /* + * When the last reference of the mapping goes away, make sure to remove the mapping from + * the cache. 
+ */ + if (mapping->cache) + list_del(&mapping->entry); + + spin_lock(&mapping->bo->lock); + list_del(&mapping->list); + spin_unlock(&mapping->bo->lock); + + mapping->bo->ops->unpin(mapping); +} + +void host1x_bo_unpin(struct host1x_bo_mapping *mapping) +{ + struct host1x_bo_cache *cache = mapping->cache; + + if (cache) + mutex_lock(&cache->lock); + + kref_put(&mapping->ref, __host1x_bo_unpin); + + if (cache) + mutex_unlock(&cache->lock); +} +EXPORT_SYMBOL(host1x_bo_unpin); diff --git a/drivers/gpu/host1x/bus.h b/drivers/gpu/host1x/bus.h index 88fb1c4aac68..a80ceadfeb34 100644 --- a/drivers/gpu/host1x/bus.h +++ b/drivers/gpu/host1x/bus.h @@ -1,18 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (C) 2012 Avionic Design GmbH * Copyright (C) 2012-2013, NVIDIA Corporation - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef HOST1X_BUS_H @@ -21,7 +10,7 @@ struct bus_type; struct host1x; -extern struct bus_type host1x_bus_type; +extern const struct bus_type host1x_bus_type; int host1x_register(struct host1x *host1x); int host1x_unregister(struct host1x *host1x); diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c index 28541b280739..ba2e572567c0 100644 --- a/drivers/gpu/host1x/cdma.c +++ b/drivers/gpu/host1x/cdma.c @@ -1,19 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Tegra host1x Command DMA * * Copyright (c) 2010-2013, NVIDIA Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ @@ -41,7 +30,17 @@ * means that the push buffer is full, not empty. */ -#define HOST1X_PUSHBUFFER_SLOTS 512 +/* + * Typically the commands written into the push buffer are a pair of words. We + * use slots to represent each of these pairs and to simplify things. Note the + * strange number of slots allocated here. 512 slots will fit exactly within a + * single memory page. We also need one additional word at the end of the push + * buffer for the RESTART opcode that will instruct the CDMA to jump back to + * the beginning of the push buffer. With 512 slots, this means that we'll use + * 2 memory pages and waste 4092 bytes of the second page that will never be + * used. 
+ */ +#define HOST1X_PUSHBUFFER_SLOTS 511 /* * Clean up push buffer resources @@ -51,7 +50,7 @@ static void host1x_pushbuffer_destroy(struct push_buffer *pb) struct host1x_cdma *cdma = pb_to_cdma(pb); struct host1x *host1x = cdma_to_host1x(cdma); - if (!pb->phys) + if (!pb->mapped) return; if (host1x->domain) { @@ -106,7 +105,7 @@ static int host1x_pushbuffer_init(struct push_buffer *pb) pb->dma = iova_dma_addr(&host1x->iova, alloc); err = iommu_map(host1x->domain, pb->dma, pb->phys, size, - IOMMU_READ); + IOMMU_READ, GFP_KERNEL); if (err) goto iommu_free_iova; } else { @@ -127,7 +126,7 @@ static int host1x_pushbuffer_init(struct push_buffer *pb) iommu_free_iova: __free_iova(&host1x->iova, alloc); iommu_free_mem: - dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys); + dma_free_wc(host1x->dev, size, pb->mapped, pb->phys); return err; } @@ -143,7 +142,10 @@ static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2) WARN_ON(pb->pos == pb->fence); *(p++) = op1; *(p++) = op2; - pb->pos = (pb->pos + 8) & (pb->size - 1); + pb->pos += 8; + + if (pb->pos >= pb->size) + pb->pos -= pb->size; } /* @@ -153,7 +155,10 @@ static void host1x_pushbuffer_push(struct push_buffer *pb, u32 op1, u32 op2) static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots) { /* Advance the next write position */ - pb->fence = (pb->fence + slots * 8) & (pb->size - 1); + pb->fence += slots * 8; + + if (pb->fence >= pb->size) + pb->fence -= pb->size; } /* @@ -161,7 +166,12 @@ static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots) */ static u32 host1x_pushbuffer_space(struct push_buffer *pb) { - return ((pb->fence - pb->pos) & (pb->size - 1)) / 8; + unsigned int fence = pb->fence; + + if (pb->fence < pb->pos) + fence += pb->size; + + return (fence - pb->pos) / 8; } /* @@ -210,7 +220,7 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma, cdma->event = event; mutex_unlock(&cdma->lock); - down(&cdma->sem); + 
wait_for_completion(&cdma->complete); mutex_lock(&cdma->lock); } @@ -218,21 +228,56 @@ unsigned int host1x_cdma_wait_locked(struct host1x_cdma *cdma, } /* + * Sleep (if necessary) until the push buffer has enough free space. + * + * Must be called with the cdma lock held. + */ +static int host1x_cdma_wait_pushbuffer_space(struct host1x *host1x, + struct host1x_cdma *cdma, + unsigned int needed) +{ + while (true) { + struct push_buffer *pb = &cdma->push_buffer; + unsigned int space; + + space = host1x_pushbuffer_space(pb); + if (space >= needed) + break; + + trace_host1x_wait_cdma(dev_name(cdma_to_channel(cdma)->dev), + CDMA_EVENT_PUSH_BUFFER_SPACE); + + /* If somebody has managed to already start waiting, yield */ + if (cdma->event != CDMA_EVENT_NONE) { + mutex_unlock(&cdma->lock); + schedule(); + mutex_lock(&cdma->lock); + continue; + } + + cdma->event = CDMA_EVENT_PUSH_BUFFER_SPACE; + + mutex_unlock(&cdma->lock); + wait_for_completion(&cdma->complete); + mutex_lock(&cdma->lock); + } + + return 0; +} +/* * Start timer that tracks the time spent by the job. * Must be called with the cdma lock held. 
*/ static void cdma_start_timer_locked(struct host1x_cdma *cdma, struct host1x_job *job) { - struct host1x *host = cdma_to_host1x(cdma); - if (cdma->timeout.client) { /* timer already started */ return; } cdma->timeout.client = job->client; - cdma->timeout.syncpt = host1x_syncpt_get(host, job->syncpt_id); + cdma->timeout.syncpt = job->syncpt; cdma->timeout.syncpt_val = job->syncpt_end; cdma->timeout.start_ktime = ktime_get(); @@ -247,7 +292,7 @@ static void cdma_start_timer_locked(struct host1x_cdma *cdma, static void stop_cdma_timer_locked(struct host1x_cdma *cdma) { cancel_delayed_work(&cdma->timeout.wq); - cdma->timeout.client = 0; + cdma->timeout.client = NULL; } /* @@ -263,23 +308,18 @@ static void stop_cdma_timer_locked(struct host1x_cdma *cdma) static void update_cdma_locked(struct host1x_cdma *cdma) { bool signal = false; - struct host1x *host1x = cdma_to_host1x(cdma); struct host1x_job *job, *n; - /* If CDMA is stopped, queue is cleared and we can return */ - if (!cdma->running) - return; - /* * Walk the sync queue, reading the sync point registers as necessary, * to consume as many sync queue entries as possible without blocking */ list_for_each_entry_safe(job, n, &cdma->sync_queue, list) { - struct host1x_syncpt *sp = - host1x_syncpt_get(host1x, job->syncpt_id); + struct host1x_syncpt *sp = job->syncpt; /* Check whether this syncpt has completed, and bail if not */ - if (!host1x_syncpt_is_expired(sp, job->syncpt_end)) { + if (!host1x_syncpt_is_expired(sp, job->syncpt_end) && + !job->cancelled) { /* Start timer on next pending syncpt */ if (job->timeout) cdma_start_timer_locked(cdma, job); @@ -314,7 +354,7 @@ static void update_cdma_locked(struct host1x_cdma *cdma) if (signal) { cdma->event = CDMA_EVENT_NONE; - up(&cdma->sem); + complete(&cdma->complete); } } @@ -323,7 +363,7 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, { struct host1x *host1x = cdma_to_host1x(cdma); u32 restart_addr, syncpt_incrs, syncpt_val; - struct host1x_job *job = 
NULL; + struct host1x_job *job, *next_job = NULL; syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt); @@ -341,40 +381,40 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, __func__); list_for_each_entry(job, &cdma->sync_queue, list) { - if (syncpt_val < job->syncpt_end) - break; + if (syncpt_val < job->syncpt_end) { + + if (!list_is_last(&job->list, &cdma->sync_queue)) + next_job = list_next_entry(job, list); + + goto syncpt_incr; + } host1x_job_dump(dev, job); } + /* all jobs have been completed */ + job = NULL; + +syncpt_incr: + /* - * Walk the sync_queue, first incrementing with the CPU syncpts that - * are partially executed (the first buffer) or fully skipped while - * still in the current context (slots are also NOP-ed). - * - * At the point contexts are interleaved, syncpt increments must be - * done inline with the pushbuffer from a GATHER buffer to maintain - * the order (slots are modified to be a GATHER of syncpt incrs). + * Increment with CPU the remaining syncpts of a partially executed job. * - * Note: save in restart_addr the location where the timed out buffer - * started in the PB, so we can start the refetch from there (with the - * modified NOP-ed PB slots). This lets things appear to have completed - * properly for this buffer and resources are freed. + * CDMA will continue execution starting with the next job or will get + * into idle state. 
*/ - - dev_dbg(dev, "%s: perform CPU incr on pending same ctx buffers\n", - __func__); - - if (!list_empty(&cdma->sync_queue)) - restart_addr = job->first_get; + if (next_job) + restart_addr = next_job->first_get; else restart_addr = cdma->last_pos; - /* do CPU increments as long as this context continues */ - list_for_each_entry_from(job, &cdma->sync_queue, list) { - /* different context, gets us out of this loop */ - if (job->client != cdma->timeout.client) - break; + if (!job) + goto resume; + + /* do CPU increments for the remaining syncpts */ + if (job->syncpt_recovery) { + dev_dbg(dev, "%s: perform CPU incr on pending buffers\n", + __func__); /* won't need a timeout when replayed */ job->timeout = 0; @@ -389,25 +429,74 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma, syncpt_incrs, job->syncpt_end, job->num_slots); - syncpt_val += syncpt_incrs; - } + dev_dbg(dev, "%s: finished sync_queue modification\n", + __func__); + } else { + struct host1x_job *failed_job = job; - /* - * The following submits from the same client may be dependent on the - * failed submit and therefore they may fail. Force a small timeout - * to make the queue cleanup faster. - */ + host1x_job_dump(dev, job); + + host1x_syncpt_set_locked(job->syncpt); + failed_job->cancelled = true; + + list_for_each_entry_continue(job, &cdma->sync_queue, list) { + unsigned int i; + + if (job->syncpt != failed_job->syncpt) + continue; + + for (i = 0; i < job->num_slots; i++) { + unsigned int slot = (job->first_get/8 + i) % + HOST1X_PUSHBUFFER_SLOTS; + u32 *mapped = cdma->push_buffer.mapped; + + /* + * Overwrite opcodes with 0 word writes + * to offset 0xbad. This does nothing but + * has an easily detected signature in debug + * traces. + * + * On systems with MLOCK enforcement enabled, + * the above 0 word writes would fall foul of + * the enforcement. As such, in the first slot + * put a RESTART_W opcode to the beginning + * of the next job.
We don't use this for older + * chips since those only support the RESTART + * opcode with inconvenient alignment requirements. + */ + if (i == 0 && host1x->info->has_wide_gather) { + unsigned int next_job = (job->first_get/8 + job->num_slots) + % HOST1X_PUSHBUFFER_SLOTS; + mapped[2*slot+0] = (0xd << 28) | (next_job * 2); + mapped[2*slot+1] = 0x0; + } else { + mapped[2*slot+0] = 0x1bad0000; + mapped[2*slot+1] = 0x1bad0000; + } + } - list_for_each_entry_from(job, &cdma->sync_queue, list) - if (job->client == cdma->timeout.client) - job->timeout = min_t(unsigned int, job->timeout, 500); + job->cancelled = true; + } - dev_dbg(dev, "%s: finished sync_queue modification\n", __func__); + wmb(); + update_cdma_locked(cdma); + } + +resume: /* roll back DMAGET and start up channel again */ host1x_hw_cdma_resume(host1x, cdma, restart_addr); } +static void cdma_update_work(struct work_struct *work) +{ + struct host1x_cdma *cdma = container_of(work, struct host1x_cdma, update_work); + + mutex_lock(&cdma->lock); + update_cdma_locked(cdma); + mutex_unlock(&cdma->lock); +} + /* * Create a cdma */ @@ -416,7 +505,8 @@ int host1x_cdma_init(struct host1x_cdma *cdma) int err; mutex_init(&cdma->lock); - sema_init(&cdma->sem, 0); + init_completion(&cdma->complete); + INIT_WORK(&cdma->update_work, cdma_update_work); INIT_LIST_HEAD(&cdma->sync_queue); @@ -459,13 +549,22 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job) mutex_lock(&cdma->lock); + /* + * Check if syncpoint was locked due to previous job timeout. + * This needs to be done within the cdma lock to avoid a race + * with the timeout handler. 
+ */ + if (job->syncpt->locked) { + mutex_unlock(&cdma->lock); + return -EPERM; + } + if (job->timeout) { /* init state on first submit with timeout value */ if (!cdma->timeout.initialized) { int err; - err = host1x_hw_cdma_timeout_init(host1x, cdma, - job->syncpt_id); + err = host1x_hw_cdma_timeout_init(host1x, cdma); if (err) { mutex_unlock(&cdma->lock); return err; @@ -490,7 +589,6 @@ int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job) */ void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2) { - struct host1x *host1x = cdma_to_host1x(cdma); struct push_buffer *pb = &cdma->push_buffer; u32 slots_free = cdma->slots_free; @@ -498,11 +596,9 @@ void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2) trace_host1x_cdma_push(dev_name(cdma_to_channel(cdma)->dev), op1, op2); - if (slots_free == 0) { - host1x_hw_cdma_flush(host1x, cdma); + if (slots_free == 0) slots_free = host1x_cdma_wait_locked(cdma, CDMA_EVENT_PUSH_BUFFER_SPACE); - } cdma->slots_free = slots_free - 1; cdma->slots_used++; @@ -510,6 +606,52 @@ void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2) } /* + * Push four words into two consecutive push buffer slots. Note that extra + * care needs to be taken not to split the two slots across the end of the + * push buffer. Otherwise the RESTART opcode at the end of the push buffer + * that ensures processing will restart at the beginning will break up the + * four words. + * + * Blocks as necessary if the push buffer is full. 
+ */ +void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2, + u32 op3, u32 op4) +{ + struct host1x_channel *channel = cdma_to_channel(cdma); + struct host1x *host1x = cdma_to_host1x(cdma); + struct push_buffer *pb = &cdma->push_buffer; + unsigned int space, needed = 2, extra = 0; + + if (host1x_debug_trace_cmdbuf) + trace_host1x_cdma_push_wide(dev_name(channel->dev), op1, op2, + op3, op4); + + /* compute number of extra slots needed for padding */ + if (pb->pos + 16 > pb->size) { + extra = (pb->size - pb->pos) / 8; + needed += extra; + } + + host1x_cdma_wait_pushbuffer_space(host1x, cdma, needed); + space = host1x_pushbuffer_space(pb); + + cdma->slots_free = space - needed; + cdma->slots_used += needed; + + if (extra > 0) { + /* + * If there isn't enough space at the tail of the pushbuffer, + * insert a RESTART(0) here to go back to the beginning. + * The code above adjusted the indexes appropriately. + */ + host1x_pushbuffer_push(pb, (0x5 << 28), 0xdead0000); + } + + host1x_pushbuffer_push(pb, op1, op2); + host1x_pushbuffer_push(pb, op3, op4); +} + +/* * End a cdma submit * Kick off DMA, add job to the sync queue, and a number of slots to be freed * from the pushbuffer. The handles for a submit must all be pinned at the same @@ -541,7 +683,5 @@ void host1x_cdma_end(struct host1x_cdma *cdma, */ void host1x_cdma_update(struct host1x_cdma *cdma) { - mutex_lock(&cdma->lock); - update_cdma_locked(cdma); - mutex_unlock(&cdma->lock); + schedule_work(&cdma->update_work); } diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h index 286d49386be9..7fd8168af4f9 100644 --- a/drivers/gpu/host1x/cdma.h +++ b/drivers/gpu/host1x/cdma.h @@ -1,27 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Tegra host1x Command DMA * * Copyright (c) 2010-2013, NVIDIA Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __HOST1X_CDMA_H #define __HOST1X_CDMA_H #include <linux/sched.h> -#include <linux/semaphore.h> +#include <linux/completion.h> #include <linux/list.h> +#include <linux/workqueue.h> struct host1x_syncpt; struct host1x_userctx_timeout; @@ -44,7 +34,7 @@ struct host1x_job; struct push_buffer { void *mapped; /* mapped pushbuffer memory */ dma_addr_t dma; /* device address of pushbuffer */ - phys_addr_t phys; /* physical address of pushbuffer */ + dma_addr_t phys; /* physical address of pushbuffer */ u32 fence; /* index we've written */ u32 pos; /* index to write to */ u32 size; @@ -58,7 +48,7 @@ struct buffer_timeout { u32 syncpt_val; /* syncpt value when completed */ ktime_t start_ktime; /* starting time */ /* context timeout information */ - int client; + struct host1x_client *client; }; enum cdma_event { @@ -69,8 +59,8 @@ enum cdma_event { struct host1x_cdma { struct mutex lock; /* controls access to shared state */ - struct semaphore sem; /* signalled when event occurs */ - enum cdma_event event; /* event that sem is waiting for */ + struct completion complete; /* signalled when event occurs */ + enum cdma_event event; /* event that complete is waiting for */ unsigned int slots_used; /* pb slots used in current submit */ unsigned int slots_free; /* pb slots free in current submit */ unsigned int first_get; /* DMAGET value, where submit begins */ @@ -80,6 +70,7 
@@ struct host1x_cdma { struct buffer_timeout timeout; /* channel's timeout state/wq */ bool running; bool torndown; + struct work_struct update_work; }; #define cdma_to_channel(cdma) container_of(cdma, struct host1x_channel, cdma) @@ -90,6 +81,8 @@ int host1x_cdma_init(struct host1x_cdma *cdma); int host1x_cdma_deinit(struct host1x_cdma *cdma); int host1x_cdma_begin(struct host1x_cdma *cdma, struct host1x_job *job); void host1x_cdma_push(struct host1x_cdma *cdma, u32 op1, u32 op2); +void host1x_cdma_push_wide(struct host1x_cdma *cdma, u32 op1, u32 op2, + u32 op3, u32 op4); void host1x_cdma_end(struct host1x_cdma *cdma, struct host1x_job *job); void host1x_cdma_update(struct host1x_cdma *cdma); void host1x_cdma_peek(struct host1x_cdma *cdma, u32 dmaget, int slot, diff --git a/drivers/gpu/host1x/channel.c b/drivers/gpu/host1x/channel.c index db9b91d1384c..08077afe4cde 100644 --- a/drivers/gpu/host1x/channel.c +++ b/drivers/gpu/host1x/channel.c @@ -1,19 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Tegra host1x Channel * * Copyright (c) 2010-2013, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ #include <linux/slab.h> @@ -32,22 +21,20 @@ int host1x_channel_list_init(struct host1x_channel_list *chlist, if (!chlist->channels) return -ENOMEM; - chlist->allocated_channels = - kcalloc(BITS_TO_LONGS(num_channels), sizeof(unsigned long), - GFP_KERNEL); + chlist->allocated_channels = bitmap_zalloc(num_channels, GFP_KERNEL); if (!chlist->allocated_channels) { kfree(chlist->channels); return -ENOMEM; } - bitmap_zero(chlist->allocated_channels, num_channels); + mutex_init(&chlist->lock); return 0; } void host1x_channel_list_free(struct host1x_channel_list *chlist) { - kfree(chlist->allocated_channels); + bitmap_free(chlist->allocated_channels); kfree(chlist->channels); } @@ -86,6 +73,33 @@ struct host1x_channel *host1x_channel_get_index(struct host1x *host, return ch; } +void host1x_channel_stop(struct host1x_channel *channel) +{ + struct host1x *host = dev_get_drvdata(channel->dev->parent); + + host1x_hw_cdma_stop(host, &channel->cdma); +} +EXPORT_SYMBOL(host1x_channel_stop); + +/** + * host1x_channel_stop_all() - disable CDMA on allocated channels + * @host: host1x instance + * + * Stop CDMA on allocated channels + */ +void host1x_channel_stop_all(struct host1x *host) +{ + struct host1x_channel_list *chlist = &host->channel_list; + int bit; + + mutex_lock(&chlist->lock); + + for_each_set_bit(bit, chlist->allocated_channels, host->info->nb_channels) + host1x_channel_stop(&chlist->channels[bit]); + + mutex_unlock(&chlist->lock); +} + static void release_channel(struct kref *kref) { struct host1x_channel *channel = @@ -111,8 +125,11 @@ static struct host1x_channel *acquire_unused_channel(struct host1x *host) unsigned int max_channels = host->info->nb_channels; unsigned int index; + mutex_lock(&chlist->lock); + index = find_first_zero_bit(chlist->allocated_channels, max_channels); if (index >= max_channels) { + mutex_unlock(&chlist->lock); dev_err(host->dev, "failed to find free channel\n"); return NULL; } @@ -121,20 +138,21 @@ static struct host1x_channel 
*acquire_unused_channel(struct host1x *host) set_bit(index, chlist->allocated_channels); + mutex_unlock(&chlist->lock); + return &chlist->channels[index]; } /** * host1x_channel_request() - Allocate a channel - * @device: Host1x unit this channel will be used to send commands to + * @client: Host1x client this channel will be used to send commands to * - * Allocates a new host1x channel for @device. If there are no free channels, - * this will sleep until one becomes available. May return NULL if CDMA + * Allocates a new host1x channel for @client. May return NULL if CDMA * initialization fails. */ -struct host1x_channel *host1x_channel_request(struct device *dev) +struct host1x_channel *host1x_channel_request(struct host1x_client *client) { - struct host1x *host = dev_get_drvdata(dev->parent); + struct host1x *host = dev_get_drvdata(client->dev->parent); struct host1x_channel_list *chlist = &host->channel_list; struct host1x_channel *channel; int err; @@ -145,7 +163,8 @@ struct host1x_channel *host1x_channel_request(struct device *dev) kref_init(&channel->refcount); mutex_init(&channel->submitlock); - channel->dev = dev; + channel->client = client; + channel->dev = client->dev; err = host1x_hw_channel_init(host, channel, channel->id); if (err < 0) @@ -160,7 +179,7 @@ struct host1x_channel *host1x_channel_request(struct device *dev) fail: clear_bit(channel->id, chlist->allocated_channels); - dev_err(dev, "failed to initialize channel\n"); + dev_err(client->dev, "failed to initialize channel\n"); return NULL; } diff --git a/drivers/gpu/host1x/channel.h b/drivers/gpu/host1x/channel.h index 7068e42d42df..d7aede204d83 100644 --- a/drivers/gpu/host1x/channel.h +++ b/drivers/gpu/host1x/channel.h @@ -1,19 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Tegra host1x Channel * * Copyright (c) 2010-2013, NVIDIA Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __HOST1X_CHANNEL_H @@ -21,6 +10,7 @@ #include <linux/io.h> #include <linux/kref.h> +#include <linux/mutex.h> #include "cdma.h" @@ -29,6 +19,8 @@ struct host1x_channel; struct host1x_channel_list { struct host1x_channel *channels; + + struct mutex lock; unsigned long *allocated_channels; }; @@ -37,6 +29,7 @@ struct host1x_channel { unsigned int id; struct mutex submitlock; void __iomem *regs; + struct host1x_client *client; struct device *dev; struct host1x_cdma cdma; }; @@ -47,5 +40,6 @@ int host1x_channel_list_init(struct host1x_channel_list *chlist, void host1x_channel_list_free(struct host1x_channel_list *chlist); struct host1x_channel *host1x_channel_get_index(struct host1x *host, unsigned int index); +void host1x_channel_stop_all(struct host1x *host); #endif diff --git a/drivers/gpu/host1x/context.c b/drivers/gpu/host1x/context.c new file mode 100644 index 000000000000..a6f6779662a3 --- /dev/null +++ b/drivers/gpu/host1x/context.c @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, NVIDIA Corporation. 
+ */ + +#include <linux/device.h> +#include <linux/kref.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/pid.h> +#include <linux/slab.h> + +#include "context.h" +#include "dev.h" + +static void host1x_memory_context_release(struct device *dev) +{ + /* context device is freed in host1x_memory_context_list_free() */ +} + +int host1x_memory_context_list_init(struct host1x *host1x) +{ + struct host1x_memory_context_list *cdl = &host1x->context_list; + struct device_node *node = host1x->dev->of_node; + struct host1x_memory_context *ctx; + unsigned int i; + int err; + + cdl->devs = NULL; + cdl->len = 0; + mutex_init(&cdl->lock); + + err = of_property_count_u32_elems(node, "iommu-map"); + if (err < 0) + return 0; + + cdl->len = err / 4; + cdl->devs = kcalloc(cdl->len, sizeof(*cdl->devs), GFP_KERNEL); + if (!cdl->devs) + return -ENOMEM; + + for (i = 0; i < cdl->len; i++) { + ctx = &cdl->devs[i]; + + ctx->host = host1x; + + device_initialize(&ctx->dev); + + /* + * Due to an issue with T194 NVENC, only 38 bits can be used. + * Anyway, 256GiB of IOVA ought to be enough for anyone. 
+ */ + ctx->dma_mask = DMA_BIT_MASK(38); + ctx->dev.dma_mask = &ctx->dma_mask; + ctx->dev.coherent_dma_mask = ctx->dma_mask; + dev_set_name(&ctx->dev, "host1x-ctx.%d", i); + ctx->dev.bus = &host1x_context_device_bus_type; + ctx->dev.parent = host1x->dev; + ctx->dev.release = host1x_memory_context_release; + + ctx->dev.dma_parms = &ctx->dma_parms; + dma_set_max_seg_size(&ctx->dev, UINT_MAX); + + err = device_add(&ctx->dev); + if (err) { + dev_err(host1x->dev, "could not add context device %d: %d\n", i, err); + put_device(&ctx->dev); + goto unreg_devices; + } + + err = of_dma_configure_id(&ctx->dev, node, true, &i); + if (err) { + dev_err(host1x->dev, "IOMMU configuration failed for context device %d: %d\n", + i, err); + device_unregister(&ctx->dev); + goto unreg_devices; + } + + if (!tegra_dev_iommu_get_stream_id(&ctx->dev, &ctx->stream_id) || + !device_iommu_mapped(&ctx->dev)) { + dev_err(host1x->dev, "Context device %d has no IOMMU!\n", i); + device_unregister(&ctx->dev); + + /* + * This means that if IOMMU is disabled but context devices + * are defined in the device tree, Host1x will fail to probe. + * That's probably OK in this time and age. 
+ */ + err = -EINVAL; + + goto unreg_devices; + } + } + + return 0; + +unreg_devices: + while (i--) + device_unregister(&cdl->devs[i].dev); + + kfree(cdl->devs); + cdl->devs = NULL; + cdl->len = 0; + + return err; +} + +void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl) +{ + unsigned int i; + + for (i = 0; i < cdl->len; i++) + device_unregister(&cdl->devs[i].dev); + + kfree(cdl->devs); + cdl->len = 0; +} + +struct host1x_memory_context *host1x_memory_context_alloc(struct host1x *host1x, + struct device *dev, + struct pid *pid) +{ + struct host1x_memory_context_list *cdl = &host1x->context_list; + struct host1x_memory_context *free = NULL; + int i; + + if (!cdl->len) + return ERR_PTR(-EOPNOTSUPP); + + mutex_lock(&cdl->lock); + + for (i = 0; i < cdl->len; i++) { + struct host1x_memory_context *cd = &cdl->devs[i]; + + if (cd->dev.iommu->iommu_dev != dev->iommu->iommu_dev) + continue; + + if (cd->owner == pid) { + refcount_inc(&cd->ref); + mutex_unlock(&cdl->lock); + return cd; + } else if (!cd->owner && !free) { + free = cd; + } + } + + if (!free) { + mutex_unlock(&cdl->lock); + return ERR_PTR(-EBUSY); + } + + refcount_set(&free->ref, 1); + free->owner = get_pid(pid); + + mutex_unlock(&cdl->lock); + + return free; +} +EXPORT_SYMBOL_GPL(host1x_memory_context_alloc); + +void host1x_memory_context_get(struct host1x_memory_context *cd) +{ + refcount_inc(&cd->ref); +} +EXPORT_SYMBOL_GPL(host1x_memory_context_get); + +void host1x_memory_context_put(struct host1x_memory_context *cd) +{ + struct host1x_memory_context_list *cdl = &cd->host->context_list; + + if (refcount_dec_and_mutex_lock(&cd->ref, &cdl->lock)) { + put_pid(cd->owner); + cd->owner = NULL; + mutex_unlock(&cdl->lock); + } +} +EXPORT_SYMBOL_GPL(host1x_memory_context_put); diff --git a/drivers/gpu/host1x/context.h b/drivers/gpu/host1x/context.h new file mode 100644 index 000000000000..3e03bc1d3bac --- /dev/null +++ b/drivers/gpu/host1x/context.h @@ -0,0 +1,38 @@ +/* 
SPDX-License-Identifier: GPL-2.0-only */ +/* + * Host1x context devices + * + * Copyright (c) 2020, NVIDIA Corporation. + */ + +#ifndef __HOST1X_CONTEXT_H +#define __HOST1X_CONTEXT_H + +#include <linux/mutex.h> +#include <linux/refcount.h> + +struct host1x; + +extern struct bus_type host1x_context_device_bus_type; + +struct host1x_memory_context_list { + struct mutex lock; + struct host1x_memory_context *devs; + unsigned int len; +}; + +#ifdef CONFIG_IOMMU_API +int host1x_memory_context_list_init(struct host1x *host1x); +void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl); +#else +static inline int host1x_memory_context_list_init(struct host1x *host1x) +{ + return 0; +} + +static inline void host1x_memory_context_list_free(struct host1x_memory_context_list *cdl) +{ +} +#endif + +#endif diff --git a/drivers/gpu/host1x/context_bus.c b/drivers/gpu/host1x/context_bus.c new file mode 100644 index 000000000000..7cd0e1a5edd1 --- /dev/null +++ b/drivers/gpu/host1x/context_bus.c @@ -0,0 +1,26 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (c) 2021, NVIDIA Corporation. + */ + +#include <linux/device.h> +#include <linux/of.h> + +const struct bus_type host1x_context_device_bus_type = { + .name = "host1x-context", +}; +EXPORT_SYMBOL_GPL(host1x_context_device_bus_type); + +static int __init host1x_context_device_bus_init(void) +{ + int err; + + err = bus_register(&host1x_context_device_bus_type); + if (err < 0) { + pr_err("bus type registration failed: %d\n", err); + return err; + } + + return 0; +} +postcore_initcall(host1x_context_device_bus_init); diff --git a/drivers/gpu/host1x/debug.c b/drivers/gpu/host1x/debug.c index 2aae0e63214c..6433c00d5d7e 100644 --- a/drivers/gpu/host1x/debug.c +++ b/drivers/gpu/host1x/debug.c @@ -1,21 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010 Google, Inc. 
* Author: Erik Gilling <konkers@android.com> * * Copyright (C) 2011-2013 NVIDIA Corporation - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * */ #include <linux/debugfs.h> +#include <linux/pm_runtime.h> #include <linux/seq_file.h> #include <linux/uaccess.h> @@ -25,6 +17,8 @@ #include "debug.h" #include "channel.h" +static DEFINE_MUTEX(debug_lock); + unsigned int host1x_debug_trace_cmdbuf; static pid_t host1x_debug_force_timeout_pid; @@ -40,41 +34,79 @@ void host1x_debug_output(struct output *o, const char *fmt, ...) len = vsnprintf(o->buf, sizeof(o->buf), fmt, args); va_end(args); - o->fn(o->ctx, o->buf, len); + o->fn(o->ctx, o->buf, len, false); +} + +void host1x_debug_cont(struct output *o, const char *fmt, ...) 
+{ + va_list args; + int len; + + va_start(args, fmt); + len = vsnprintf(o->buf, sizeof(o->buf), fmt, args); + va_end(args); + + o->fn(o->ctx, o->buf, len, true); } static int show_channel(struct host1x_channel *ch, void *data, bool show_fifo) { struct host1x *m = dev_get_drvdata(ch->dev->parent); struct output *o = data; + int err; + + err = pm_runtime_resume_and_get(m->dev); + if (err < 0) + return err; mutex_lock(&ch->cdma.lock); + mutex_lock(&debug_lock); if (show_fifo) host1x_hw_show_channel_fifo(m, ch, o); host1x_hw_show_channel_cdma(m, ch, o); + mutex_unlock(&debug_lock); mutex_unlock(&ch->cdma.lock); + pm_runtime_put(m->dev); + return 0; } -static void show_syncpts(struct host1x *m, struct output *o) +static void show_syncpts(struct host1x *m, struct output *o, bool show_all) { + unsigned long irqflags; + struct list_head *pos; unsigned int i; + int err; host1x_debug_output(o, "---- syncpts ----\n"); + err = pm_runtime_resume_and_get(m->dev); + if (err < 0) + return; + for (i = 0; i < host1x_syncpt_nb_pts(m); i++) { u32 max = host1x_syncpt_read_max(m->syncpt + i); u32 min = host1x_syncpt_load(m->syncpt + i); + unsigned int waiters = 0; + + spin_lock_irqsave(&m->syncpt[i].fences.lock, irqflags); + list_for_each(pos, &m->syncpt[i].fences.list) + waiters++; + spin_unlock_irqrestore(&m->syncpt[i].fences.lock, irqflags); + + if (!kref_read(&m->syncpt[i].ref)) + continue; - if (!min && !max) + if (!show_all && !min && !max && !waiters) continue; - host1x_debug_output(o, "id %u (%s) min %d max %d\n", - i, m->syncpt[i].name, min, max); + host1x_debug_output(o, + "id %u (%s) min %d max %d (%d waiters)\n", + i, m->syncpt[i].name, min, max, waiters); } for (i = 0; i < host1x_syncpt_nb_bases(m); i++) { @@ -86,15 +118,17 @@ static void show_syncpts(struct host1x *m, struct output *o) base_val); } + pm_runtime_put(m->dev); + host1x_debug_output(o, "\n"); } static void show_all(struct host1x *m, struct output *o, bool show_fifo) { - int i; + unsigned int i; 
host1x_hw_show_mlocks(m, o); - show_syncpts(m, o); + show_syncpts(m, o, true); host1x_debug_output(o, "---- channels ----\n"); for (i = 0; i < m->info->nb_channels; ++i) { @@ -107,7 +141,7 @@ static void show_all(struct host1x *m, struct output *o, bool show_fifo) } } -static int host1x_debug_show_all(struct seq_file *s, void *unused) +static int host1x_debug_all_show(struct seq_file *s, void *unused) { struct output o = { .fn = write_to_seqfile, @@ -118,6 +152,7 @@ static int host1x_debug_show_all(struct seq_file *s, void *unused) return 0; } +DEFINE_SHOW_ATTRIBUTE(host1x_debug_all); static int host1x_debug_show(struct seq_file *s, void *unused) { @@ -130,38 +165,12 @@ static int host1x_debug_show(struct seq_file *s, void *unused) return 0; } - -static int host1x_debug_open_all(struct inode *inode, struct file *file) -{ - return single_open(file, host1x_debug_show_all, inode->i_private); -} - -static const struct file_operations host1x_debug_all_fops = { - .open = host1x_debug_open_all, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int host1x_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, host1x_debug_show, inode->i_private); -} - -static const struct file_operations host1x_debug_fops = { - .open = host1x_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(host1x_debug); static void host1x_debugfs_init(struct host1x *host1x) { struct dentry *de = debugfs_create_dir("tegra-host1x", NULL); - if (!de) - return; - /* Store the created entry */ host1x->debugfs = de; @@ -207,12 +216,3 @@ void host1x_debug_dump(struct host1x *host1x) show_all(host1x, &o, true); } - -void host1x_debug_dump_syncpts(struct host1x *host1x) -{ - struct output o = { - .fn = write_to_printk - }; - - show_syncpts(host1x, &o); -} diff --git a/drivers/gpu/host1x/debug.h b/drivers/gpu/host1x/debug.h index 4595b2e0799f..c43c61d876a9 100644 --- 
a/drivers/gpu/host1x/debug.h +++ b/drivers/gpu/host1x/debug.h @@ -1,19 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Tegra host1x Debug * * Copyright (c) 2011-2013 NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __HOST1X_DEBUG_H #define __HOST1X_DEBUG_H @@ -24,28 +13,33 @@ struct host1x; struct output { - void (*fn)(void *ctx, const char *str, size_t len); + void (*fn)(void *ctx, const char *str, size_t len, bool cont); void *ctx; char buf[256]; }; -static inline void write_to_seqfile(void *ctx, const char *str, size_t len) +static inline void write_to_seqfile(void *ctx, const char *str, size_t len, + bool cont) { seq_write((struct seq_file *)ctx, str, len); } -static inline void write_to_printk(void *ctx, const char *str, size_t len) +static inline void write_to_printk(void *ctx, const char *str, size_t len, + bool cont) { - pr_info("%s", str); + if (cont) + pr_cont("%s", str); + else + pr_info("%s", str); } void __printf(2, 3) host1x_debug_output(struct output *o, const char *fmt, ...); +void __printf(2, 3) host1x_debug_cont(struct output *o, const char *fmt, ...); extern unsigned int host1x_debug_trace_cmdbuf; void host1x_debug_init(struct host1x *host1x); void host1x_debug_deinit(struct host1x *host1x); void host1x_debug_dump(struct host1x *host1x); -void host1x_debug_dump_syncpts(struct host1x *host1x); #endif diff --git a/drivers/gpu/host1x/dev.c 
b/drivers/gpu/host1x/dev.c index 778272514164..3f475f0e6545 100644 --- a/drivers/gpu/host1x/dev.c +++ b/drivers/gpu/host1x/dev.c @@ -1,36 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Tegra host1x driver * * Copyright (c) 2010-2013, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/clk.h> +#include <linux/delay.h> #include <linux/dma-mapping.h> #include <linux/io.h> #include <linux/list.h> #include <linux/module.h> -#include <linux/of_device.h> #include <linux/of.h> +#include <linux/of_platform.h> +#include <linux/platform_device.h> +#include <linux/pm_runtime.h> #include <linux/slab.h> +#include <soc/tegra/common.h> + #define CREATE_TRACE_POINTS #include <trace/events/host1x.h> #undef CREATE_TRACE_POINTS +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) +#include <asm/dma-iommu.h> +#endif + #include "bus.h" #include "channel.h" +#include "context.h" #include "debug.h" #include "dev.h" #include "intr.h" @@ -39,6 +38,24 @@ #include "hw/host1x02.h" #include "hw/host1x04.h" #include "hw/host1x05.h" +#include "hw/host1x06.h" +#include "hw/host1x07.h" +#include "hw/host1x08.h" + +void host1x_common_writel(struct host1x *host1x, u32 v, u32 r) +{ + writel(v, host1x->common_regs + r); +} + +void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r) +{ + writel(v, host1x->hv_regs + r); +} + +u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r) +{ + return 
readl(host1x->hv_regs + r); +} void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r) { @@ -54,6 +71,15 @@ u32 host1x_sync_readl(struct host1x *host1x, u32 r) return readl(sync_regs + r); } +#ifdef CONFIG_64BIT +u64 host1x_sync_readq(struct host1x *host1x, u32 r) +{ + void __iomem *sync_regs = host1x->regs + host1x->info->sync_offset; + + return readq(sync_regs + r); +} +#endif + void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r) { writel(v, ch->regs + r); @@ -72,6 +98,11 @@ static const struct host1x_info host1x01_info = { .init = host1x01_init, .sync_offset = 0x3000, .dma_mask = DMA_BIT_MASK(32), + .has_wide_gather = false, + .has_hypervisor = false, + .num_sid_entries = 0, + .sid_table = NULL, + .reserve_vblank_syncpts = true, }; static const struct host1x_info host1x02_info = { @@ -82,6 +113,11 @@ static const struct host1x_info host1x02_info = { .init = host1x02_init, .sync_offset = 0x3000, .dma_mask = DMA_BIT_MASK(32), + .has_wide_gather = false, + .has_hypervisor = false, + .num_sid_entries = 0, + .sid_table = NULL, + .reserve_vblank_syncpts = true, }; static const struct host1x_info host1x04_info = { @@ -92,6 +128,11 @@ static const struct host1x_info host1x04_info = { .init = host1x04_init, .sync_offset = 0x2100, .dma_mask = DMA_BIT_MASK(34), + .has_wide_gather = false, + .has_hypervisor = false, + .num_sid_entries = 0, + .sid_table = NULL, + .reserve_vblank_syncpts = false, }; static const struct host1x_info host1x05_info = { @@ -102,9 +143,154 @@ static const struct host1x_info host1x05_info = { .init = host1x05_init, .sync_offset = 0x2100, .dma_mask = DMA_BIT_MASK(34), + .has_wide_gather = false, + .has_hypervisor = false, + .num_sid_entries = 0, + .sid_table = NULL, + .reserve_vblank_syncpts = false, +}; + +static const struct host1x_sid_entry tegra186_sid_table[] = { + { /* SE1 */ .base = 0x1ac8, .offset = 0x90, .limit = 0x90 }, + { /* SE2 */ .base = 0x1ad0, .offset = 0x90, .limit = 0x90 }, + { /* SE3 */ .base = 0x1ad8, .offset = 
0x90, .limit = 0x90 }, + { /* SE4 */ .base = 0x1ae0, .offset = 0x90, .limit = 0x90 }, + { /* ISP */ .base = 0x1ae8, .offset = 0x50, .limit = 0x50 }, + { /* VIC */ .base = 0x1af0, .offset = 0x30, .limit = 0x34 }, + { /* NVENC */ .base = 0x1af8, .offset = 0x30, .limit = 0x34 }, + { /* NVDEC */ .base = 0x1b00, .offset = 0x30, .limit = 0x34 }, + { /* NVJPG */ .base = 0x1b08, .offset = 0x30, .limit = 0x34 }, + { /* TSEC */ .base = 0x1b10, .offset = 0x30, .limit = 0x34 }, + { /* TSECB */ .base = 0x1b18, .offset = 0x30, .limit = 0x34 }, + { /* VI 0 */ .base = 0x1b80, .offset = 0x10000, .limit = 0x10000 }, + { /* VI 1 */ .base = 0x1b88, .offset = 0x20000, .limit = 0x20000 }, + { /* VI 2 */ .base = 0x1b90, .offset = 0x30000, .limit = 0x30000 }, + { /* VI 3 */ .base = 0x1b98, .offset = 0x40000, .limit = 0x40000 }, + { /* VI 4 */ .base = 0x1ba0, .offset = 0x50000, .limit = 0x50000 }, + { /* VI 5 */ .base = 0x1ba8, .offset = 0x60000, .limit = 0x60000 }, + { /* VI 6 */ .base = 0x1bb0, .offset = 0x70000, .limit = 0x70000 }, + { /* VI 7 */ .base = 0x1bb8, .offset = 0x80000, .limit = 0x80000 }, + { /* VI 8 */ .base = 0x1bc0, .offset = 0x90000, .limit = 0x90000 }, + { /* VI 9 */ .base = 0x1bc8, .offset = 0xa0000, .limit = 0xa0000 }, + { /* VI 10 */ .base = 0x1bd0, .offset = 0xb0000, .limit = 0xb0000 }, + { /* VI 11 */ .base = 0x1bd8, .offset = 0xc0000, .limit = 0xc0000 }, +}; + +static const struct host1x_info host1x06_info = { + .nb_channels = 63, + .nb_pts = 576, + .nb_mlocks = 24, + .nb_bases = 16, + .init = host1x06_init, + .sync_offset = 0x0, + .dma_mask = DMA_BIT_MASK(40), + .has_wide_gather = true, + .has_hypervisor = true, + .num_sid_entries = ARRAY_SIZE(tegra186_sid_table), + .sid_table = tegra186_sid_table, + .reserve_vblank_syncpts = false, + .skip_reset_assert = true, +}; + +static const struct host1x_sid_entry tegra194_sid_table[] = { + { /* SE1 */ .base = 0x1ac8, .offset = 0x90, .limit = 0x90 }, + { /* SE2 */ .base = 0x1ad0, .offset = 0x90, .limit = 0x90 }, + { /* SE3 
*/ .base = 0x1ad8, .offset = 0x90, .limit = 0x90 }, + { /* SE4 */ .base = 0x1ae0, .offset = 0x90, .limit = 0x90 }, + { /* ISP */ .base = 0x1ae8, .offset = 0x800, .limit = 0x800 }, + { /* VIC */ .base = 0x1af0, .offset = 0x30, .limit = 0x34 }, + { /* NVENC */ .base = 0x1af8, .offset = 0x30, .limit = 0x34 }, + { /* NVDEC */ .base = 0x1b00, .offset = 0x30, .limit = 0x34 }, + { /* NVJPG */ .base = 0x1b08, .offset = 0x30, .limit = 0x34 }, + { /* TSEC */ .base = 0x1b10, .offset = 0x30, .limit = 0x34 }, + { /* TSECB */ .base = 0x1b18, .offset = 0x30, .limit = 0x34 }, + { /* VI */ .base = 0x1b80, .offset = 0x800, .limit = 0x800 }, + { /* VI_THI */ .base = 0x1b88, .offset = 0x30, .limit = 0x34 }, + { /* ISP_THI */ .base = 0x1b90, .offset = 0x30, .limit = 0x34 }, + { /* PVA0_CLUSTER */ .base = 0x1b98, .offset = 0x0, .limit = 0x0 }, + { /* PVA0_CLUSTER */ .base = 0x1ba0, .offset = 0x0, .limit = 0x0 }, + { /* NVDLA0 */ .base = 0x1ba8, .offset = 0x30, .limit = 0x34 }, + { /* NVDLA1 */ .base = 0x1bb0, .offset = 0x30, .limit = 0x34 }, + { /* NVENC1 */ .base = 0x1bb8, .offset = 0x30, .limit = 0x34 }, + { /* NVDEC1 */ .base = 0x1bc0, .offset = 0x30, .limit = 0x34 }, +}; + +static const struct host1x_info host1x07_info = { + .nb_channels = 63, + .nb_pts = 704, + .nb_mlocks = 32, + .nb_bases = 0, + .init = host1x07_init, + .sync_offset = 0x0, + .dma_mask = DMA_BIT_MASK(40), + .has_wide_gather = true, + .has_hypervisor = true, + .num_sid_entries = ARRAY_SIZE(tegra194_sid_table), + .sid_table = tegra194_sid_table, + .reserve_vblank_syncpts = false, +}; + +/* + * Tegra234 has two stream ID protection tables, one for setting stream IDs + * through the channel path via SETSTREAMID, and one for setting them via + * MMIO. We program each engine's data stream ID in the channel path table + * and firmware stream ID in the MMIO path table. 
+ */ +static const struct host1x_sid_entry tegra234_sid_table[] = { + { /* SE1 MMIO */ .base = 0x1650, .offset = 0x90, .limit = 0x90 }, + { /* SE1 ch */ .base = 0x1730, .offset = 0x90, .limit = 0x90 }, + { /* SE2 MMIO */ .base = 0x1658, .offset = 0x90, .limit = 0x90 }, + { /* SE2 ch */ .base = 0x1738, .offset = 0x90, .limit = 0x90 }, + { /* SE4 MMIO */ .base = 0x1660, .offset = 0x90, .limit = 0x90 }, + { /* SE4 ch */ .base = 0x1740, .offset = 0x90, .limit = 0x90 }, + { /* ISP MMIO */ .base = 0x1680, .offset = 0x800, .limit = 0x800 }, + { /* VIC MMIO */ .base = 0x1688, .offset = 0x34, .limit = 0x34 }, + { /* VIC ch */ .base = 0x17b8, .offset = 0x30, .limit = 0x30 }, + { /* NVENC MMIO */ .base = 0x1690, .offset = 0x34, .limit = 0x34 }, + { /* NVENC ch */ .base = 0x17c0, .offset = 0x30, .limit = 0x30 }, + { /* NVDEC MMIO */ .base = 0x1698, .offset = 0x34, .limit = 0x34 }, + { /* NVDEC ch */ .base = 0x17c8, .offset = 0x30, .limit = 0x30 }, + { /* NVJPG MMIO */ .base = 0x16a0, .offset = 0x34, .limit = 0x34 }, + { /* NVJPG ch */ .base = 0x17d0, .offset = 0x30, .limit = 0x30 }, + { /* TSEC MMIO */ .base = 0x16a8, .offset = 0x30, .limit = 0x34 }, + { /* NVJPG1 MMIO */ .base = 0x16b0, .offset = 0x34, .limit = 0x34 }, + { /* NVJPG1 ch */ .base = 0x17a8, .offset = 0x30, .limit = 0x30 }, + { /* VI MMIO */ .base = 0x16b8, .offset = 0x800, .limit = 0x800 }, + { /* VI_THI MMIO */ .base = 0x16c0, .offset = 0x30, .limit = 0x34 }, + { /* ISP_THI MMIO */ .base = 0x16c8, .offset = 0x30, .limit = 0x34 }, + { /* NVDLA MMIO */ .base = 0x16d8, .offset = 0x30, .limit = 0x34 }, + { /* NVDLA ch */ .base = 0x17e0, .offset = 0x30, .limit = 0x34 }, + { /* NVDLA1 MMIO */ .base = 0x16e0, .offset = 0x30, .limit = 0x34 }, + { /* NVDLA1 ch */ .base = 0x17e8, .offset = 0x30, .limit = 0x34 }, + { /* OFA MMIO */ .base = 0x16e8, .offset = 0x34, .limit = 0x34 }, + { /* OFA ch */ .base = 0x1768, .offset = 0x30, .limit = 0x30 }, + { /* VI2 MMIO */ .base = 0x16f0, .offset = 0x800, .limit = 0x800 }, + { /* 
VI2_THI MMIO */ .base = 0x16f8, .offset = 0x30, .limit = 0x34 }, +}; + +static const struct host1x_info host1x08_info = { + .nb_channels = 63, + .nb_pts = 1024, + .nb_mlocks = 24, + .nb_bases = 0, + .init = host1x08_init, + .sync_offset = 0x0, + .dma_mask = DMA_BIT_MASK(40), + .has_wide_gather = true, + .has_hypervisor = true, + .has_common = true, + .num_sid_entries = ARRAY_SIZE(tegra234_sid_table), + .sid_table = tegra234_sid_table, + .streamid_vm_table = { 0x1004, 128 }, + .classid_vm_table = { 0x1404, 25 }, + .mmio_vm_table = { 0x1504, 25 }, + .reserve_vblank_syncpts = false, }; static const struct of_device_id host1x_of_match[] = { + { .compatible = "nvidia,tegra234-host1x", .data = &host1x08_info, }, + { .compatible = "nvidia,tegra194-host1x", .data = &host1x07_info, }, + { .compatible = "nvidia,tegra186-host1x", .data = &host1x06_info, }, { .compatible = "nvidia,tegra210-host1x", .data = &host1x05_info, }, { .compatible = "nvidia,tegra124-host1x", .data = &host1x04_info, }, { .compatible = "nvidia,tegra114-host1x", .data = &host1x02_info, }, @@ -114,48 +300,292 @@ static const struct of_device_id host1x_of_match[] = { }; MODULE_DEVICE_TABLE(of, host1x_of_match); -static int host1x_probe(struct platform_device *pdev) +static void host1x_setup_virtualization_tables(struct host1x *host) { - const struct of_device_id *id; - struct host1x *host; - struct resource *regs; - int syncpt_irq; + const struct host1x_info *info = host->info; + unsigned int i; + + if (!info->has_hypervisor) + return; + + for (i = 0; i < info->num_sid_entries; i++) { + const struct host1x_sid_entry *entry = &info->sid_table[i]; + + host1x_hypervisor_writel(host, entry->offset, entry->base); + host1x_hypervisor_writel(host, entry->limit, entry->base + 4); + } + + for (i = 0; i < info->streamid_vm_table.count; i++) { + /* Allow access to all stream IDs to all VMs. 
*/ + host1x_hypervisor_writel(host, 0xff, info->streamid_vm_table.base + 4 * i); + } + + for (i = 0; i < info->classid_vm_table.count; i++) { + /* Allow access to all classes to all VMs. */ + host1x_hypervisor_writel(host, 0xff, info->classid_vm_table.base + 4 * i); + } + + for (i = 0; i < info->mmio_vm_table.count; i++) { + /* Use VM1 (that's us) as originator VMID for engine MMIO accesses. */ + host1x_hypervisor_writel(host, 0x1, info->mmio_vm_table.base + 4 * i); + } +} + +static bool host1x_wants_iommu(struct host1x *host1x) +{ + /* Our IOMMU usage policy doesn't currently play well with GART */ + if (of_machine_is_compatible("nvidia,tegra20")) + return false; + + /* + * If we support addressing a maximum of 32 bits of physical memory + * and if the host1x firewall is enabled, there's no need to enable + * IOMMU support. This can happen for example on Tegra20, Tegra30 + * and Tegra114. + * + * Tegra124 and later can address up to 34 bits of physical memory and + * many platforms come equipped with more than 2 GiB of system memory, + * which requires crossing the 4 GiB boundary. But there's a catch: on + * SoCs before Tegra186 (i.e. Tegra124 and Tegra210), the host1x can + * only address up to 32 bits of memory in GATHER opcodes, which means + * that command buffers need to either be in the first 2 GiB of system + * memory (which could quickly lead to memory exhaustion), or command + * buffers need to be treated differently from other buffers (which is + * not possible with the current ABI). + * + * A third option is to use the IOMMU in these cases to make sure all + * buffers will be mapped into a 32-bit IOVA space that host1x can + * address. This allows all of the system memory to be used and works + * within the limitations of the host1x on these SoCs. + * + * In summary, default to enable IOMMU on Tegra124 and later. For any + * of the earlier SoCs, only use the IOMMU for additional safety when + * the host1x firewall is disabled. 
+ */ + if (host1x->info->dma_mask <= DMA_BIT_MASK(32)) { + if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) + return false; + } + + return true; +} + +/* + * Returns ERR_PTR on failure, NULL if the translation is IDENTITY, otherwise a + * valid paging domain. + */ +static struct iommu_domain *host1x_iommu_attach(struct host1x *host) +{ + struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev); int err; - id = of_match_device(host1x_of_match, &pdev->dev); - if (!id) - return -EINVAL; +#if IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU) + if (host->dev->archdata.mapping) { + struct dma_iommu_mapping *mapping = + to_dma_iommu_mapping(host->dev); + arm_iommu_detach_device(host->dev); + arm_iommu_release_mapping(mapping); + + domain = iommu_get_domain_for_dev(host->dev); + } +#endif + + /* + * We may not always want to enable IOMMU support (for example if the + * host1x firewall is already enabled and we don't support addressing + * more than 32 bits of physical memory), so check for that first. + * + * Similarly, if host1x is already attached to an IOMMU (via the DMA + * API), don't try to attach again. 
+ */ + if (domain && domain->type == IOMMU_DOMAIN_IDENTITY) + domain = NULL; + if (!host1x_wants_iommu(host) || domain) + return domain; + + host->group = iommu_group_get(host->dev); + if (host->group) { + struct iommu_domain_geometry *geometry; + dma_addr_t start, end; + unsigned long order; + + err = iova_cache_get(); + if (err < 0) + goto put_group; + + host->domain = iommu_paging_domain_alloc(host->dev); + if (IS_ERR(host->domain)) { + err = PTR_ERR(host->domain); + host->domain = NULL; + goto put_cache; + } + + err = iommu_attach_group(host->domain, host->group); + if (err) { + if (err == -ENODEV) + err = 0; - regs = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!regs) { - dev_err(&pdev->dev, "failed to get registers\n"); - return -ENXIO; + goto free_domain; + } + + geometry = &host->domain->geometry; + start = geometry->aperture_start & host->info->dma_mask; + end = geometry->aperture_end & host->info->dma_mask; + + order = __ffs(host->domain->pgsize_bitmap); + init_iova_domain(&host->iova, 1UL << order, start >> order); + host->iova_end = end; + + domain = host->domain; } - syncpt_irq = platform_get_irq(pdev, 0); - if (syncpt_irq < 0) { - dev_err(&pdev->dev, "failed to get IRQ\n"); - return -ENXIO; + return domain; + +free_domain: + iommu_domain_free(host->domain); + host->domain = NULL; +put_cache: + iova_cache_put(); +put_group: + iommu_group_put(host->group); + host->group = NULL; + + return ERR_PTR(err); +} + +static int host1x_iommu_init(struct host1x *host) +{ + u64 mask = host->info->dma_mask; + struct iommu_domain *domain; + int err; + + domain = host1x_iommu_attach(host); + if (IS_ERR(domain)) { + err = PTR_ERR(domain); + dev_err(host->dev, "failed to attach to IOMMU: %d\n", err); + return err; + } + + /* + * If we're not behind an IOMMU make sure we don't get push buffers + * that are allocated outside of the range addressable by the GATHER + * opcode. 
+ * + * Newer generations of Tegra (Tegra186 and later) support a wide + * variant of the GATHER opcode that allows addressing more bits. + */ + if (!domain && !host->info->has_wide_gather) + mask = DMA_BIT_MASK(32); + + err = dma_coerce_mask_and_coherent(host->dev, mask); + if (err < 0) { + dev_err(host->dev, "failed to set DMA mask: %d\n", err); + return err; + } + + return 0; +} + +static void host1x_iommu_exit(struct host1x *host) +{ + if (host->domain) { + put_iova_domain(&host->iova); + iommu_detach_group(host->domain, host->group); + + iommu_domain_free(host->domain); + host->domain = NULL; + + iova_cache_put(); + + iommu_group_put(host->group); + host->group = NULL; + } +} + +static int host1x_get_resets(struct host1x *host) +{ + int err; + + host->resets[0].id = "mc"; + host->resets[1].id = "host1x"; + host->nresets = ARRAY_SIZE(host->resets); + + err = devm_reset_control_bulk_get_optional_exclusive_released( + host->dev, host->nresets, host->resets); + if (err) { + dev_err(host->dev, "failed to get reset: %d\n", err); + return err; } + return 0; +} + +static int host1x_probe(struct platform_device *pdev) +{ + struct host1x *host; + int err, i; + host = devm_kzalloc(&pdev->dev, sizeof(*host), GFP_KERNEL); if (!host) return -ENOMEM; + host->info = of_device_get_match_data(&pdev->dev); + + if (host->info->has_hypervisor) { + host->regs = devm_platform_ioremap_resource_byname(pdev, "vm"); + if (IS_ERR(host->regs)) + return PTR_ERR(host->regs); + + host->hv_regs = devm_platform_ioremap_resource_byname(pdev, "hypervisor"); + if (IS_ERR(host->hv_regs)) + return PTR_ERR(host->hv_regs); + + if (host->info->has_common) { + host->common_regs = devm_platform_ioremap_resource_byname(pdev, "common"); + if (IS_ERR(host->common_regs)) + return PTR_ERR(host->common_regs); + } + } else { + host->regs = devm_platform_ioremap_resource(pdev, 0); + if (IS_ERR(host->regs)) + return PTR_ERR(host->regs); + } + + for (i = 0; i < ARRAY_SIZE(host->syncpt_irqs); i++) { + char 
irq_name[] = "syncptX"; + + sprintf(irq_name, "syncpt%d", i); + + err = platform_get_irq_byname_optional(pdev, irq_name); + if (err == -ENXIO) + break; + if (err < 0) + return err; + + host->syncpt_irqs[i] = err; + } + + host->num_syncpt_irqs = i; + + /* Device tree without irq names */ + if (i == 0) { + host->syncpt_irqs[0] = platform_get_irq(pdev, 0); + if (host->syncpt_irqs[0] < 0) + return host->syncpt_irqs[0]; + + host->num_syncpt_irqs = 1; + } + mutex_init(&host->devices_lock); INIT_LIST_HEAD(&host->devices); INIT_LIST_HEAD(&host->list); host->dev = &pdev->dev; - host->info = id->data; /* set common host1x device data */ platform_set_drvdata(pdev, host); - host->regs = devm_ioremap_resource(&pdev->dev, regs); - if (IS_ERR(host->regs)) - return PTR_ERR(host->regs); - - dma_set_mask_and_coherent(host->dev, host->info->dma_mask); + host->dev->dma_parms = &host->dma_parms; + dma_set_max_seg_size(host->dev, UINT_MAX); if (host->info->init) { err = host->info->init(host); @@ -164,130 +594,190 @@ static int host1x_probe(struct platform_device *pdev) } host->clk = devm_clk_get(&pdev->dev, NULL); - if (IS_ERR(host->clk)) { - dev_err(&pdev->dev, "failed to get clock\n"); - err = PTR_ERR(host->clk); - return err; - } + if (IS_ERR(host->clk)) + return dev_err_probe(&pdev->dev, PTR_ERR(host->clk), "failed to get clock\n"); - host->rst = devm_reset_control_get(&pdev->dev, "host1x"); - if (IS_ERR(host->rst)) { - err = PTR_ERR(host->rst); - dev_err(&pdev->dev, "failed to get reset: %d\n", err); + err = host1x_get_resets(host); + if (err) return err; - } - if (iommu_present(&platform_bus_type)) { - struct iommu_domain_geometry *geometry; - unsigned long order; + host1x_bo_cache_init(&host->cache); - host->domain = iommu_domain_alloc(&platform_bus_type); - if (!host->domain) - return -ENOMEM; - - err = iommu_attach_device(host->domain, &pdev->dev); - if (err == -ENODEV) { - iommu_domain_free(host->domain); - host->domain = NULL; - goto skip_iommu; - } else if (err) { - goto 
fail_free_domain; - } - - geometry = &host->domain->geometry; - - order = __ffs(host->domain->pgsize_bitmap); - init_iova_domain(&host->iova, 1UL << order, - geometry->aperture_start >> order, - geometry->aperture_end >> order); - host->iova_end = geometry->aperture_end; + err = host1x_iommu_init(host); + if (err < 0) { + dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err); + goto destroy_cache; } -skip_iommu: err = host1x_channel_list_init(&host->channel_list, host->info->nb_channels); if (err) { dev_err(&pdev->dev, "failed to initialize channel list\n"); - goto fail_detach_device; + goto iommu_exit; } - err = clk_prepare_enable(host->clk); - if (err < 0) { - dev_err(&pdev->dev, "failed to enable clock\n"); - goto fail_free_channels; - } - - err = reset_control_deassert(host->rst); - if (err < 0) { - dev_err(&pdev->dev, "failed to deassert reset: %d\n", err); - goto fail_unprepare_disable; + err = host1x_memory_context_list_init(host); + if (err) { + dev_err(&pdev->dev, "failed to initialize context list\n"); + goto free_channels; } err = host1x_syncpt_init(host); if (err) { dev_err(&pdev->dev, "failed to initialize syncpts\n"); - goto fail_reset_assert; + goto free_contexts; } - err = host1x_intr_init(host, syncpt_irq); + mutex_init(&host->intr_mutex); + + pm_runtime_enable(&pdev->dev); + + err = devm_tegra_core_dev_init_opp_table_common(&pdev->dev); + if (err) + goto pm_disable; + + /* the driver's code isn't ready yet for the dynamic RPM */ + err = pm_runtime_resume_and_get(&pdev->dev); + if (err) + goto pm_disable; + + err = host1x_intr_init(host); if (err) { dev_err(&pdev->dev, "failed to initialize interrupts\n"); - goto fail_deinit_syncpt; + goto pm_put; } host1x_debug_init(host); err = host1x_register(host); if (err < 0) - goto fail_deinit_intr; + goto deinit_debugfs; + + err = devm_of_platform_populate(&pdev->dev); + if (err < 0) + goto unregister; return 0; -fail_deinit_intr: +unregister: + host1x_unregister(host); +deinit_debugfs: + 
host1x_debug_deinit(host); host1x_intr_deinit(host); -fail_deinit_syncpt: +pm_put: + pm_runtime_put_sync_suspend(&pdev->dev); +pm_disable: + pm_runtime_disable(&pdev->dev); host1x_syncpt_deinit(host); -fail_reset_assert: - reset_control_assert(host->rst); -fail_unprepare_disable: - clk_disable_unprepare(host->clk); -fail_free_channels: +free_contexts: + host1x_memory_context_list_free(&host->context_list); +free_channels: host1x_channel_list_free(&host->channel_list); -fail_detach_device: - if (host->domain) { - put_iova_domain(&host->iova); - iommu_detach_device(host->domain, &pdev->dev); - } -fail_free_domain: - if (host->domain) - iommu_domain_free(host->domain); +iommu_exit: + host1x_iommu_exit(host); +destroy_cache: + host1x_bo_cache_destroy(&host->cache); return err; } -static int host1x_remove(struct platform_device *pdev) +static void host1x_remove(struct platform_device *pdev) { struct host1x *host = platform_get_drvdata(pdev); host1x_unregister(host); + host1x_debug_deinit(host); + + pm_runtime_force_suspend(&pdev->dev); + host1x_intr_deinit(host); host1x_syncpt_deinit(host); - reset_control_assert(host->rst); + host1x_memory_context_list_free(&host->context_list); + host1x_channel_list_free(&host->channel_list); + host1x_iommu_exit(host); + host1x_bo_cache_destroy(&host->cache); +} + +static int __maybe_unused host1x_runtime_suspend(struct device *dev) +{ + struct host1x *host = dev_get_drvdata(dev); + int err; + + host1x_channel_stop_all(host); + host1x_intr_stop(host); + host1x_syncpt_save(host); + + if (!host->info->skip_reset_assert) { + err = reset_control_bulk_assert(host->nresets, host->resets); + if (err) { + dev_err(dev, "failed to assert reset: %d\n", err); + goto resume_host1x; + } + + usleep_range(1000, 2000); + } + clk_disable_unprepare(host->clk); + reset_control_bulk_release(host->nresets, host->resets); - if (host->domain) { - put_iova_domain(&host->iova); - iommu_detach_device(host->domain, &pdev->dev); - iommu_domain_free(host->domain); 
+ return 0; + +resume_host1x: + host1x_setup_virtualization_tables(host); + host1x_syncpt_restore(host); + host1x_intr_start(host); + + return err; +} + +static int __maybe_unused host1x_runtime_resume(struct device *dev) +{ + struct host1x *host = dev_get_drvdata(dev); + int err; + + err = reset_control_bulk_acquire(host->nresets, host->resets); + if (err) { + dev_err(dev, "failed to acquire reset: %d\n", err); + return err; + } + + err = clk_prepare_enable(host->clk); + if (err) { + dev_err(dev, "failed to enable clock: %d\n", err); + goto release_reset; + } + + err = reset_control_bulk_deassert(host->nresets, host->resets); + if (err < 0) { + dev_err(dev, "failed to deassert reset: %d\n", err); + goto disable_clk; } + host1x_setup_virtualization_tables(host); + host1x_syncpt_restore(host); + host1x_intr_start(host); + return 0; + +disable_clk: + clk_disable_unprepare(host->clk); +release_reset: + reset_control_bulk_release(host->nresets, host->resets); + + return err; } +static const struct dev_pm_ops host1x_pm_ops = { + SET_RUNTIME_PM_OPS(host1x_runtime_suspend, host1x_runtime_resume, + NULL) + SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume) +}; + static struct platform_driver tegra_host1x_driver = { .driver = { .name = "tegra-host1x", .of_match_table = host1x_of_match, + .pm = &host1x_pm_ops, }, .probe = host1x_probe, .remove = host1x_remove, @@ -321,6 +811,20 @@ static void __exit tegra_host1x_exit(void) } module_exit(tegra_host1x_exit); +/** + * host1x_get_dma_mask() - query the supported DMA mask for host1x + * @host1x: host1x instance + * + * Note that this returns the supported DMA mask for host1x, which can be + * different from the applicable DMA mask under certain circumstances. 
+ */ +u64 host1x_get_dma_mask(struct host1x *host1x) +{ + return host1x->info->dma_mask; +} +EXPORT_SYMBOL(host1x_get_dma_mask); + +MODULE_SOFTDEP("post: tegra-drm"); MODULE_AUTHOR("Thierry Reding <thierry.reding@avionic-design.de>"); MODULE_AUTHOR("Terje Bergstrom <tbergstrom@nvidia.com>"); MODULE_DESCRIPTION("Host1x driver for Tegra products"); diff --git a/drivers/gpu/host1x/dev.h b/drivers/gpu/host1x/dev.h index ffdbc15b749b..ef44618ed88a 100644 --- a/drivers/gpu/host1x/dev.h +++ b/drivers/gpu/host1x/dev.h @@ -1,17 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2012-2015, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ #ifndef HOST1X_DEV_H @@ -20,11 +9,13 @@ #include <linux/device.h> #include <linux/iommu.h> #include <linux/iova.h> +#include <linux/irqreturn.h> #include <linux/platform_device.h> #include <linux/reset.h> #include "cdma.h" #include "channel.h" +#include "context.h" #include "intr.h" #include "job.h" #include "syncpt.h" @@ -48,7 +39,7 @@ struct host1x_cdma_ops { void (*start)(struct host1x_cdma *cdma); void (*stop)(struct host1x_cdma *cdma); void (*flush)(struct host1x_cdma *cdma); - int (*timeout_init)(struct host1x_cdma *cdma, unsigned int syncpt); + int (*timeout_init)(struct host1x_cdma *cdma); void (*timeout_destroy)(struct host1x_cdma *cdma); void (*freeze)(struct host1x_cdma *cdma); void (*resume)(struct host1x_cdma *cdma, u32 getptr); @@ -78,18 +69,31 @@ struct host1x_syncpt_ops { void (*load_wait_base)(struct host1x_syncpt *syncpt); u32 (*load)(struct host1x_syncpt *syncpt); int (*cpu_incr)(struct host1x_syncpt *syncpt); - int (*patch_wait)(struct host1x_syncpt *syncpt, void *patch_addr); + void (*assign_to_channel)(struct host1x_syncpt *syncpt, + struct host1x_channel *channel); + void (*enable_protection)(struct host1x *host); }; struct host1x_intr_ops { - int (*init_host_sync)(struct host1x *host, u32 cpm, - void (*syncpt_thresh_work)(struct work_struct *work)); + int (*init_host_sync)(struct host1x *host, u32 cpm); void (*set_syncpt_threshold)( struct host1x *host, unsigned int id, u32 thresh); void (*enable_syncpt_intr)(struct host1x *host, unsigned int id); void (*disable_syncpt_intr)(struct host1x *host, unsigned int id); void (*disable_all_syncpt_intrs)(struct host1x *host); int (*free_syncpt_irq)(struct host1x *host); + irqreturn_t (*isr)(int irq, void *dev_id); +}; + +struct host1x_sid_entry { + unsigned int base; + unsigned int offset; + unsigned int limit; +}; + +struct host1x_table_desc { + unsigned int base; + unsigned int count; }; struct host1x_info { @@ -100,24 +104,49 @@ struct host1x_info { int (*init)(struct host1x *host1x); /* 
initialize per SoC ops */ unsigned int sync_offset; /* offset of syncpoint registers */ u64 dma_mask; /* mask of addressable memory */ + bool has_wide_gather; /* supports GATHER_W opcode */ + bool has_hypervisor; /* has hypervisor registers */ + bool has_common; /* has common registers separate from hypervisor */ + unsigned int num_sid_entries; + const struct host1x_sid_entry *sid_table; + struct host1x_table_desc streamid_vm_table; + struct host1x_table_desc classid_vm_table; + struct host1x_table_desc mmio_vm_table; + /* + * On T20-T148, the boot chain may setup DC to increment syncpoints + * 26/27 on VBLANK. As such we cannot use these syncpoints until + * the display driver disables VBLANK increments. + */ + bool reserve_vblank_syncpts; + /* + * On Tegra186, secure world applications may require access to + * host1x during suspend/resume. To allow this, we need to leave + * host1x not in reset. + */ + bool skip_reset_assert; }; struct host1x { const struct host1x_info *info; void __iomem *regs; + void __iomem *hv_regs; /* hypervisor region */ + void __iomem *common_regs; + int syncpt_irqs[8]; + int num_syncpt_irqs; struct host1x_syncpt *syncpt; struct host1x_syncpt_base *bases; struct device *dev; struct clk *clk; - struct reset_control *rst; + struct reset_control_bulk_data resets[2]; + unsigned int nresets; + struct iommu_group *group; struct iommu_domain *domain; struct iova_domain iova; dma_addr_t iova_end; struct mutex intr_mutex; - int intr_syncpt_irq; const struct host1x_syncpt_ops *syncpt_op; const struct host1x_intr_ops *intr_op; @@ -131,6 +160,7 @@ struct host1x { struct mutex syncpt_mutex; struct host1x_channel_list channel_list; + struct host1x_memory_context_list context_list; struct dentry *debugfs; @@ -138,11 +168,21 @@ struct host1x { struct list_head devices; struct list_head list; + + struct device_dma_parameters dma_parms; + + struct host1x_bo_cache cache; }; -void host1x_sync_writel(struct host1x *host1x, u32 r, u32 v); +void 
host1x_common_writel(struct host1x *host1x, u32 v, u32 r); +void host1x_hypervisor_writel(struct host1x *host1x, u32 v, u32 r); +u32 host1x_hypervisor_readl(struct host1x *host1x, u32 r); +void host1x_sync_writel(struct host1x *host1x, u32 v, u32 r); u32 host1x_sync_readl(struct host1x *host1x, u32 r); -void host1x_ch_writel(struct host1x_channel *ch, u32 r, u32 v); +#ifdef CONFIG_64BIT +u64 host1x_sync_readq(struct host1x *host1x, u32 r); +#endif +void host1x_ch_writel(struct host1x_channel *ch, u32 v, u32 r); u32 host1x_ch_readl(struct host1x_channel *ch, u32 r); static inline void host1x_hw_syncpt_restore(struct host1x *host, @@ -175,17 +215,21 @@ static inline int host1x_hw_syncpt_cpu_incr(struct host1x *host, return host->syncpt_op->cpu_incr(sp); } -static inline int host1x_hw_syncpt_patch_wait(struct host1x *host, - struct host1x_syncpt *sp, - void *patch_addr) +static inline void host1x_hw_syncpt_assign_to_channel( + struct host1x *host, struct host1x_syncpt *sp, + struct host1x_channel *ch) +{ + return host->syncpt_op->assign_to_channel(sp, ch); +} + +static inline void host1x_hw_syncpt_enable_protection(struct host1x *host) { - return host->syncpt_op->patch_wait(sp, patch_addr); + return host->syncpt_op->enable_protection(host); } -static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm, - void (*syncpt_thresh_work)(struct work_struct *)) +static inline int host1x_hw_intr_init_host_sync(struct host1x *host, u32 cpm) { - return host->intr_op->init_host_sync(host, cpm, syncpt_thresh_work); + return host->intr_op->init_host_sync(host, cpm); } static inline void host1x_hw_intr_set_syncpt_threshold(struct host1x *host, @@ -249,10 +293,9 @@ static inline void host1x_hw_cdma_flush(struct host1x *host, } static inline int host1x_hw_cdma_timeout_init(struct host1x *host, - struct host1x_cdma *cdma, - unsigned int syncpt) + struct host1x_cdma *cdma) { - return host->cdma_op->timeout_init(cdma, syncpt); + return host->cdma_op->timeout_init(cdma); 
} static inline void host1x_hw_cdma_timeout_destroy(struct host1x *host, diff --git a/drivers/gpu/host1x/fence.c b/drivers/gpu/host1x/fence.c new file mode 100644 index 000000000000..139ad1afd935 --- /dev/null +++ b/drivers/gpu/host1x/fence.c @@ -0,0 +1,154 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Syncpoint dma_fence implementation + * + * Copyright (c) 2020, NVIDIA Corporation. + */ + +#include <linux/dma-fence.h> +#include <linux/file.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/sync_file.h> + +#include "fence.h" +#include "intr.h" +#include "syncpt.h" + +static const char *host1x_syncpt_fence_get_driver_name(struct dma_fence *f) +{ + return "host1x"; +} + +static const char *host1x_syncpt_fence_get_timeline_name(struct dma_fence *f) +{ + return "syncpoint"; +} + +static struct host1x_syncpt_fence *to_host1x_fence(struct dma_fence *f) +{ + return container_of(f, struct host1x_syncpt_fence, base); +} + +static bool host1x_syncpt_fence_enable_signaling(struct dma_fence *f) +{ + struct host1x_syncpt_fence *sf = to_host1x_fence(f); + + if (host1x_syncpt_is_expired(sf->sp, sf->threshold)) + return false; + + /* Reference for interrupt path. */ + dma_fence_get(f); + + /* + * The dma_fence framework requires the fence driver to keep a + * reference to any fences for which 'enable_signaling' has been + * called (and that have not been signalled). + * + * We cannot currently always guarantee that all fences get signalled + * or cancelled. As such, for such situations, set up a timeout, so + * that long-lasting fences will get reaped eventually. + */ + if (sf->timeout) { + /* Reference for timeout path. */ + dma_fence_get(f); + schedule_delayed_work(&sf->timeout_work, msecs_to_jiffies(30000)); + } + + host1x_intr_add_fence_locked(sf->sp->host, sf); + + /* + * The fence may get signalled at any time after the above call, + * so we need to initialize all state used by signalling + * before it. 
+ */ + + return true; +} + +static const struct dma_fence_ops host1x_syncpt_fence_ops = { + .get_driver_name = host1x_syncpt_fence_get_driver_name, + .get_timeline_name = host1x_syncpt_fence_get_timeline_name, + .enable_signaling = host1x_syncpt_fence_enable_signaling, +}; + +void host1x_fence_signal(struct host1x_syncpt_fence *f) +{ + if (atomic_xchg(&f->signaling, 1)) { + /* + * Already on timeout path, but we removed the fence before + * timeout path could, so drop interrupt path reference. + */ + dma_fence_put(&f->base); + return; + } + + if (f->timeout && cancel_delayed_work(&f->timeout_work)) { + /* + * We know that the timeout path will not be entered. + * Safe to drop the timeout path's reference now. + */ + dma_fence_put(&f->base); + } + + dma_fence_signal_locked(&f->base); + dma_fence_put(&f->base); +} + +static void do_fence_timeout(struct work_struct *work) +{ + struct delayed_work *dwork = (struct delayed_work *)work; + struct host1x_syncpt_fence *f = + container_of(dwork, struct host1x_syncpt_fence, timeout_work); + + if (atomic_xchg(&f->signaling, 1)) { + /* Already on interrupt path, drop timeout path reference if any. */ + if (f->timeout) + dma_fence_put(&f->base); + return; + } + + if (host1x_intr_remove_fence(f->sp->host, f)) { + /* + * Managed to remove fence from queue, so it's safe to drop + * the interrupt path's reference. 
+ */ + dma_fence_put(&f->base); + } + + dma_fence_set_error(&f->base, -ETIMEDOUT); + dma_fence_signal(&f->base); + if (f->timeout) + dma_fence_put(&f->base); +} + +struct dma_fence *host1x_fence_create(struct host1x_syncpt *sp, u32 threshold, + bool timeout) +{ + struct host1x_syncpt_fence *fence; + + fence = kzalloc(sizeof(*fence), GFP_KERNEL); + if (!fence) + return ERR_PTR(-ENOMEM); + + fence->sp = sp; + fence->threshold = threshold; + fence->timeout = timeout; + + dma_fence_init(&fence->base, &host1x_syncpt_fence_ops, &sp->fences.lock, + dma_fence_context_alloc(1), 0); + + INIT_DELAYED_WORK(&fence->timeout_work, do_fence_timeout); + + return &fence->base; +} +EXPORT_SYMBOL(host1x_fence_create); + +void host1x_fence_cancel(struct dma_fence *f) +{ + struct host1x_syncpt_fence *sf = to_host1x_fence(f); + + schedule_delayed_work(&sf->timeout_work, 0); + flush_delayed_work(&sf->timeout_work); +} +EXPORT_SYMBOL(host1x_fence_cancel); diff --git a/drivers/gpu/host1x/fence.h b/drivers/gpu/host1x/fence.h new file mode 100644 index 000000000000..f3c644c73cad --- /dev/null +++ b/drivers/gpu/host1x/fence.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2020, NVIDIA Corporation. + */ + +#ifndef HOST1X_FENCE_H +#define HOST1X_FENCE_H + +struct host1x_syncpt_fence { + struct dma_fence base; + + atomic_t signaling; + + struct host1x_syncpt *sp; + u32 threshold; + bool timeout; + + struct delayed_work timeout_work; + + struct list_head list; +}; + +struct host1x_fence_list { + spinlock_t lock; + struct list_head list; +}; + +void host1x_fence_signal(struct host1x_syncpt_fence *fence); + +#endif diff --git a/drivers/gpu/host1x/hw/cdma_hw.c b/drivers/gpu/host1x/hw/cdma_hw.c index 6b231119193e..3f3f0018eee0 100644 --- a/drivers/gpu/host1x/hw/cdma_hw.c +++ b/drivers/gpu/host1x/hw/cdma_hw.c @@ -1,19 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Tegra host1x Command DMA * * Copyright (c) 2010-2013, NVIDIA Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/slab.h> @@ -39,8 +28,6 @@ static void push_buffer_init(struct push_buffer *pb) static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr, u32 syncpt_incrs, u32 syncval, u32 nr_slots) { - struct host1x *host1x = cdma_to_host1x(cdma); - struct push_buffer *pb = &cdma->push_buffer; unsigned int i; for (i = 0; i < syncpt_incrs; i++) @@ -48,18 +35,6 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr, /* after CPU incr, ensure shadow is up to date */ host1x_syncpt_load(cdma->timeout.syncpt); - - /* NOP all the PB slots */ - while (nr_slots--) { - u32 *p = (u32 *)(pb->mapped + getptr); - *(p++) = HOST1X_OPCODE_NOP; - *(p++) = HOST1X_OPCODE_NOP; - dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__, - &pb->dma, getptr); - getptr = (getptr + 8) & (pb->size - 1); - } - - wmb(); } /* @@ -68,20 +43,31 @@ static void cdma_timeout_cpu_incr(struct host1x_cdma *cdma, u32 getptr, static void cdma_start(struct host1x_cdma *cdma) { struct host1x_channel *ch = cdma_to_channel(cdma); + u64 start, end; if (cdma->running) return; cdma->last_pos = cdma->push_buffer.pos; + start = cdma->push_buffer.dma; + end = cdma->push_buffer.size + 4; host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, HOST1X_CHANNEL_DMACTRL); /* set base, put and end pointer */ - host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART); + 
host1x_ch_writel(ch, lower_32_bits(start), HOST1X_CHANNEL_DMASTART); +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, upper_32_bits(start), HOST1X_CHANNEL_DMASTART_HI); +#endif host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT); - host1x_ch_writel(ch, cdma->push_buffer.dma + cdma->push_buffer.size + 4, - HOST1X_CHANNEL_DMAEND); +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, 0, HOST1X_CHANNEL_DMAPUT_HI); +#endif + host1x_ch_writel(ch, lower_32_bits(end), HOST1X_CHANNEL_DMAEND); +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, upper_32_bits(end), HOST1X_CHANNEL_DMAEND_HI); +#endif /* reset GET */ host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP | @@ -104,6 +90,7 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr) { struct host1x *host1x = cdma_to_host1x(cdma); struct host1x_channel *ch = cdma_to_channel(cdma); + u64 start, end; if (cdma->running) return; @@ -113,10 +100,18 @@ static void cdma_timeout_restart(struct host1x_cdma *cdma, u32 getptr) host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, HOST1X_CHANNEL_DMACTRL); + start = cdma->push_buffer.dma; + end = cdma->push_buffer.size + 4; + /* set base, end pointer (all of memory) */ - host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART); - host1x_ch_writel(ch, cdma->push_buffer.dma + cdma->push_buffer.size, - HOST1X_CHANNEL_DMAEND); + host1x_ch_writel(ch, lower_32_bits(start), HOST1X_CHANNEL_DMASTART); +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, upper_32_bits(start), HOST1X_CHANNEL_DMASTART_HI); +#endif + host1x_ch_writel(ch, lower_32_bits(end), HOST1X_CHANNEL_DMAEND); +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, upper_32_bits(end), HOST1X_CHANNEL_DMAEND_HI); +#endif /* set GET, by loading the value in PUT (then reset GET) */ host1x_ch_writel(ch, getptr, HOST1X_CHANNEL_DMAPUT); @@ -172,6 +167,30 @@ static void cdma_stop(struct host1x_cdma *cdma) mutex_unlock(&cdma->lock); } +static void cdma_hw_cmdproc_stop(struct host1x *host, struct host1x_channel *ch, + bool stop) +{ +#if 
HOST1X_HW >= 6 + host1x_ch_writel(ch, stop ? 0x1 : 0x0, HOST1X_CHANNEL_CMDPROC_STOP); +#else + u32 cmdproc_stop = host1x_sync_readl(host, HOST1X_SYNC_CMDPROC_STOP); + if (stop) + cmdproc_stop |= BIT(ch->id); + else + cmdproc_stop &= ~BIT(ch->id); + host1x_sync_writel(host, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP); +#endif +} + +static void cdma_hw_teardown(struct host1x *host, struct host1x_channel *ch) +{ +#if HOST1X_HW >= 6 + host1x_ch_writel(ch, 0x1, HOST1X_CHANNEL_TEARDOWN); +#else + host1x_sync_writel(host, BIT(ch->id), HOST1X_SYNC_CH_TEARDOWN); +#endif +} + /* * Stops both channel's command processor and CDMA immediately. * Also, tears down the channel and resets corresponding module. @@ -180,7 +199,6 @@ static void cdma_freeze(struct host1x_cdma *cdma) { struct host1x *host = cdma_to_host1x(cdma); struct host1x_channel *ch = cdma_to_channel(cdma); - u32 cmdproc_stop; if (cdma->torndown && !cdma->running) { dev_warn(host->dev, "Already torn down\n"); @@ -189,9 +207,7 @@ static void cdma_freeze(struct host1x_cdma *cdma) dev_dbg(host->dev, "freezing channel (id %d)\n", ch->id); - cmdproc_stop = host1x_sync_readl(host, HOST1X_SYNC_CMDPROC_STOP); - cmdproc_stop |= BIT(ch->id); - host1x_sync_writel(host, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP); + cdma_hw_cmdproc_stop(host, ch, true); dev_dbg(host->dev, "%s: DMA GET 0x%x, PUT HW 0x%x / shadow 0x%x\n", __func__, host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET), @@ -201,7 +217,7 @@ static void cdma_freeze(struct host1x_cdma *cdma) host1x_ch_writel(ch, HOST1X_CHANNEL_DMACTRL_DMASTOP, HOST1X_CHANNEL_DMACTRL); - host1x_sync_writel(host, BIT(ch->id), HOST1X_SYNC_CH_TEARDOWN); + cdma_hw_teardown(host, ch); cdma->running = false; cdma->torndown = true; @@ -211,20 +227,60 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 getptr) { struct host1x *host1x = cdma_to_host1x(cdma); struct host1x_channel *ch = cdma_to_channel(cdma); - u32 cmdproc_stop; dev_dbg(host1x->dev, "resuming channel (id %u, DMAGET restart = 0x%x)\n", 
ch->id, getptr); - cmdproc_stop = host1x_sync_readl(host1x, HOST1X_SYNC_CMDPROC_STOP); - cmdproc_stop &= ~BIT(ch->id); - host1x_sync_writel(host1x, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP); + cdma_hw_cmdproc_stop(host1x, ch, false); cdma->torndown = false; cdma_timeout_restart(cdma, getptr); } +static void timeout_release_mlock(struct host1x_cdma *cdma) +{ +#if HOST1X_HW >= 8 + /* Tegra186 and Tegra194 require a more complicated MLOCK release + * sequence. Furthermore, those chips by default don't enforce MLOCKs, + * so it turns out that if we don't /actually/ need MLOCKs, we can just + * ignore them. + * + * As such, for now just implement this on Tegra234 where things are + * stricter but also easy to implement. + */ + struct host1x_channel *ch = cdma_to_channel(cdma); + struct host1x *host1x = cdma_to_host1x(cdma); + u32 offset; + + switch (ch->client->class) { + case HOST1X_CLASS_NVJPG1: + offset = HOST1X_COMMON_NVJPG1_MLOCK; + break; + case HOST1X_CLASS_NVENC: + offset = HOST1X_COMMON_NVENC_MLOCK; + break; + case HOST1X_CLASS_VIC: + offset = HOST1X_COMMON_VIC_MLOCK; + break; + case HOST1X_CLASS_NVJPG: + offset = HOST1X_COMMON_NVJPG_MLOCK; + break; + case HOST1X_CLASS_NVDEC: + offset = HOST1X_COMMON_NVDEC_MLOCK; + break; + case HOST1X_CLASS_OFA: + offset = HOST1X_COMMON_OFA_MLOCK; + break; + default: + WARN(1, "%s was not updated for class %u", __func__, ch->client->class); + return; + } + + host1x_common_writel(host1x, 0x0, offset); +#endif +} + /* * If this timeout fires, it indicates the current sync_queue entry has * exceeded its TTL and the userctx should be timed out and remaining @@ -232,7 +288,7 @@ static void cdma_resume(struct host1x_cdma *cdma, u32 getptr) */ static void cdma_timeout_handler(struct work_struct *work) { - u32 prev_cmdproc, cmdproc_stop, syncpt_val; + u32 syncpt_val; struct host1x_cdma *cdma; struct host1x *host1x; struct host1x_channel *ch; @@ -254,12 +310,7 @@ static void cdma_timeout_handler(struct work_struct *work) } /* stop 
processing to get a clean snapshot */ - prev_cmdproc = host1x_sync_readl(host1x, HOST1X_SYNC_CMDPROC_STOP); - cmdproc_stop = prev_cmdproc | BIT(ch->id); - host1x_sync_writel(host1x, cmdproc_stop, HOST1X_SYNC_CMDPROC_STOP); - - dev_dbg(host1x->dev, "cdma_timeout: cmdproc was 0x%x is 0x%x\n", - prev_cmdproc, cmdproc_stop); + cdma_hw_cmdproc_stop(host1x, ch, true); syncpt_val = host1x_syncpt_load(cdma->timeout.syncpt); @@ -268,9 +319,7 @@ static void cdma_timeout_handler(struct work_struct *work) dev_dbg(host1x->dev, "cdma_timeout: expired, but buffer had completed\n"); /* restore */ - cmdproc_stop = prev_cmdproc & ~(BIT(ch->id)); - host1x_sync_writel(host1x, cmdproc_stop, - HOST1X_SYNC_CMDPROC_STOP); + cdma_hw_cmdproc_stop(host1x, ch, false); mutex_unlock(&cdma->lock); return; } @@ -282,6 +331,9 @@ static void cdma_timeout_handler(struct work_struct *work) /* stop HW, resetting channel/module */ host1x_hw_cdma_freeze(host1x, cdma); + /* release any held MLOCK */ + timeout_release_mlock(cdma); + host1x_cdma_update_sync_queue(cdma, ch->dev); mutex_unlock(&cdma->lock); } @@ -289,7 +341,7 @@ static void cdma_timeout_handler(struct work_struct *work) /* * Init timeout resources */ -static int cdma_timeout_init(struct host1x_cdma *cdma, unsigned int syncpt) +static int cdma_timeout_init(struct host1x_cdma *cdma) { INIT_DELAYED_WORK(&cdma->timeout.wq, cdma_timeout_handler); cdma->timeout.initialized = true; diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c index 8447a56c41ca..2df6a16d484e 100644 --- a/drivers/gpu/host1x/hw/channel_hw.c +++ b/drivers/gpu/host1x/hw/channel_hw.c @@ -1,22 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Tegra host1x Channel * * Copyright (c) 2010-2013, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #include <linux/host1x.h> +#include <linux/iommu.h> #include <linux/slab.h> #include <trace/events/host1x.h> @@ -26,7 +16,6 @@ #include "../intr.h" #include "../job.h" -#define HOST1X_CHANNEL_SIZE 16384 #define TRACE_MAX_LENGTH 128U static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo, @@ -58,25 +47,120 @@ static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo, } } -static void submit_gathers(struct host1x_job *job) +static void submit_wait(struct host1x_job *job, u32 id, u32 threshold) { struct host1x_cdma *cdma = &job->channel->cdma; - unsigned int i; - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; - u32 op1 = host1x_opcode_gather(g->words); - u32 op2 = g->base + g->offset; +#if HOST1X_HW >= 2 + host1x_cdma_push_wide(cdma, + host1x_opcode_setclass( + HOST1X_CLASS_HOST1X, + HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32, + /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */ + BIT(0) | BIT(2) + ), + threshold, + id, + HOST1X_OPCODE_NOP + ); +#else + /* TODO add waitchk or use waitbases or other mitigation */ + host1x_cdma_push(cdma, + host1x_opcode_setclass( + HOST1X_CLASS_HOST1X, + host1x_uclass_wait_syncpt_r(), + BIT(0) + ), + host1x_class_host_wait_syncpt(id, threshold) + ); +#endif +} - trace_write_gather(cdma, g->bo, g->offset, op1 & 0xffff); - host1x_cdma_push(cdma, op1, op2); +static void submit_setclass(struct host1x_job *job, u32 next_class) +{ + struct host1x_cdma *cdma = &job->channel->cdma; + +#if HOST1X_HW >= 6 + u32 stream_id; + + /* + * If a memory context has been set, use it. 
Otherwise + * (if context isolation is disabled) use the engine's + * firmware stream ID. + */ + if (job->memory_context) + stream_id = job->memory_context->stream_id; + else + stream_id = job->engine_fallback_streamid; + + host1x_cdma_push_wide(cdma, + host1x_opcode_setclass(next_class, 0, 0), + host1x_opcode_setpayload(stream_id), + host1x_opcode_setstreamid(job->engine_streamid_offset / 4), + HOST1X_OPCODE_NOP); +#else + host1x_cdma_push(cdma, + host1x_opcode_setclass(next_class, 0, 0), + HOST1X_OPCODE_NOP + ); +#endif +} + +static void submit_gathers(struct host1x_job *job, struct host1x_job_cmd *cmds, u32 num_cmds, + u32 job_syncpt_base) +{ + struct host1x_cdma *cdma = &job->channel->cdma; +#if HOST1X_HW < 6 + struct device *dev = job->channel->dev; +#endif + unsigned int i; + u32 threshold; + + for (i = 0; i < num_cmds; i++) { + struct host1x_job_cmd *cmd = &cmds[i]; + + if (cmd->is_wait) { + if (cmd->wait.relative) + threshold = job_syncpt_base + cmd->wait.threshold; + else + threshold = cmd->wait.threshold; + + submit_wait(job, cmd->wait.id, threshold); + submit_setclass(job, cmd->wait.next_class); + } else { + struct host1x_job_gather *g = &cmd->gather; + + dma_addr_t addr = g->base + g->offset; + u32 op2, op3; + + op2 = lower_32_bits(addr); + op3 = upper_32_bits(addr); + + trace_write_gather(cdma, g->bo, g->offset, g->words); + + if (op3 != 0) { +#if HOST1X_HW >= 6 + u32 op1 = host1x_opcode_gather_wide(g->words); + u32 op4 = HOST1X_OPCODE_NOP; + + host1x_cdma_push_wide(cdma, op1, op2, op3, op4); +#else + dev_err(dev, "invalid gather for push buffer %pad\n", + &addr); + continue; +#endif + } else { + u32 op1 = host1x_opcode_gather(g->words); + + host1x_cdma_push(cdma, op1, op2); + } + } } } static inline void synchronize_syncpt_base(struct host1x_job *job) { - struct host1x *host = dev_get_drvdata(job->channel->dev->parent); - struct host1x_syncpt *sp = host->syncpt + job->syncpt_id; + struct host1x_syncpt *sp = job->syncpt; unsigned int id; u32 value; @@ 
-90,54 +174,115 @@ static inline void synchronize_syncpt_base(struct host1x_job *job) HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(value)); } -static int channel_submit(struct host1x_job *job) +static void host1x_channel_set_streamid(struct host1x_channel *channel) { - struct host1x_channel *ch = job->channel; - struct host1x_syncpt *sp; - u32 user_syncpt_incrs = job->syncpt_incrs; - u32 prev_max = 0; - u32 syncval; - int err; - struct host1x_waitlist *completed_waiter = NULL; +#if HOST1X_HW >= 6 + u32 stream_id; + + if (!tegra_dev_iommu_get_stream_id(channel->dev->parent, &stream_id)) + stream_id = TEGRA_STREAM_ID_BYPASS; + + host1x_ch_writel(channel, stream_id, HOST1X_CHANNEL_SMMU_STREAMID); +#endif +} + +static void host1x_enable_gather_filter(struct host1x_channel *ch) +{ +#if HOST1X_HW >= 6 struct host1x *host = dev_get_drvdata(ch->dev->parent); + u32 val; + + if (!host->hv_regs) + return; + + val = host1x_hypervisor_readl( + host, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); + val |= BIT(ch->id % 32); + host1x_hypervisor_writel( + host, val, HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32)); +#elif HOST1X_HW >= 4 + host1x_ch_writel(ch, + HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1), + HOST1X_CHANNEL_CHANNELCTRL); +#endif +} - sp = host->syncpt + job->syncpt_id; - trace_host1x_channel_submit(dev_name(ch->dev), - job->num_gathers, job->num_relocs, - job->num_waitchk, job->syncpt_id, - job->syncpt_incrs); +static void channel_program_cdma(struct host1x_job *job) +{ + struct host1x_cdma *cdma = &job->channel->cdma; + struct host1x_syncpt *sp = job->syncpt; - /* before error checks, return current max */ - prev_max = job->syncpt_end = host1x_syncpt_read_max(sp); +#if HOST1X_HW >= 6 + u32 fence; + int i = 0; - /* get submit lock */ - err = mutex_lock_interruptible(&ch->submitlock); - if (err) - goto error; + if (job->num_cmds == 0) + goto prefences_done; + if (!job->cmds[0].is_wait || job->cmds[0].wait.relative) + goto prefences_done; - completed_waiter = 
kzalloc(sizeof(*completed_waiter), GFP_KERNEL); - if (!completed_waiter) { - mutex_unlock(&ch->submitlock); - err = -ENOMEM; - goto error; - } + /* Enter host1x class with invalid stream ID for prefence waits. */ + host1x_cdma_push_wide(cdma, + host1x_opcode_acquire_mlock(1), + host1x_opcode_setclass(1, 0, 0), + host1x_opcode_setpayload(0), + host1x_opcode_setstreamid(0x1fffff)); - /* begin a CDMA submit */ - err = host1x_cdma_begin(&ch->cdma, job); - if (err) { - mutex_unlock(&ch->submitlock); - goto error; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_cmd *cmd = &job->cmds[i]; + + if (!cmd->is_wait || cmd->wait.relative) + break; + + submit_wait(job, cmd->wait.id, cmd->wait.threshold); } + host1x_cdma_push(cdma, + HOST1X_OPCODE_NOP, + host1x_opcode_release_mlock(1)); + +prefences_done: + /* Enter engine class with invalid stream ID. */ + host1x_cdma_push_wide(cdma, + host1x_opcode_acquire_mlock(job->class), + host1x_opcode_setclass(job->class, 0, 0), + host1x_opcode_setpayload(0), + host1x_opcode_setstreamid(job->engine_streamid_offset / 4)); + + /* Before switching stream ID to real stream ID, ensure engine is idle. */ + fence = host1x_syncpt_incr_max(sp, 1); + host1x_cdma_push(&job->channel->cdma, + host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), + HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | + HOST1X_UCLASS_INCR_SYNCPT_COND_F(4)); + submit_wait(job, job->syncpt->id, fence); + submit_setclass(job, job->class); + + /* Submit work. */ + job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs); + submit_gathers(job, job->cmds + i, job->num_cmds - i, job->syncpt_end - job->syncpt_incrs); + + /* Before releasing MLOCK, ensure engine is idle again. 
*/ + fence = host1x_syncpt_incr_max(sp, 1); + host1x_cdma_push(&job->channel->cdma, + host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), + HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | + HOST1X_UCLASS_INCR_SYNCPT_COND_F(4)); + submit_wait(job, job->syncpt->id, fence); + + /* Release MLOCK. */ + host1x_cdma_push(cdma, + HOST1X_OPCODE_NOP, host1x_opcode_release_mlock(job->class)); +#else if (job->serialize) { /* * Force serialization by inserting a host wait for the * previous job to finish before this one can commence. */ - host1x_cdma_push(&ch->cdma, + host1x_cdma_push(cdma, host1x_opcode_setclass(HOST1X_CLASS_HOST1X, host1x_uclass_wait_syncpt_r(), 1), - host1x_class_host_wait_syncpt(job->syncpt_id, + host1x_class_host_wait_syncpt(job->syncpt->id, host1x_syncpt_read_max(sp))); } @@ -145,43 +290,96 @@ static int channel_submit(struct host1x_job *job) if (sp->base) synchronize_syncpt_base(job); - syncval = host1x_syncpt_incr_max(sp, user_syncpt_incrs); - - job->syncpt_end = syncval; - /* add a setclass for modules that require it */ if (job->class) - host1x_cdma_push(&ch->cdma, + host1x_cdma_push(cdma, host1x_opcode_setclass(job->class, 0, 0), HOST1X_OPCODE_NOP); - submit_gathers(job); + job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs); + + submit_gathers(job, job->cmds, job->num_cmds, job->syncpt_end - job->syncpt_incrs); +#endif +} + +static void job_complete_callback(struct dma_fence *fence, struct dma_fence_cb *cb) +{ + struct host1x_job *job = container_of(cb, struct host1x_job, fence_cb); + + /* Schedules CDMA update. 
*/ + host1x_cdma_update(&job->channel->cdma); +} + +static int channel_submit(struct host1x_job *job) +{ + struct host1x_channel *ch = job->channel; + struct host1x_syncpt *sp = job->syncpt; + u32 prev_max = 0; + u32 syncval; + int err; + struct host1x *host = dev_get_drvdata(ch->dev->parent); + + trace_host1x_channel_submit(dev_name(ch->dev), + job->num_cmds, job->num_relocs, + job->syncpt->id, job->syncpt_incrs); + + /* before error checks, return current max */ + prev_max = job->syncpt_end = host1x_syncpt_read_max(sp); + + /* get submit lock */ + err = mutex_lock_interruptible(&ch->submitlock); + if (err) + return err; + + host1x_channel_set_streamid(ch); + host1x_enable_gather_filter(ch); + host1x_hw_syncpt_assign_to_channel(host, sp, ch); + + /* begin a CDMA submit */ + err = host1x_cdma_begin(&ch->cdma, job); + if (err) { + mutex_unlock(&ch->submitlock); + return err; + } + + channel_program_cdma(job); + syncval = host1x_syncpt_read_max(sp); + + /* + * Create fence before submitting job to HW to avoid job completing + * before the fence is set up. 
+ */ + job->fence = host1x_fence_create(sp, syncval, true); + if (WARN(IS_ERR(job->fence), "Failed to create submit complete fence")) { + job->fence = NULL; + } else { + err = dma_fence_add_callback(job->fence, &job->fence_cb, + job_complete_callback); + } /* end CDMA submit & stash pinned hMems into sync queue */ host1x_cdma_end(&ch->cdma, job); trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval); - /* schedule a submit complete interrupt */ - err = host1x_intr_add_action(host, job->syncpt_id, syncval, - HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch, - completed_waiter, NULL); - completed_waiter = NULL; - WARN(err, "Failed to set submit complete interrupt"); - mutex_unlock(&ch->submitlock); - return 0; + if (err == -ENOENT) + host1x_cdma_update(&ch->cdma); + else + WARN(err, "Failed to set submit complete interrupt"); -error: - kfree(completed_waiter); - return err; + return 0; } static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev, unsigned int index) { - ch->regs = dev->regs + index * HOST1X_CHANNEL_SIZE; +#if HOST1X_HW < 6 + ch->regs = dev->regs + index * 0x4000; +#else + ch->regs = dev->regs + index * 0x100; +#endif return 0; } diff --git a/drivers/gpu/host1x/hw/debug_hw.c b/drivers/gpu/host1x/hw/debug_hw.c index 7a4a3286e4a7..4c32aa1b95e8 100644 --- a/drivers/gpu/host1x/hw/debug_hw.c +++ b/drivers/gpu/host1x/hw/debug_hw.c @@ -1,18 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010 Google, Inc. * Author: Erik Gilling <konkers@android.com> * * Copyright (C) 2011-2013 NVIDIA Corporation - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * */ #include "../dev.h" @@ -30,6 +21,13 @@ enum { HOST1X_OPCODE_IMM = 0x04, HOST1X_OPCODE_RESTART = 0x05, HOST1X_OPCODE_GATHER = 0x06, + HOST1X_OPCODE_SETSTRMID = 0x07, + HOST1X_OPCODE_SETAPPID = 0x08, + HOST1X_OPCODE_SETPYLD = 0x09, + HOST1X_OPCODE_INCR_W = 0x0a, + HOST1X_OPCODE_NONINCR_W = 0x0b, + HOST1X_OPCODE_GATHER_W = 0x0c, + HOST1X_OPCODE_RESTART_W = 0x0d, HOST1X_OPCODE_EXTEND = 0x0e, }; @@ -38,78 +36,134 @@ enum { HOST1X_OPCODE_EXTEND_RELEASE_MLOCK = 0x01, }; -static unsigned int show_channel_command(struct output *o, u32 val) +#define INVALID_PAYLOAD 0xffffffff + +static unsigned int show_channel_command(struct output *o, u32 val, + u32 *payload) { - unsigned int mask, subop; + unsigned int mask, subop, num, opcode; - switch (val >> 28) { + opcode = val >> 28; + + switch (opcode) { case HOST1X_OPCODE_SETCLASS: mask = val & 0x3f; if (mask) { - host1x_debug_output(o, "SETCL(class=%03x, offset=%03x, mask=%02x, [", + host1x_debug_cont(o, "SETCL(class=%03x, offset=%03x, mask=%02x, [", val >> 6 & 0x3ff, val >> 16 & 0xfff, mask); return hweight8(mask); } - host1x_debug_output(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff); + host1x_debug_cont(o, "SETCL(class=%03x)\n", val >> 6 & 0x3ff); return 0; case HOST1X_OPCODE_INCR: - host1x_debug_output(o, "INCR(offset=%03x, [", + num = val & 0xffff; + host1x_debug_cont(o, "INCR(offset=%03x, [", val >> 16 & 0xfff); - return val & 0xffff; + if (!num) + host1x_debug_cont(o, "])\n"); + + return num; case HOST1X_OPCODE_NONINCR: - host1x_debug_output(o, "NONINCR(offset=%03x, [", + num = val & 0xffff; + host1x_debug_cont(o, "NONINCR(offset=%03x, [", val >> 16 & 0xfff); - return val & 0xffff; + if (!num) + host1x_debug_cont(o, "])\n"); + + return num; case HOST1X_OPCODE_MASK: mask = val & 0xffff; - host1x_debug_output(o, "MASK(offset=%03x, mask=%03x, [", + host1x_debug_cont(o, "MASK(offset=%03x, mask=%03x, [", val >> 16 & 0xfff, mask); + if (!mask) + host1x_debug_cont(o, 
"])\n"); + return hweight16(mask); case HOST1X_OPCODE_IMM: - host1x_debug_output(o, "IMM(offset=%03x, data=%03x)\n", + host1x_debug_cont(o, "IMM(offset=%03x, data=%03x)\n", val >> 16 & 0xfff, val & 0xffff); return 0; case HOST1X_OPCODE_RESTART: - host1x_debug_output(o, "RESTART(offset=%08x)\n", val << 4); + host1x_debug_cont(o, "RESTART(offset=%08x)\n", val << 4); return 0; case HOST1X_OPCODE_GATHER: - host1x_debug_output(o, "GATHER(offset=%03x, insert=%d, type=%d, count=%04x, addr=[", + host1x_debug_cont(o, "GATHER(offset=%03x, insert=%d, type=%d, count=%04x, addr=[", val >> 16 & 0xfff, val >> 15 & 0x1, val >> 14 & 0x1, val & 0x3fff); return 1; +#if HOST1X_HW >= 6 + case HOST1X_OPCODE_SETSTRMID: + host1x_debug_cont(o, "SETSTRMID(offset=%06x)\n", + val & 0x3fffff); + return 0; + + case HOST1X_OPCODE_SETAPPID: + host1x_debug_cont(o, "SETAPPID(appid=%02x)\n", val & 0xff); + return 0; + + case HOST1X_OPCODE_SETPYLD: + *payload = val & 0xffff; + host1x_debug_cont(o, "SETPYLD(data=%04x)\n", *payload); + return 0; + + case HOST1X_OPCODE_INCR_W: + case HOST1X_OPCODE_NONINCR_W: + host1x_debug_cont(o, "%s(offset=%06x, ", + opcode == HOST1X_OPCODE_INCR_W ? 
+ "INCR_W" : "NONINCR_W", + val & 0x3fffff); + if (*payload == 0) { + host1x_debug_cont(o, "[])\n"); + return 0; + } else if (*payload == INVALID_PAYLOAD) { + host1x_debug_cont(o, "unknown)\n"); + return 0; + } else { + host1x_debug_cont(o, "["); + return *payload; + } + + case HOST1X_OPCODE_GATHER_W: + host1x_debug_cont(o, "GATHER_W(count=%04x, addr=[", + val & 0x3fff); + return 2; +#endif + case HOST1X_OPCODE_EXTEND: subop = val >> 24 & 0xf; if (subop == HOST1X_OPCODE_EXTEND_ACQUIRE_MLOCK) - host1x_debug_output(o, "ACQUIRE_MLOCK(index=%d)\n", + host1x_debug_cont(o, "ACQUIRE_MLOCK(index=%d)\n", val & 0xff); else if (subop == HOST1X_OPCODE_EXTEND_RELEASE_MLOCK) - host1x_debug_output(o, "RELEASE_MLOCK(index=%d)\n", + host1x_debug_cont(o, "RELEASE_MLOCK(index=%d)\n", val & 0xff); else - host1x_debug_output(o, "EXTEND_UNKNOWN(%08x)\n", val); + host1x_debug_cont(o, "EXTEND_UNKNOWN(%08x)\n", val); return 0; default: + host1x_debug_cont(o, "UNKNOWN\n"); return 0; } } -static void show_gather(struct output *o, phys_addr_t phys_addr, +static void show_gather(struct output *o, dma_addr_t phys_addr, unsigned int words, struct host1x_cdma *cdma, - phys_addr_t pin_addr, u32 *map_addr) + dma_addr_t pin_addr, u32 *map_addr) { /* Map dmaget cursor to corresponding mem handle */ u32 offset = phys_addr - pin_addr; unsigned int data_count = 0, i; + u32 payload = INVALID_PAYLOAD; /* * Sometimes we're given different hardware address to the same @@ -122,15 +176,24 @@ static void show_gather(struct output *o, phys_addr_t phys_addr, } for (i = 0; i < words; i++) { - u32 addr = phys_addr + i * 4; - u32 val = *(map_addr + offset / 4 + i); + dma_addr_t addr = phys_addr + i * 4; + u32 voffset = offset + i * 4; + u32 val; + + /* If we reach the RESTART opcode, continue at the beginning of pushbuffer */ + if (cdma && voffset >= cdma->push_buffer.size) { + addr -= cdma->push_buffer.size; + voffset -= cdma->push_buffer.size; + } + + val = *(map_addr + voffset / 4); if (!data_count) { - 
host1x_debug_output(o, "%08x: %08x:", addr, val); - data_count = show_channel_command(o, val); + host1x_debug_output(o, " %pad: %08x: ", &addr, val); + data_count = show_channel_command(o, val, &payload); } else { - host1x_debug_output(o, "%08x%s", val, - data_count > 0 ? ", " : "])\n"); + host1x_debug_cont(o, "%08x%s", val, + data_count > 1 ? ", " : "])\n"); data_count--; } } @@ -138,20 +201,28 @@ static void show_gather(struct output *o, phys_addr_t phys_addr, static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma) { + struct push_buffer *pb = &cdma->push_buffer; struct host1x_job *job; list_for_each_entry(job, &cdma->sync_queue, list) { unsigned int i; - host1x_debug_output(o, "\n%p: JOB, syncpt_id=%d, syncpt_val=%d, first_get=%08x, timeout=%d num_slots=%d, num_handles=%d\n", - job, job->syncpt_id, job->syncpt_end, - job->first_get, job->timeout, + host1x_debug_output(o, "JOB, syncpt %u: %u timeout: %u num_slots: %u num_handles: %u\n", + job->syncpt->id, job->syncpt_end, job->timeout, job->num_slots, job->num_unpins); - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + show_gather(o, pb->dma + job->first_get, job->num_slots * 2, cdma, + pb->dma, pb->mapped); + + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; u32 *mapped; + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; + if (job->gather_copy_mapped) mapped = (u32 *)job->gather_copy_mapped; else @@ -162,10 +233,10 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma) continue; } - host1x_debug_output(o, " GATHER at %pad+%#x, %d words\n", + host1x_debug_output(o, " GATHER at %pad+%#x, %d words\n", &g->base, g->offset, g->words); - show_gather(o, g->base + g->offset, g->words, cdma, + show_gather(o, g->base + g->offset, g->words, NULL, g->base, mapped); if (!job->gather_copy_mapped) @@ -174,138 +245,11 @@ static void show_channel_gathers(struct output *o, struct host1x_cdma *cdma) } } 
-static void host1x_debug_show_channel_cdma(struct host1x *host, - struct host1x_channel *ch, - struct output *o) -{ - struct host1x_cdma *cdma = &ch->cdma; - u32 dmaput, dmaget, dmactrl; - u32 cbstat, cbread; - u32 val, base, baseval; - - dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT); - dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET); - dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL); - cbread = host1x_sync_readl(host, HOST1X_SYNC_CBREAD(ch->id)); - cbstat = host1x_sync_readl(host, HOST1X_SYNC_CBSTAT(ch->id)); - - host1x_debug_output(o, "%u-%s: ", ch->id, dev_name(ch->dev)); - - if (HOST1X_CHANNEL_DMACTRL_DMASTOP_V(dmactrl) || - !ch->cdma.push_buffer.mapped) { - host1x_debug_output(o, "inactive\n\n"); - return; - } - - if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) == HOST1X_CLASS_HOST1X && - HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) == - HOST1X_UCLASS_WAIT_SYNCPT) - host1x_debug_output(o, "waiting on syncpt %d val %d\n", - cbread >> 24, cbread & 0xffffff); - else if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) == - HOST1X_CLASS_HOST1X && - HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) == - HOST1X_UCLASS_WAIT_SYNCPT_BASE) { - base = (cbread >> 16) & 0xff; - baseval = - host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(base)); - val = cbread & 0xffff; - host1x_debug_output(o, "waiting on syncpt %d val %d (base %d = %d; offset = %d)\n", - cbread >> 24, baseval + val, base, - baseval, val); - } else - host1x_debug_output(o, "active class %02x, offset %04x, val %08x\n", - HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat), - HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat), - cbread); - - host1x_debug_output(o, "DMAPUT %08x, DMAGET %08x, DMACTL %08x\n", - dmaput, dmaget, dmactrl); - host1x_debug_output(o, "CBREAD %08x, CBSTAT %08x\n", cbread, cbstat); - - show_channel_gathers(o, cdma); - host1x_debug_output(o, "\n"); -} - -static void host1x_debug_show_channel_fifo(struct host1x *host, - struct host1x_channel *ch, - struct output *o) -{ - u32 val, rd_ptr, wr_ptr, start, end; - unsigned int data_count = 
0; - - host1x_debug_output(o, "%u: fifo:\n", ch->id); - - val = host1x_ch_readl(ch, HOST1X_CHANNEL_FIFOSTAT); - host1x_debug_output(o, "FIFOSTAT %08x\n", val); - if (HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(val)) { - host1x_debug_output(o, "[empty]\n"); - return; - } - - host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL); - host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) | - HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id), - HOST1X_SYNC_CFPEEK_CTRL); - - val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_PTRS); - rd_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(val); - wr_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(val); - - val = host1x_sync_readl(host, HOST1X_SYNC_CF_SETUP(ch->id)); - start = HOST1X_SYNC_CF_SETUP_BASE_V(val); - end = HOST1X_SYNC_CF_SETUP_LIMIT_V(val); - - do { - host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL); - host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) | - HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id) | - HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(rd_ptr), - HOST1X_SYNC_CFPEEK_CTRL); - val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_READ); - - if (!data_count) { - host1x_debug_output(o, "%08x:", val); - data_count = show_channel_command(o, val); - } else { - host1x_debug_output(o, "%08x%s", val, - data_count > 0 ? 
", " : "])\n"); - data_count--; - } - - if (rd_ptr == end) - rd_ptr = start; - else - rd_ptr++; - } while (rd_ptr != wr_ptr); - - if (data_count) - host1x_debug_output(o, ", ...])\n"); - host1x_debug_output(o, "\n"); - - host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL); -} - -static void host1x_debug_show_mlocks(struct host1x *host, struct output *o) -{ - unsigned int i; - - host1x_debug_output(o, "---- mlocks ----\n"); - - for (i = 0; i < host1x_syncpt_nb_mlocks(host); i++) { - u32 owner = - host1x_sync_readl(host, HOST1X_SYNC_MLOCK_OWNER(i)); - if (HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(owner)) - host1x_debug_output(o, "%u: locked by channel %u\n", - i, HOST1X_SYNC_MLOCK_OWNER_CHID_V(owner)); - else if (HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(owner)) - host1x_debug_output(o, "%u: locked by cpu\n", i); - else - host1x_debug_output(o, "%u: unlocked\n", i); - } - - host1x_debug_output(o, "\n"); -} +#if HOST1X_HW >= 6 +#include "debug_hw_1x06.c" +#else +#include "debug_hw_1x01.c" +#endif static const struct host1x_debug_ops host1x_debug_ops = { .show_channel_cdma = host1x_debug_show_channel_cdma, diff --git a/drivers/gpu/host1x/hw/debug_hw_1x01.c b/drivers/gpu/host1x/hw/debug_hw_1x01.c new file mode 100644 index 000000000000..85242a59fa6a --- /dev/null +++ b/drivers/gpu/host1x/hw/debug_hw_1x01.c @@ -0,0 +1,149 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2010 Google, Inc. 
+ * Author: Erik Gilling <konkers@android.com> + * + * Copyright (C) 2011-2013 NVIDIA Corporation + */ + +#include "../dev.h" +#include "../debug.h" +#include "../cdma.h" +#include "../channel.h" + +static void host1x_debug_show_channel_cdma(struct host1x *host, + struct host1x_channel *ch, + struct output *o) +{ + struct host1x_cdma *cdma = &ch->cdma; + dma_addr_t dmastart, dmaend; + u32 dmaput, dmaget, dmactrl; + u32 cbstat, cbread; + u32 val, base, baseval; + + dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART); + dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND); + dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT); + dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET); + dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL); + cbread = host1x_sync_readl(host, HOST1X_SYNC_CBREAD(ch->id)); + cbstat = host1x_sync_readl(host, HOST1X_SYNC_CBSTAT(ch->id)); + + host1x_debug_output(o, "%u-%s: ", ch->id, dev_name(ch->dev)); + + if (HOST1X_CHANNEL_DMACTRL_DMASTOP_V(dmactrl) || + !ch->cdma.push_buffer.mapped) { + host1x_debug_output(o, "inactive\n\n"); + return; + } + + if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) == HOST1X_CLASS_HOST1X && + HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) == + HOST1X_UCLASS_WAIT_SYNCPT) + host1x_debug_output(o, "waiting on syncpt %d val %d\n", + cbread >> 24, cbread & 0xffffff); + else if (HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat) == + HOST1X_CLASS_HOST1X && + HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat) == + HOST1X_UCLASS_WAIT_SYNCPT_BASE) { + base = (cbread >> 16) & 0xff; + baseval = + host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(base)); + val = cbread & 0xffff; + host1x_debug_output(o, "waiting on syncpt %d val %d (base %d = %d; offset = %d)\n", + cbread >> 24, baseval + val, base, + baseval, val); + } else + host1x_debug_output(o, "active class %02x, offset %04x, val %08x\n", + HOST1X_SYNC_CBSTAT_CBCLASS_V(cbstat), + HOST1X_SYNC_CBSTAT_CBOFFSET_V(cbstat), + cbread); + + host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend); + 
host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n", + dmaput, dmaget, dmactrl); + host1x_debug_output(o, "CBREAD %08x CBSTAT %08x\n", cbread, cbstat); + + show_channel_gathers(o, cdma); + host1x_debug_output(o, "\n"); +} + +static void host1x_debug_show_channel_fifo(struct host1x *host, + struct host1x_channel *ch, + struct output *o) +{ + u32 val, rd_ptr, wr_ptr, start, end; + unsigned int data_count = 0; + + host1x_debug_output(o, "%u: fifo:\n", ch->id); + + val = host1x_ch_readl(ch, HOST1X_CHANNEL_FIFOSTAT); + host1x_debug_output(o, "FIFOSTAT %08x\n", val); + if (HOST1X_CHANNEL_FIFOSTAT_CFEMPTY_V(val)) { + host1x_debug_output(o, "[empty]\n"); + return; + } + + host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL); + host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) | + HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id), + HOST1X_SYNC_CFPEEK_CTRL); + + val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_PTRS); + rd_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_RD_PTR_V(val); + wr_ptr = HOST1X_SYNC_CFPEEK_PTRS_CF_WR_PTR_V(val); + + val = host1x_sync_readl(host, HOST1X_SYNC_CF_SETUP(ch->id)); + start = HOST1X_SYNC_CF_SETUP_BASE_V(val); + end = HOST1X_SYNC_CF_SETUP_LIMIT_V(val); + + do { + host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL); + host1x_sync_writel(host, HOST1X_SYNC_CFPEEK_CTRL_ENA_F(1) | + HOST1X_SYNC_CFPEEK_CTRL_CHANNR_F(ch->id) | + HOST1X_SYNC_CFPEEK_CTRL_ADDR_F(rd_ptr), + HOST1X_SYNC_CFPEEK_CTRL); + val = host1x_sync_readl(host, HOST1X_SYNC_CFPEEK_READ); + + if (!data_count) { + host1x_debug_output(o, "%08x: ", val); + data_count = show_channel_command(o, val, NULL); + } else { + host1x_debug_cont(o, "%08x%s", val, + data_count > 1 ? 
", " : "])\n"); + data_count--; + } + + if (rd_ptr == end) + rd_ptr = start; + else + rd_ptr++; + } while (rd_ptr != wr_ptr); + + if (data_count) + host1x_debug_cont(o, ", ...])\n"); + host1x_debug_output(o, "\n"); + + host1x_sync_writel(host, 0x0, HOST1X_SYNC_CFPEEK_CTRL); +} + +static void host1x_debug_show_mlocks(struct host1x *host, struct output *o) +{ + unsigned int i; + + host1x_debug_output(o, "---- mlocks ----\n"); + + for (i = 0; i < host1x_syncpt_nb_mlocks(host); i++) { + u32 owner = + host1x_sync_readl(host, HOST1X_SYNC_MLOCK_OWNER(i)); + if (HOST1X_SYNC_MLOCK_OWNER_CH_OWNS_V(owner)) + host1x_debug_output(o, "%u: locked by channel %u\n", + i, HOST1X_SYNC_MLOCK_OWNER_CHID_V(owner)); + else if (HOST1X_SYNC_MLOCK_OWNER_CPU_OWNS_V(owner)) + host1x_debug_output(o, "%u: locked by cpu\n", i); + else + host1x_debug_output(o, "%u: unlocked\n", i); + } + + host1x_debug_output(o, "\n"); +} diff --git a/drivers/gpu/host1x/hw/debug_hw_1x06.c b/drivers/gpu/host1x/hw/debug_hw_1x06.c new file mode 100644 index 000000000000..9d0667879a19 --- /dev/null +++ b/drivers/gpu/host1x/hw/debug_hw_1x06.c @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2010 Google, Inc. 
+ * Author: Erik Gilling <konkers@android.com> + * + * Copyright (C) 2011-2017 NVIDIA Corporation + */ + +#include "../dev.h" +#include "../debug.h" +#include "../cdma.h" +#include "../channel.h" + +static void host1x_debug_show_channel_cdma(struct host1x *host, + struct host1x_channel *ch, + struct output *o) +{ + struct host1x_cdma *cdma = &ch->cdma; + dma_addr_t dmastart = 0, dmaend = 0; + u32 dmaput, dmaget, dmactrl; + u32 offset, class; + u32 ch_stat; + +#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6 + dmastart = host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART_HI); + dmastart <<= 32; +#endif + dmastart |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMASTART); + +#if defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && HOST1X_HW >= 6 + dmaend = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND_HI); + dmaend <<= 32; +#endif + dmaend |= host1x_ch_readl(ch, HOST1X_CHANNEL_DMAEND); + + dmaput = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAPUT); + dmaget = host1x_ch_readl(ch, HOST1X_CHANNEL_DMAGET); + dmactrl = host1x_ch_readl(ch, HOST1X_CHANNEL_DMACTRL); + offset = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDP_OFFSET); + class = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDP_CLASS); + ch_stat = host1x_ch_readl(ch, HOST1X_CHANNEL_CHANNELSTAT); + + host1x_debug_output(o, "%u-%s: ", ch->id, dev_name(ch->dev)); + + if (dmactrl & HOST1X_CHANNEL_DMACTRL_DMASTOP || + !ch->cdma.push_buffer.mapped) { + host1x_debug_output(o, "inactive\n\n"); + return; + } + + if (class == HOST1X_CLASS_HOST1X && offset == HOST1X_UCLASS_WAIT_SYNCPT) + host1x_debug_output(o, "waiting on syncpt\n"); + else + host1x_debug_output(o, "active class %02x, offset %04x\n", + class, offset); + + host1x_debug_output(o, "DMASTART %pad, DMAEND %pad\n", &dmastart, &dmaend); + host1x_debug_output(o, "DMAPUT %08x DMAGET %08x DMACTL %08x\n", + dmaput, dmaget, dmactrl); + host1x_debug_output(o, "CHANNELSTAT %02x\n", ch_stat); + + show_channel_gathers(o, cdma); + host1x_debug_output(o, "\n"); +} + +static void host1x_debug_show_channel_fifo(struct 
host1x *host, + struct host1x_channel *ch, + struct output *o) +{ +#if HOST1X_HW <= 6 + u32 rd_ptr, wr_ptr, start, end; + u32 payload = INVALID_PAYLOAD; + unsigned int data_count = 0; +#endif + u32 val; + + host1x_debug_output(o, "%u: fifo:\n", ch->id); + + val = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDFIFO_STAT); + host1x_debug_output(o, "CMDFIFO_STAT %08x\n", val); + if (val & HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY) { + host1x_debug_output(o, "[empty]\n"); + return; + } + + val = host1x_ch_readl(ch, HOST1X_CHANNEL_CMDFIFO_RDATA); + host1x_debug_output(o, "CMDFIFO_RDATA %08x\n", val); + +#if HOST1X_HW <= 6 + /* Peek pointer values are invalid during SLCG, so disable it */ + host1x_hypervisor_writel(host, 0x1, HOST1X_HV_ICG_EN_OVERRIDE); + + val = 0; + val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE; + val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(ch->id); + host1x_hypervisor_writel(host, val, HOST1X_HV_CMDFIFO_PEEK_CTRL); + + val = host1x_hypervisor_readl(host, HOST1X_HV_CMDFIFO_PEEK_PTRS); + rd_ptr = HOST1X_HV_CMDFIFO_PEEK_PTRS_RD_PTR_V(val); + wr_ptr = HOST1X_HV_CMDFIFO_PEEK_PTRS_WR_PTR_V(val); + + val = host1x_hypervisor_readl(host, HOST1X_HV_CMDFIFO_SETUP(ch->id)); + start = HOST1X_HV_CMDFIFO_SETUP_BASE_V(val); + end = HOST1X_HV_CMDFIFO_SETUP_LIMIT_V(val); + + do { + val = 0; + val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE; + val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(ch->id); + val |= HOST1X_HV_CMDFIFO_PEEK_CTRL_ADDR(rd_ptr); + host1x_hypervisor_writel(host, val, + HOST1X_HV_CMDFIFO_PEEK_CTRL); + + val = host1x_hypervisor_readl(host, + HOST1X_HV_CMDFIFO_PEEK_READ); + + if (!data_count) { + host1x_debug_output(o, "%03x 0x%08x: ", + rd_ptr - start, val); + data_count = show_channel_command(o, val, &payload); + } else { + host1x_debug_cont(o, "%08x%s", val, + data_count > 1 ? 
", " : "])\n"); + data_count--; + } + + if (rd_ptr == end) + rd_ptr = start; + else + rd_ptr++; + } while (rd_ptr != wr_ptr); + + if (data_count) + host1x_debug_cont(o, ", ...])\n"); + host1x_debug_output(o, "\n"); + + host1x_hypervisor_writel(host, 0x0, HOST1X_HV_CMDFIFO_PEEK_CTRL); + host1x_hypervisor_writel(host, 0x0, HOST1X_HV_ICG_EN_OVERRIDE); +#endif +} + +static void host1x_debug_show_mlocks(struct host1x *host, struct output *o) +{ + /* TODO */ +} diff --git a/drivers/gpu/host1x/hw/host1x01.c b/drivers/gpu/host1x/hw/host1x01.c index 859b73beb4d0..8d8a117a5153 100644 --- a/drivers/gpu/host1x/hw/host1x01.c +++ b/drivers/gpu/host1x/hw/host1x01.c @@ -1,19 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Host1x init for T20 and T30 Architecture Chips * * Copyright (c) 2011-2013, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* include hw specification */ @@ -21,6 +10,8 @@ #include "host1x01_hardware.h" /* include code */ +#define HOST1X_HW 1 + #include "cdma_hw.c" #include "channel_hw.c" #include "debug_hw.c" diff --git a/drivers/gpu/host1x/hw/host1x01.h b/drivers/gpu/host1x/hw/host1x01.h index 2706b6743250..6516c3f1edf5 100644 --- a/drivers/gpu/host1x/hw/host1x01.h +++ b/drivers/gpu/host1x/hw/host1x01.h @@ -1,19 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Host1x init for T20 and T30 Architecture Chips * * Copyright (c) 2011-2013, NVIDIA Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef HOST1X_HOST1X01_H #define HOST1X_HOST1X01_H diff --git a/drivers/gpu/host1x/hw/host1x01_hardware.h b/drivers/gpu/host1x/hw/host1x01_hardware.h index 5f0fb866efa8..cb93d7c1808c 100644 --- a/drivers/gpu/host1x/hw/host1x01_hardware.h +++ b/drivers/gpu/host1x/hw/host1x01_hardware.h @@ -1,19 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Tegra host1x Register Offsets for Tegra20 and Tegra30 * * Copyright (c) 2010-2013 NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ #ifndef __HOST1X_HOST1X01_HARDWARE_H @@ -26,118 +15,6 @@ #include "hw_host1x01_sync.h" #include "hw_host1x01_uclass.h" -static inline u32 host1x_class_host_wait_syncpt( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_wait_syncpt_indx_f(indx) - | host1x_uclass_wait_syncpt_thresh_f(threshold); -} - -static inline u32 host1x_class_host_load_syncpt_base( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_load_syncpt_base_base_indx_f(indx) - | host1x_uclass_load_syncpt_base_value_f(threshold); -} - -static inline u32 host1x_class_host_wait_syncpt_base( - unsigned indx, unsigned base_indx, unsigned offset) -{ - return host1x_uclass_wait_syncpt_base_indx_f(indx) - | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_wait_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt_base( - unsigned base_indx, unsigned offset) -{ - return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_incr_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt( - unsigned cond, unsigned indx) -{ - return host1x_uclass_incr_syncpt_cond_f(cond) - | host1x_uclass_incr_syncpt_indx_f(indx); -} - -static inline u32 host1x_class_host_indoff_reg_write( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indbe_f(0xf) - | host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -static inline u32 host1x_class_host_indoff_reg_read( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset) - | host1x_uclass_indoff_rwn_read_v(); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - - -/* cdma opcodes */ -static inline u32 host1x_opcode_setclass( - unsigned class_id, unsigned offset, unsigned mask) -{ - return (0 << 28) | (offset << 
16) | (class_id << 6) | mask; -} - -static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) -{ - return (1 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) -{ - return (2 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) -{ - return (3 << 28) | (offset << 16) | mask; -} - -static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) -{ - return (4 << 28) | (offset << 16) | value; -} - -static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) -{ - return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), - host1x_class_host_incr_syncpt(cond, indx)); -} - -static inline u32 host1x_opcode_restart(unsigned address) -{ - return (5 << 28) | (address >> 4); -} - -static inline u32 host1x_opcode_gather(unsigned count) -{ - return (6 << 28) | count; -} - -static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | count; -} - -static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; -} - -#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) +#include "opcodes.h" #endif diff --git a/drivers/gpu/host1x/hw/host1x02.c b/drivers/gpu/host1x/hw/host1x02.c index 928946c2144b..583b33c04884 100644 --- a/drivers/gpu/host1x/hw/host1x02.c +++ b/drivers/gpu/host1x/hw/host1x02.c @@ -1,19 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Host1x init for Tegra114 SoCs * * Copyright (c) 2013 NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* include hw specification */ @@ -21,6 +10,8 @@ #include "host1x02_hardware.h" /* include code */ +#define HOST1X_HW 2 + #include "cdma_hw.c" #include "channel_hw.c" #include "debug_hw.c" diff --git a/drivers/gpu/host1x/hw/host1x02.h b/drivers/gpu/host1x/hw/host1x02.h index f7486609a90e..7e5c3e4700d2 100644 --- a/drivers/gpu/host1x/hw/host1x02.h +++ b/drivers/gpu/host1x/hw/host1x02.h @@ -1,19 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Host1x init for Tegra114 SoCs * * Copyright (c) 2013 NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef HOST1X_HOST1X02_H diff --git a/drivers/gpu/host1x/hw/host1x02_hardware.h b/drivers/gpu/host1x/hw/host1x02_hardware.h index 154901860bc6..2d1282b9bc33 100644 --- a/drivers/gpu/host1x/hw/host1x02_hardware.h +++ b/drivers/gpu/host1x/hw/host1x02_hardware.h @@ -1,19 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Tegra host1x Register Offsets for Tegra114 * * Copyright (c) 2010-2013 NVIDIA Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __HOST1X_HOST1X02_HARDWARE_H @@ -26,117 +15,6 @@ #include "hw_host1x02_sync.h" #include "hw_host1x02_uclass.h" -static inline u32 host1x_class_host_wait_syncpt( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_wait_syncpt_indx_f(indx) - | host1x_uclass_wait_syncpt_thresh_f(threshold); -} - -static inline u32 host1x_class_host_load_syncpt_base( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_load_syncpt_base_base_indx_f(indx) - | host1x_uclass_load_syncpt_base_value_f(threshold); -} - -static inline u32 host1x_class_host_wait_syncpt_base( - unsigned indx, unsigned base_indx, unsigned offset) -{ - return host1x_uclass_wait_syncpt_base_indx_f(indx) - | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_wait_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt_base( - unsigned base_indx, unsigned offset) -{ - return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_incr_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt( - unsigned cond, unsigned indx) -{ - return host1x_uclass_incr_syncpt_cond_f(cond) - | host1x_uclass_incr_syncpt_indx_f(indx); -} - -static inline u32 host1x_class_host_indoff_reg_write( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indbe_f(0xf) - | 
host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -static inline u32 host1x_class_host_indoff_reg_read( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset) - | host1x_uclass_indoff_rwn_read_v(); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -/* cdma opcodes */ -static inline u32 host1x_opcode_setclass( - unsigned class_id, unsigned offset, unsigned mask) -{ - return (0 << 28) | (offset << 16) | (class_id << 6) | mask; -} - -static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) -{ - return (1 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) -{ - return (2 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) -{ - return (3 << 28) | (offset << 16) | mask; -} - -static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) -{ - return (4 << 28) | (offset << 16) | value; -} - -static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) -{ - return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), - host1x_class_host_incr_syncpt(cond, indx)); -} - -static inline u32 host1x_opcode_restart(unsigned address) -{ - return (5 << 28) | (address >> 4); -} - -static inline u32 host1x_opcode_gather(unsigned count) -{ - return (6 << 28) | count; -} - -static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | count; -} - -static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; -} - -#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) +#include "opcodes.h" #endif diff --git a/drivers/gpu/host1x/hw/host1x04.c 
b/drivers/gpu/host1x/hw/host1x04.c index 8007c70fa9c4..26b459eb2d3e 100644 --- a/drivers/gpu/host1x/hw/host1x04.c +++ b/drivers/gpu/host1x/hw/host1x04.c @@ -1,19 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Host1x init for Tegra124 SoCs * * Copyright (c) 2013 NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* include hw specification */ @@ -21,6 +10,8 @@ #include "host1x04_hardware.h" /* include code */ +#define HOST1X_HW 4 + #include "cdma_hw.c" #include "channel_hw.c" #include "debug_hw.c" diff --git a/drivers/gpu/host1x/hw/host1x04.h b/drivers/gpu/host1x/hw/host1x04.h index a9ab7496c06e..2a1e8153c5fe 100644 --- a/drivers/gpu/host1x/hw/host1x04.h +++ b/drivers/gpu/host1x/hw/host1x04.h @@ -1,19 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Host1x init for Tegra124 SoCs * * Copyright (c) 2013 NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. 
If not, see <http://www.gnu.org/licenses/>. */ #ifndef HOST1X_HOST1X04_H diff --git a/drivers/gpu/host1x/hw/host1x04_hardware.h b/drivers/gpu/host1x/hw/host1x04_hardware.h index de1a38175328..84d244e8af30 100644 --- a/drivers/gpu/host1x/hw/host1x04_hardware.h +++ b/drivers/gpu/host1x/hw/host1x04_hardware.h @@ -1,19 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Tegra host1x Register Offsets for Tegra124 * * Copyright (c) 2010-2013 NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ #ifndef __HOST1X_HOST1X04_HARDWARE_H @@ -26,117 +15,6 @@ #include "hw_host1x04_sync.h" #include "hw_host1x04_uclass.h" -static inline u32 host1x_class_host_wait_syncpt( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_wait_syncpt_indx_f(indx) - | host1x_uclass_wait_syncpt_thresh_f(threshold); -} - -static inline u32 host1x_class_host_load_syncpt_base( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_load_syncpt_base_base_indx_f(indx) - | host1x_uclass_load_syncpt_base_value_f(threshold); -} - -static inline u32 host1x_class_host_wait_syncpt_base( - unsigned indx, unsigned base_indx, unsigned offset) -{ - return host1x_uclass_wait_syncpt_base_indx_f(indx) - | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_wait_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt_base( - unsigned base_indx, unsigned offset) -{ - return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_incr_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt( - unsigned cond, unsigned indx) -{ - return host1x_uclass_incr_syncpt_cond_f(cond) - | host1x_uclass_incr_syncpt_indx_f(indx); -} - -static inline u32 host1x_class_host_indoff_reg_write( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indbe_f(0xf) - | host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -static inline u32 host1x_class_host_indoff_reg_read( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset) - | host1x_uclass_indoff_rwn_read_v(); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -/* cdma opcodes */ -static inline u32 host1x_opcode_setclass( - unsigned class_id, unsigned offset, unsigned mask) -{ - return (0 << 28) | (offset << 
16) | (class_id << 6) | mask; -} - -static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) -{ - return (1 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) -{ - return (2 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) -{ - return (3 << 28) | (offset << 16) | mask; -} - -static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) -{ - return (4 << 28) | (offset << 16) | value; -} - -static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) -{ - return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), - host1x_class_host_incr_syncpt(cond, indx)); -} - -static inline u32 host1x_opcode_restart(unsigned address) -{ - return (5 << 28) | (address >> 4); -} - -static inline u32 host1x_opcode_gather(unsigned count) -{ - return (6 << 28) | count; -} - -static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | count; -} - -static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; -} - -#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) +#include "opcodes.h" #endif diff --git a/drivers/gpu/host1x/hw/host1x05.c b/drivers/gpu/host1x/hw/host1x05.c index 047097ce3bad..6d9803343aae 100644 --- a/drivers/gpu/host1x/hw/host1x05.c +++ b/drivers/gpu/host1x/hw/host1x05.c @@ -1,19 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Host1x init for Tegra210 SoCs * * Copyright (c) 2015 NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ /* include hw specification */ @@ -21,6 +10,8 @@ #include "host1x05_hardware.h" /* include code */ +#define HOST1X_HW 5 + #include "cdma_hw.c" #include "channel_hw.c" #include "debug_hw.c" diff --git a/drivers/gpu/host1x/hw/host1x05.h b/drivers/gpu/host1x/hw/host1x05.h index a306d9c05cd5..addfd41e7ef3 100644 --- a/drivers/gpu/host1x/hw/host1x05.h +++ b/drivers/gpu/host1x/hw/host1x05.h @@ -1,19 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Host1x init for Tegra210 SoCs * * Copyright (c) 2015 NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef HOST1X_HOST1X05_H diff --git a/drivers/gpu/host1x/hw/host1x05_hardware.h b/drivers/gpu/host1x/hw/host1x05_hardware.h index 2937ebb6be11..1dcde6ec7909 100644 --- a/drivers/gpu/host1x/hw/host1x05_hardware.h +++ b/drivers/gpu/host1x/hw/host1x05_hardware.h @@ -1,19 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Tegra host1x Register Offsets for Tegra210 * * Copyright (c) 2015 NVIDIA Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ #ifndef __HOST1X_HOST1X05_HARDWARE_H @@ -26,117 +15,6 @@ #include "hw_host1x05_sync.h" #include "hw_host1x05_uclass.h" -static inline u32 host1x_class_host_wait_syncpt( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_wait_syncpt_indx_f(indx) - | host1x_uclass_wait_syncpt_thresh_f(threshold); -} - -static inline u32 host1x_class_host_load_syncpt_base( - unsigned indx, unsigned threshold) -{ - return host1x_uclass_load_syncpt_base_base_indx_f(indx) - | host1x_uclass_load_syncpt_base_value_f(threshold); -} - -static inline u32 host1x_class_host_wait_syncpt_base( - unsigned indx, unsigned base_indx, unsigned offset) -{ - return host1x_uclass_wait_syncpt_base_indx_f(indx) - | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_wait_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt_base( - unsigned base_indx, unsigned offset) -{ - return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) - | host1x_uclass_incr_syncpt_base_offset_f(offset); -} - -static inline u32 host1x_class_host_incr_syncpt( - unsigned cond, unsigned indx) -{ - return host1x_uclass_incr_syncpt_cond_f(cond) - | host1x_uclass_incr_syncpt_indx_f(indx); -} - -static inline u32 host1x_class_host_indoff_reg_write( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indbe_f(0xf) - | 
host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -static inline u32 host1x_class_host_indoff_reg_read( - unsigned mod_id, unsigned offset, bool auto_inc) -{ - u32 v = host1x_uclass_indoff_indmodid_f(mod_id) - | host1x_uclass_indoff_indroffset_f(offset) - | host1x_uclass_indoff_rwn_read_v(); - if (auto_inc) - v |= host1x_uclass_indoff_autoinc_f(1); - return v; -} - -/* cdma opcodes */ -static inline u32 host1x_opcode_setclass( - unsigned class_id, unsigned offset, unsigned mask) -{ - return (0 << 28) | (offset << 16) | (class_id << 6) | mask; -} - -static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) -{ - return (1 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) -{ - return (2 << 28) | (offset << 16) | count; -} - -static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) -{ - return (3 << 28) | (offset << 16) | mask; -} - -static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) -{ - return (4 << 28) | (offset << 16) | value; -} - -static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) -{ - return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), - host1x_class_host_incr_syncpt(cond, indx)); -} - -static inline u32 host1x_opcode_restart(unsigned address) -{ - return (5 << 28) | (address >> 4); -} - -static inline u32 host1x_opcode_gather(unsigned count) -{ - return (6 << 28) | count; -} - -static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | count; -} - -static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) -{ - return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; -} - -#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) +#include "opcodes.h" #endif diff --git a/drivers/gpu/host1x/hw/host1x06.c 
b/drivers/gpu/host1x/hw/host1x06.c new file mode 100644 index 000000000000..844f81ae2d7e --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x06.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Host1x init for Tegra186 SoCs + * + * Copyright (c) 2017 NVIDIA Corporation. + */ + +/* include hw specification */ +#include "host1x06.h" +#include "host1x06_hardware.h" + +/* include code */ +#define HOST1X_HW 6 + +#include "cdma_hw.c" +#include "channel_hw.c" +#include "debug_hw.c" +#include "intr_hw.c" +#include "syncpt_hw.c" + +#include "../dev.h" + +int host1x06_init(struct host1x *host) +{ + host->channel_op = &host1x_channel_ops; + host->cdma_op = &host1x_cdma_ops; + host->cdma_pb_op = &host1x_pushbuffer_ops; + host->syncpt_op = &host1x_syncpt_ops; + host->intr_op = &host1x_intr_ops; + host->debug_op = &host1x_debug_ops; + + return 0; +} diff --git a/drivers/gpu/host1x/hw/host1x06.h b/drivers/gpu/host1x/hw/host1x06.h new file mode 100644 index 000000000000..4ea756895ca5 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x06.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Host1x init for Tegra186 SoCs + * + * Copyright (c) 2017 NVIDIA Corporation. + */ + +#ifndef HOST1X_HOST1X06_H +#define HOST1X_HOST1X06_H + +struct host1x; + +int host1x06_init(struct host1x *host); + +#endif diff --git a/drivers/gpu/host1x/hw/host1x06_hardware.h b/drivers/gpu/host1x/hw/host1x06_hardware.h new file mode 100644 index 000000000000..c05cfa7e3090 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x06_hardware.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Register Offsets for Tegra186 + * + * Copyright (c) 2017 NVIDIA Corporation. 
+ */ + +#ifndef __HOST1X_HOST1X06_HARDWARE_H +#define __HOST1X_HOST1X06_HARDWARE_H + +#include <linux/types.h> +#include <linux/bitops.h> + +#include "hw_host1x06_channel.h" +#include "hw_host1x06_uclass.h" +#include "hw_host1x06_vm.h" +#include "hw_host1x06_hypervisor.h" + +#include "opcodes.h" + +#endif diff --git a/drivers/gpu/host1x/hw/host1x07.c b/drivers/gpu/host1x/hw/host1x07.c new file mode 100644 index 000000000000..0c6f14f7ec80 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x07.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Host1x init for Tegra194 SoCs + * + * Copyright (c) 2018 NVIDIA Corporation. + */ + +/* include hw specification */ +#include "host1x07.h" +#include "host1x07_hardware.h" + +/* include code */ +#define HOST1X_HW 7 + +#include "cdma_hw.c" +#include "channel_hw.c" +#include "debug_hw.c" +#include "intr_hw.c" +#include "syncpt_hw.c" + +#include "../dev.h" + +int host1x07_init(struct host1x *host) +{ + host->channel_op = &host1x_channel_ops; + host->cdma_op = &host1x_cdma_ops; + host->cdma_pb_op = &host1x_pushbuffer_ops; + host->syncpt_op = &host1x_syncpt_ops; + host->intr_op = &host1x_intr_ops; + host->debug_op = &host1x_debug_ops; + + return 0; +} diff --git a/drivers/gpu/host1x/hw/host1x07.h b/drivers/gpu/host1x/hw/host1x07.h new file mode 100644 index 000000000000..419b6eaad3d8 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x07.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Host1x init for Tegra194 SoCs + * + * Copyright (c) 2018 NVIDIA Corporation. 
+ */ + +#ifndef HOST1X_HOST1X07_H +#define HOST1X_HOST1X07_H + +struct host1x; + +int host1x07_init(struct host1x *host); + +#endif diff --git a/drivers/gpu/host1x/hw/host1x07_hardware.h b/drivers/gpu/host1x/hw/host1x07_hardware.h new file mode 100644 index 000000000000..d67364e03956 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x07_hardware.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Register Offsets for Tegra194 + * + * Copyright (c) 2018 NVIDIA Corporation. + */ + +#ifndef __HOST1X_HOST1X07_HARDWARE_H +#define __HOST1X_HOST1X07_HARDWARE_H + +#include <linux/types.h> +#include <linux/bitops.h> + +#include "hw_host1x07_channel.h" +#include "hw_host1x07_uclass.h" +#include "hw_host1x07_vm.h" +#include "hw_host1x07_hypervisor.h" + +#include "opcodes.h" + +#endif diff --git a/drivers/gpu/host1x/hw/host1x08.c b/drivers/gpu/host1x/hw/host1x08.c new file mode 100644 index 000000000000..754890c34c74 --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x08.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Host1x init for Tegra234 SoCs + * + * Copyright (c) 2022 NVIDIA Corporation. 
+ */ + +/* include hw specification */ +#include "host1x08.h" +#include "host1x08_hardware.h" + +/* include code */ +#define HOST1X_HW 8 + +#include "cdma_hw.c" +#include "channel_hw.c" +#include "debug_hw.c" +#include "intr_hw.c" +#include "syncpt_hw.c" + +#include "../dev.h" + +int host1x08_init(struct host1x *host) +{ + host->channel_op = &host1x_channel_ops; + host->cdma_op = &host1x_cdma_ops; + host->cdma_pb_op = &host1x_pushbuffer_ops; + host->syncpt_op = &host1x_syncpt_ops; + host->intr_op = &host1x_intr_ops; + host->debug_op = &host1x_debug_ops; + + return 0; +} diff --git a/drivers/gpu/host1x/hw/host1x08.h b/drivers/gpu/host1x/hw/host1x08.h new file mode 100644 index 000000000000..a6bad56e44cf --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x08.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Host1x init for Tegra234 SoCs + * + * Copyright (c) 2018 NVIDIA Corporation. + */ + +#ifndef HOST1X_HOST1X08_H +#define HOST1X_HOST1X08_H + +struct host1x; + +int host1x08_init(struct host1x *host); + +#endif diff --git a/drivers/gpu/host1x/hw/host1x08_hardware.h b/drivers/gpu/host1x/hw/host1x08_hardware.h new file mode 100644 index 000000000000..936243060bff --- /dev/null +++ b/drivers/gpu/host1x/hw/host1x08_hardware.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x Register Offsets for Tegra234 + * + * Copyright (c) 2022 NVIDIA Corporation. 
+ */ + +#ifndef __HOST1X_HOST1X08_HARDWARE_H +#define __HOST1X_HOST1X08_HARDWARE_H + +#include <linux/types.h> +#include <linux/bitops.h> + +#include "hw_host1x08_uclass.h" +#include "hw_host1x08_vm.h" +#include "hw_host1x08_hypervisor.h" +#include "hw_host1x08_common.h" + +#include "opcodes.h" + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x01_channel.h b/drivers/gpu/host1x/hw/hw_host1x01_channel.h index b4bc7ca4e051..8da43eaba2c8 100644 --- a/drivers/gpu/host1x/hw/hw_host1x01_channel.h +++ b/drivers/gpu/host1x/hw/hw_host1x01_channel.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2012-2013, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * */ /* diff --git a/drivers/gpu/host1x/hw/hw_host1x01_sync.h b/drivers/gpu/host1x/hw/hw_host1x01_sync.h index 31238c285d46..ec95e7ae7ca5 100644 --- a/drivers/gpu/host1x/hw/hw_host1x01_sync.h +++ b/drivers/gpu/host1x/hw/hw_host1x01_sync.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2012-2013, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * */ /* diff --git a/drivers/gpu/host1x/hw/hw_host1x01_uclass.h b/drivers/gpu/host1x/hw/hw_host1x01_uclass.h index f7553599ee27..1239bfd46a5e 100644 --- a/drivers/gpu/host1x/hw/hw_host1x01_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x01_uclass.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2012-2013, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * */ /* diff --git a/drivers/gpu/host1x/hw/hw_host1x02_channel.h b/drivers/gpu/host1x/hw/hw_host1x02_channel.h index e490bcde33fe..210d317ad2b7 100644 --- a/drivers/gpu/host1x/hw/hw_host1x02_channel.h +++ b/drivers/gpu/host1x/hw/hw_host1x02_channel.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2013 NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * */ /* diff --git a/drivers/gpu/host1x/hw/hw_host1x02_sync.h b/drivers/gpu/host1x/hw/hw_host1x02_sync.h index 540c7b65995f..44b4f8379732 100644 --- a/drivers/gpu/host1x/hw/hw_host1x02_sync.h +++ b/drivers/gpu/host1x/hw/hw_host1x02_sync.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2013 NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * */ /* diff --git a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h index 028e49d9bac9..0a2ab8f1da6f 100644 --- a/drivers/gpu/host1x/hw/hw_host1x02_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x02_uclass.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2013 NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. 
- * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * */ /* @@ -177,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x04_channel.h b/drivers/gpu/host1x/hw/hw_host1x04_channel.h index 95e6f96142b9..38d110645ee0 100644 --- a/drivers/gpu/host1x/hw/hw_host1x04_channel.h +++ b/drivers/gpu/host1x/hw/hw_host1x04_channel.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2013 NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
- * */ /* @@ -117,5 +105,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void) } #define HOST1X_CHANNEL_DMACTRL_DMAINITGET \ host1x_channel_dmactrl_dmainitget() +static inline u32 host1x_channel_channelctrl_r(void) +{ + return 0x98; +} +#define HOST1X_CHANNEL_CHANNELCTRL \ + host1x_channel_channelctrl_r() +static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v) +{ + return (v & 0x1) << 2; +} +#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \ + host1x_channel_channelctrl_kernel_filter_gbuffer_f(v) #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x04_sync.h b/drivers/gpu/host1x/hw/hw_host1x04_sync.h index 3d6c8ec65934..0be98562c201 100644 --- a/drivers/gpu/host1x/hw/hw_host1x04_sync.h +++ b/drivers/gpu/host1x/hw/hw_host1x04_sync.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2013 NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * */ /* diff --git a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h index d1460e971493..60c692b92955 100644 --- a/drivers/gpu/host1x/hw/hw_host1x04_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x04_uclass.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2013 NVIDIA Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * */ /* @@ -177,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x05_channel.h b/drivers/gpu/host1x/hw/hw_host1x05_channel.h index fce6e2c1ff4c..7e628ef58c49 100644 --- a/drivers/gpu/host1x/hw/hw_host1x05_channel.h +++ b/drivers/gpu/host1x/hw/hw_host1x05_channel.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2015 NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * */ /* @@ -117,5 +105,17 @@ static inline u32 host1x_channel_dmactrl_dmainitget(void) } #define HOST1X_CHANNEL_DMACTRL_DMAINITGET \ host1x_channel_dmactrl_dmainitget() +static inline u32 host1x_channel_channelctrl_r(void) +{ + return 0x98; +} +#define HOST1X_CHANNEL_CHANNELCTRL \ + host1x_channel_channelctrl_r() +static inline u32 host1x_channel_channelctrl_kernel_filter_gbuffer_f(u32 v) +{ + return (v & 0x1) << 2; +} +#define HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(v) \ + host1x_channel_channelctrl_kernel_filter_gbuffer_f(v) #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x05_sync.h b/drivers/gpu/host1x/hw/hw_host1x05_sync.h index ca10eee5045c..1a85c793bd3e 100644 --- a/drivers/gpu/host1x/hw/hw_host1x05_sync.h +++ b/drivers/gpu/host1x/hw/hw_host1x05_sync.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2015 NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * */ /* diff --git a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h index 0c411da6bc41..2fcc9a2ad3ef 100644 --- a/drivers/gpu/host1x/hw/hw_host1x05_uclass.h +++ b/drivers/gpu/host1x/hw/hw_host1x05_uclass.h @@ -1,18 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Copyright (c) 2015 NVIDIA Corporation. 
- * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. - * */ /* @@ -177,5 +165,17 @@ static inline u32 host1x_uclass_indoff_rwn_read_v(void) } #define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() #endif diff --git a/drivers/gpu/host1x/hw/hw_host1x06_channel.h b/drivers/gpu/host1x/hw/hw_host1x06_channel.h new file mode 100644 index 000000000000..18ae1c57bbea --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x06_channel.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 NVIDIA Corporation. + */ + +#ifndef HOST1X_HW_HOST1X06_CHANNEL_H +#define HOST1X_HW_HOST1X06_CHANNEL_H + +#define HOST1X_CHANNEL_SMMU_STREAMID 0x084 + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h new file mode 100644 index 000000000000..a7fc9ec4bc3e --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x06_hypervisor.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2017 NVIDIA Corporation. 
+ */ + +#define HOST1X_HV_SYNCPT_PROT_EN 0x1ac4 +#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN BIT(1) +#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x) (0x2020 + (x * 4)) +#define HOST1X_HV_CMDFIFO_PEEK_CTRL 0x233c +#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ADDR(x) (x) +#define HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(x) ((x) << 16) +#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE BIT(31) +#define HOST1X_HV_CMDFIFO_PEEK_READ 0x2340 +#define HOST1X_HV_CMDFIFO_PEEK_PTRS 0x2344 +#define HOST1X_HV_CMDFIFO_PEEK_PTRS_WR_PTR_V(x) (((x) >> 16) & 0xfff) +#define HOST1X_HV_CMDFIFO_PEEK_PTRS_RD_PTR_V(x) ((x) & 0xfff) +#define HOST1X_HV_CMDFIFO_SETUP(x) (0x2588 + (x * 4)) +#define HOST1X_HV_CMDFIFO_SETUP_LIMIT_V(x) (((x) >> 16) & 0xfff) +#define HOST1X_HV_CMDFIFO_SETUP_BASE_V(x) ((x) & 0xfff) +#define HOST1X_HV_ICG_EN_OVERRIDE 0x2aa8 diff --git a/drivers/gpu/host1x/hw/hw_host1x06_uclass.h b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h new file mode 100644 index 000000000000..50c32de452fb --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x06_uclass.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2017 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. 
This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. + */ + +#ifndef HOST1X_HW_HOST1X06_UCLASS_H +#define HOST1X_HW_HOST1X06_UCLASS_H + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} +#define HOST1X_UCLASS_INCR_SYNCPT \ + host1x_uclass_incr_syncpt_r() +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 10; +} +#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \ + host1x_uclass_incr_syncpt_cond_f(v) +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0x3ff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ + host1x_uclass_incr_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} +#define HOST1X_UCLASS_WAIT_SYNCPT \ + host1x_uclass_wait_syncpt_r() +static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \ + host1x_uclass_wait_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \ + host1x_uclass_wait_syncpt_thresh_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_r(void) +{ + return 0x9; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \ + host1x_uclass_wait_syncpt_base_r() +static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \ + 
host1x_uclass_wait_syncpt_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 16; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_wait_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_load_syncpt_base_r(void) +{ + return 0xb; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \ + host1x_uclass_load_syncpt_base_r() +static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_load_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \ + host1x_uclass_load_syncpt_base_value_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_incr_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_incr_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_indoff_r(void) +{ + return 0x2d; +} +#define HOST1X_UCLASS_INDOFF \ + host1x_uclass_indoff_r() +static inline u32 host1x_uclass_indoff_indbe_f(u32 v) +{ + return (v & 0xf) << 28; +} +#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \ + host1x_uclass_indoff_indbe_f(v) +static inline u32 host1x_uclass_indoff_autoinc_f(u32 v) +{ + return (v & 0x1) << 27; +} +#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \ + host1x_uclass_indoff_autoinc_f(v) +static inline u32 host1x_uclass_indoff_indmodid_f(u32 v) +{ + return (v & 0xff) << 18; +} +#define 
HOST1X_UCLASS_INDOFF_INDMODID_F(v) \ + host1x_uclass_indoff_indmodid_f(v) +static inline u32 host1x_uclass_indoff_indroffset_f(u32 v) +{ + return (v & 0xffff) << 2; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_indoff_rwn_read_v(void) +{ + return 1; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x06_vm.h b/drivers/gpu/host1x/hw/hw_host1x06_vm.h new file mode 100644 index 000000000000..818564a76bc6 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x06_vm.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2017 NVIDIA Corporation. 
+ */ + +#define HOST1X_CHANNEL_DMASTART 0x0000 +#define HOST1X_CHANNEL_DMASTART_HI 0x0004 +#define HOST1X_CHANNEL_DMAPUT 0x0008 +#define HOST1X_CHANNEL_DMAPUT_HI 0x000c +#define HOST1X_CHANNEL_DMAGET 0x0010 +#define HOST1X_CHANNEL_DMAGET_HI 0x0014 +#define HOST1X_CHANNEL_DMAEND 0x0018 +#define HOST1X_CHANNEL_DMAEND_HI 0x001c +#define HOST1X_CHANNEL_DMACTRL 0x0020 +#define HOST1X_CHANNEL_DMACTRL_DMASTOP BIT(0) +#define HOST1X_CHANNEL_DMACTRL_DMAGETRST BIT(1) +#define HOST1X_CHANNEL_DMACTRL_DMAINITGET BIT(2) +#define HOST1X_CHANNEL_CMDFIFO_STAT 0x0024 +#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY BIT(13) +#define HOST1X_CHANNEL_CMDFIFO_RDATA 0x0028 +#define HOST1X_CHANNEL_CMDP_OFFSET 0x0030 +#define HOST1X_CHANNEL_CMDP_CLASS 0x0034 +#define HOST1X_CHANNEL_CHANNELSTAT 0x0038 +#define HOST1X_CHANNEL_CMDPROC_STOP 0x0048 +#define HOST1X_CHANNEL_TEARDOWN 0x004c + +#define HOST1X_SYNC_SYNCPT_CPU_INCR(x) (0x6400 + 4*(x)) +#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x) (0x6464 + 4*(x)) +#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x) (0x652c + 4*(x)) +#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x) (0x6590 + 4*(x)) +#define HOST1X_SYNC_SYNCPT_BASE(x) (0x8000 + 4*(x)) +#define HOST1X_SYNC_SYNCPT(x) (0x8080 + 4*(x)) +#define HOST1X_SYNC_SYNCPT_INT_THRESH(x) (0x8a00 + 4*(x)) +#define HOST1X_SYNC_SYNCPT_CH_APP(x) (0x9384 + 4*(x)) +#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v) (((v) & 0x3f) << 8) diff --git a/drivers/gpu/host1x/hw/hw_host1x07_channel.h b/drivers/gpu/host1x/hw/hw_host1x07_channel.h new file mode 100644 index 000000000000..96fa72bbd7ab --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x07_channel.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 NVIDIA Corporation. 
+ */ + +#ifndef HOST1X_HW_HOST1X07_CHANNEL_H +#define HOST1X_HW_HOST1X07_CHANNEL_H + +#define HOST1X_CHANNEL_SMMU_STREAMID 0x084 + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x07_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x07_hypervisor.h new file mode 100644 index 000000000000..52141d53954a --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x07_hypervisor.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018 NVIDIA Corporation. + */ + +#define HOST1X_HV_SYNCPT_PROT_EN 0x1ac4 +#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN BIT(1) +#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x) (0x2020 + (x * 4)) +#define HOST1X_HV_CMDFIFO_PEEK_CTRL 0x233c +#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ADDR(x) (x) +#define HOST1X_HV_CMDFIFO_PEEK_CTRL_CHANNEL(x) ((x) << 16) +#define HOST1X_HV_CMDFIFO_PEEK_CTRL_ENABLE BIT(31) +#define HOST1X_HV_CMDFIFO_PEEK_READ 0x2340 +#define HOST1X_HV_CMDFIFO_PEEK_PTRS 0x2344 +#define HOST1X_HV_CMDFIFO_PEEK_PTRS_WR_PTR_V(x) (((x) >> 16) & 0xfff) +#define HOST1X_HV_CMDFIFO_PEEK_PTRS_RD_PTR_V(x) ((x) & 0xfff) +#define HOST1X_HV_CMDFIFO_SETUP(x) (0x2588 + (x * 4)) +#define HOST1X_HV_CMDFIFO_SETUP_LIMIT_V(x) (((x) >> 16) & 0xfff) +#define HOST1X_HV_CMDFIFO_SETUP_BASE_V(x) ((x) & 0xfff) +#define HOST1X_HV_ICG_EN_OVERRIDE 0x2aa8 diff --git a/drivers/gpu/host1x/hw/hw_host1x07_uclass.h b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h new file mode 100644 index 000000000000..887b878f92f7 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x07_uclass.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. + * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. 
+ * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. 
+ */ + +#ifndef HOST1X_HW_HOST1X07_UCLASS_H +#define HOST1X_HW_HOST1X07_UCLASS_H + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} +#define HOST1X_UCLASS_INCR_SYNCPT \ + host1x_uclass_incr_syncpt_r() +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 10; +} +#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \ + host1x_uclass_incr_syncpt_cond_f(v) +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0x3ff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ + host1x_uclass_incr_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} +#define HOST1X_UCLASS_WAIT_SYNCPT \ + host1x_uclass_wait_syncpt_r() +static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \ + host1x_uclass_wait_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \ + host1x_uclass_wait_syncpt_thresh_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_r(void) +{ + return 0x9; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \ + host1x_uclass_wait_syncpt_base_r() +static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 16; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_wait_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_load_syncpt_base_r(void) +{ + return 0xb; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \ + host1x_uclass_load_syncpt_base_r() 
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_load_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \ + host1x_uclass_load_syncpt_base_value_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_incr_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_incr_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_indoff_r(void) +{ + return 0x2d; +} +#define HOST1X_UCLASS_INDOFF \ + host1x_uclass_indoff_r() +static inline u32 host1x_uclass_indoff_indbe_f(u32 v) +{ + return (v & 0xf) << 28; +} +#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \ + host1x_uclass_indoff_indbe_f(v) +static inline u32 host1x_uclass_indoff_autoinc_f(u32 v) +{ + return (v & 0x1) << 27; +} +#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \ + host1x_uclass_indoff_autoinc_f(v) +static inline u32 host1x_uclass_indoff_indmodid_f(u32 v) +{ + return (v & 0xff) << 18; +} +#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \ + host1x_uclass_indoff_indmodid_f(v) +static inline u32 host1x_uclass_indoff_indroffset_f(u32 v) +{ + return (v & 0xffff) << 2; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_indoff_rwn_read_v(void) +{ + return 1; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 
host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x07_vm.h b/drivers/gpu/host1x/hw/hw_host1x07_vm.h new file mode 100644 index 000000000000..b766851d5b83 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x07_vm.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018 NVIDIA Corporation. + */ + +#define HOST1X_CHANNEL_DMASTART 0x0000 +#define HOST1X_CHANNEL_DMASTART_HI 0x0004 +#define HOST1X_CHANNEL_DMAPUT 0x0008 +#define HOST1X_CHANNEL_DMAPUT_HI 0x000c +#define HOST1X_CHANNEL_DMAGET 0x0010 +#define HOST1X_CHANNEL_DMAGET_HI 0x0014 +#define HOST1X_CHANNEL_DMAEND 0x0018 +#define HOST1X_CHANNEL_DMAEND_HI 0x001c +#define HOST1X_CHANNEL_DMACTRL 0x0020 +#define HOST1X_CHANNEL_DMACTRL_DMASTOP BIT(0) +#define HOST1X_CHANNEL_DMACTRL_DMAGETRST BIT(1) +#define HOST1X_CHANNEL_DMACTRL_DMAINITGET BIT(2) +#define HOST1X_CHANNEL_CMDFIFO_STAT 0x0024 +#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY BIT(13) +#define HOST1X_CHANNEL_CMDFIFO_RDATA 0x0028 +#define HOST1X_CHANNEL_CMDP_OFFSET 0x0030 +#define HOST1X_CHANNEL_CMDP_CLASS 0x0034 +#define HOST1X_CHANNEL_CHANNELSTAT 0x0038 +#define HOST1X_CHANNEL_CMDPROC_STOP 0x0048 +#define HOST1X_CHANNEL_TEARDOWN 0x004c + +#define HOST1X_SYNC_SYNCPT_CPU_INCR(x) (0x6400 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x) (0x6464 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x) (0x652c + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x) (0x6590 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT(x) (0x8080 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_INT_THRESH(x) (0x9980 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_CH_APP(x) (0xa604 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v) (((v) & 0x3f) << 8) diff --git a/drivers/gpu/host1x/hw/hw_host1x08_channel.h b/drivers/gpu/host1x/hw/hw_host1x08_channel.h new file mode 100644 index 000000000000..c9272d2ab14a --- /dev/null 
+++ b/drivers/gpu/host1x/hw/hw_host1x08_channel.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#ifndef HOST1X_HW_HOST1X08_CHANNEL_H +#define HOST1X_HW_HOST1X08_CHANNEL_H + +#define HOST1X_CHANNEL_SMMU_STREAMID 0x084 + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x08_common.h b/drivers/gpu/host1x/hw/hw_host1x08_common.h new file mode 100644 index 000000000000..8e0c99150ec2 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_common.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#define HOST1X_COMMON_OFA_MLOCK 0x4050 +#define HOST1X_COMMON_NVJPG1_MLOCK 0x4070 +#define HOST1X_COMMON_VIC_MLOCK 0x4078 +#define HOST1X_COMMON_NVENC_MLOCK 0x407c +#define HOST1X_COMMON_NVDEC_MLOCK 0x4080 +#define HOST1X_COMMON_NVJPG_MLOCK 0x4084 diff --git a/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h new file mode 100644 index 000000000000..22964324c914 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_hypervisor.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#define HOST1X_HV_SYNCPT_PROT_EN 0x1724 +#define HOST1X_HV_SYNCPT_PROT_EN_CH_EN BIT(1) +#define HOST1X_HV_CH_MLOCK_EN(x) (0x1700 + (x * 4)) +#define HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(x) (0x1710 + (x * 4)) diff --git a/drivers/gpu/host1x/hw/hw_host1x08_uclass.h b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h new file mode 100644 index 000000000000..4fb1d090edae --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_uclass.h @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2018 NVIDIA Corporation. + */ + + /* + * Function naming determines intended use: + * + * <x>_r(void) : Returns the offset for register <x>. + * + * <x>_w(void) : Returns the word offset for word (4 byte) element <x>. 
+ * + * <x>_<y>_s(void) : Returns size of field <y> of register <x> in bits. + * + * <x>_<y>_f(u32 v) : Returns a value based on 'v' which has been shifted + * and masked to place it at field <y> of register <x>. This value + * can be |'d with others to produce a full register value for + * register <x>. + * + * <x>_<y>_m(void) : Returns a mask for field <y> of register <x>. This + * value can be ~'d and then &'d to clear the value of field <y> for + * register <x>. + * + * <x>_<y>_<z>_f(void) : Returns the constant value <z> after being shifted + * to place it at field <y> of register <x>. This value can be |'d + * with others to produce a full register value for <x>. + * + * <x>_<y>_v(u32 r) : Returns the value of field <y> from a full register + * <x> value 'r' after being shifted to place its LSB at bit 0. + * This value is suitable for direct comparison with other unshifted + * values appropriate for use in field <y> of register <x>. + * + * <x>_<y>_<z>_v(void) : Returns the constant value for <z> defined for + * field <y> of register <x>. This value is suitable for direct + * comparison with unshifted values appropriate for use in field <y> + * of register <x>. 
+ */ + +#ifndef HOST1X_HW_HOST1X08_UCLASS_H +#define HOST1X_HW_HOST1X08_UCLASS_H + +static inline u32 host1x_uclass_incr_syncpt_r(void) +{ + return 0x0; +} +#define HOST1X_UCLASS_INCR_SYNCPT \ + host1x_uclass_incr_syncpt_r() +static inline u32 host1x_uclass_incr_syncpt_cond_f(u32 v) +{ + return (v & 0xff) << 10; +} +#define HOST1X_UCLASS_INCR_SYNCPT_COND_F(v) \ + host1x_uclass_incr_syncpt_cond_f(v) +static inline u32 host1x_uclass_incr_syncpt_indx_f(u32 v) +{ + return (v & 0x3ff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_INDX_F(v) \ + host1x_uclass_incr_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_r(void) +{ + return 0x8; +} +#define HOST1X_UCLASS_WAIT_SYNCPT \ + host1x_uclass_wait_syncpt_r() +static inline u32 host1x_uclass_wait_syncpt_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_INDX_F(v) \ + host1x_uclass_wait_syncpt_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_thresh_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_THRESH_F(v) \ + host1x_uclass_wait_syncpt_thresh_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_r(void) +{ + return 0x9; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE \ + host1x_uclass_wait_syncpt_base_r() +static inline u32 host1x_uclass_wait_syncpt_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 16; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_wait_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_wait_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffff) << 0; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_wait_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_load_syncpt_base_r(void) +{ + return 0xb; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE \ + host1x_uclass_load_syncpt_base_r() 
+static inline u32 host1x_uclass_load_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_load_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_load_syncpt_base_value_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(v) \ + host1x_uclass_load_syncpt_base_value_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_base_indx_f(u32 v) +{ + return (v & 0xff) << 24; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_BASE_INDX_F(v) \ + host1x_uclass_incr_syncpt_base_base_indx_f(v) +static inline u32 host1x_uclass_incr_syncpt_base_offset_f(u32 v) +{ + return (v & 0xffffff) << 0; +} +#define HOST1X_UCLASS_INCR_SYNCPT_BASE_OFFSET_F(v) \ + host1x_uclass_incr_syncpt_base_offset_f(v) +static inline u32 host1x_uclass_indoff_r(void) +{ + return 0x2d; +} +#define HOST1X_UCLASS_INDOFF \ + host1x_uclass_indoff_r() +static inline u32 host1x_uclass_indoff_indbe_f(u32 v) +{ + return (v & 0xf) << 28; +} +#define HOST1X_UCLASS_INDOFF_INDBE_F(v) \ + host1x_uclass_indoff_indbe_f(v) +static inline u32 host1x_uclass_indoff_autoinc_f(u32 v) +{ + return (v & 0x1) << 27; +} +#define HOST1X_UCLASS_INDOFF_AUTOINC_F(v) \ + host1x_uclass_indoff_autoinc_f(v) +static inline u32 host1x_uclass_indoff_indmodid_f(u32 v) +{ + return (v & 0xff) << 18; +} +#define HOST1X_UCLASS_INDOFF_INDMODID_F(v) \ + host1x_uclass_indoff_indmodid_f(v) +static inline u32 host1x_uclass_indoff_indroffset_f(u32 v) +{ + return (v & 0xffff) << 2; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_indoff_rwn_read_v(void) +{ + return 1; +} +#define HOST1X_UCLASS_INDOFF_INDROFFSET_F(v) \ + host1x_uclass_indoff_indroffset_f(v) +static inline u32 host1x_uclass_load_syncpt_payload_32_r(void) +{ + return 0x4e; +} +#define HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32 \ + host1x_uclass_load_syncpt_payload_32_r() +static inline u32 
host1x_uclass_wait_syncpt_32_r(void) +{ + return 0x50; +} +#define HOST1X_UCLASS_WAIT_SYNCPT_32 \ + host1x_uclass_wait_syncpt_32_r() + +#endif diff --git a/drivers/gpu/host1x/hw/hw_host1x08_vm.h b/drivers/gpu/host1x/hw/hw_host1x08_vm.h new file mode 100644 index 000000000000..1455a4670bf8 --- /dev/null +++ b/drivers/gpu/host1x/hw/hw_host1x08_vm.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#define HOST1X_CHANNEL_DMASTART 0x0000 +#define HOST1X_CHANNEL_DMASTART_HI 0x0004 +#define HOST1X_CHANNEL_DMAPUT 0x0008 +#define HOST1X_CHANNEL_DMAPUT_HI 0x000c +#define HOST1X_CHANNEL_DMAGET 0x0010 +#define HOST1X_CHANNEL_DMAGET_HI 0x0014 +#define HOST1X_CHANNEL_DMAEND 0x0018 +#define HOST1X_CHANNEL_DMAEND_HI 0x001c +#define HOST1X_CHANNEL_DMACTRL 0x0020 +#define HOST1X_CHANNEL_DMACTRL_DMASTOP BIT(0) +#define HOST1X_CHANNEL_DMACTRL_DMAGETRST BIT(1) +#define HOST1X_CHANNEL_DMACTRL_DMAINITGET BIT(2) +#define HOST1X_CHANNEL_CMDFIFO_STAT 0x0024 +#define HOST1X_CHANNEL_CMDFIFO_STAT_EMPTY BIT(13) +#define HOST1X_CHANNEL_CMDFIFO_RDATA 0x0028 +#define HOST1X_CHANNEL_CMDP_OFFSET 0x0030 +#define HOST1X_CHANNEL_CMDP_CLASS 0x0034 +#define HOST1X_CHANNEL_CHANNELSTAT 0x0038 +#define HOST1X_CHANNEL_CMDPROC_STOP 0x0048 +#define HOST1X_CHANNEL_TEARDOWN 0x004c +#define HOST1X_CHANNEL_SMMU_STREAMID 0x0084 + +#define HOST1X_SYNC_SYNCPT_CPU_INCR(x) (0x6400 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(x) (0x6600 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_INTR_DEST(x) (0x6684 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(x) (0x770c + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(x) (0x7790 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT(x) (0x8080 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_INT_THRESH(x) (0xa088 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_CH_APP(x) (0xb090 + 4 * (x)) +#define HOST1X_SYNC_SYNCPT_CH_APP_CH(v) (((v) & 0x3f) << 8) diff --git a/drivers/gpu/host1x/hw/intr_hw.c 
b/drivers/gpu/host1x/hw/intr_hw.c index dacb8009a605..bd5b5ef62f35 100644 --- a/drivers/gpu/host1x/hw/intr_hw.c +++ b/drivers/gpu/host1x/hw/intr_hw.c @@ -1,66 +1,79 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Tegra host1x Interrupt Management * * Copyright (C) 2010 Google, Inc. * Copyright (c) 2010-2013, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -#include <linux/interrupt.h> -#include <linux/irq.h> #include <linux/io.h> #include "../intr.h" #include "../dev.h" -/* - * Sync point threshold interrupt service function - * Handles sync point threshold triggers, in interrupt context - */ -static void host1x_intr_syncpt_handle(struct host1x_syncpt *syncpt) +static void process_32_syncpts(struct host1x *host, unsigned long val, u32 reg_offset) { - unsigned int id = syncpt->id; - struct host1x *host = syncpt->host; + unsigned int id; - host1x_sync_writel(host, BIT_MASK(id), - HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(BIT_WORD(id))); - host1x_sync_writel(host, BIT_MASK(id), - HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(BIT_WORD(id))); + if (!val) + return; - schedule_work(&syncpt->intr.work); + host1x_sync_writel(host, val, HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(reg_offset)); + host1x_sync_writel(host, val, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(reg_offset)); + + for_each_set_bit(id, &val, 32) + host1x_intr_handle_interrupt(host, reg_offset * 32 + id); } static irqreturn_t syncpt_thresh_isr(int 
irq, void *dev_id) { - struct host1x *host = dev_id; + struct host1x_intr_irq_data *irq_data = dev_id; + struct host1x *host = irq_data->host; unsigned long reg; - unsigned int i, id; + unsigned int i; - for (i = 0; i < DIV_ROUND_UP(host->info->nb_pts, 32); i++) { +#if !defined(CONFIG_64BIT) + for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 32); + i += host->num_syncpt_irqs) { reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); - for_each_set_bit(id, &reg, BITS_PER_LONG) { - struct host1x_syncpt *syncpt = - host->syncpt + (i * BITS_PER_LONG + id); - host1x_intr_syncpt_handle(syncpt); - } + + process_32_syncpts(host, reg, i); + } +#elif HOST1X_HW == 6 || HOST1X_HW == 7 + /* + * Tegra186 and Tegra194 have the first INT_STATUS register not 64-bit aligned, + * and only have one interrupt line. + */ + reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(0)); + process_32_syncpts(host, reg, 0); + + for (i = 1; i < (host->info->nb_pts / 32) - 1; i += 2) { + reg = host1x_sync_readq(host, + HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); + + process_32_syncpts(host, lower_32_bits(reg), i); + process_32_syncpts(host, upper_32_bits(reg), i + 1); } + reg = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i)); + process_32_syncpts(host, reg, i); +#else + /* All 64-bit capable SoCs have number of syncpoints divisible by 64 */ + for (i = irq_data->offset; i < DIV_ROUND_UP(host->info->nb_pts, 64); + i += host->num_syncpt_irqs) { + reg = host1x_sync_readq(host, + HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(i * 2)); + + process_32_syncpts(host, lower_32_bits(reg), i * 2 + 0); + process_32_syncpts(host, upper_32_bits(reg), i * 2 + 1); + } +#endif + return IRQ_HANDLED; } -static void _host1x_intr_disable_all_syncpt_intrs(struct host1x *host) +static void host1x_intr_disable_all_syncpt_intrs(struct host1x *host) { unsigned int i; @@ -73,25 +86,9 @@ static void _host1x_intr_disable_all_syncpt_intrs(struct host1x 
*host) } static int -_host1x_intr_init_host_sync(struct host1x *host, u32 cpm, - void (*syncpt_thresh_work)(struct work_struct *)) +host1x_intr_init_host_sync(struct host1x *host, u32 cpm) { - unsigned int i; - int err; - - host1x_hw_intr_disable_all_syncpt_intrs(host); - - for (i = 0; i < host->info->nb_pts; i++) - INIT_WORK(&host->syncpt[i].intr.work, syncpt_thresh_work); - - err = devm_request_irq(host->dev, host->intr_syncpt_irq, - syncpt_thresh_isr, IRQF_SHARED, - "host1x_syncpt", host); - if (err < 0) { - WARN_ON(1); - return err; - } - +#if HOST1X_HW < 6 /* disable the ip_busy_timeout. this prevents write drops */ host1x_sync_writel(host, 0, HOST1X_SYNC_IP_BUSY_TIMEOUT); @@ -103,50 +100,55 @@ _host1x_intr_init_host_sync(struct host1x *host, u32 cpm, /* update host clocks per usec */ host1x_sync_writel(host, cpm, HOST1X_SYNC_USEC_CLK); +#endif +#if HOST1X_HW >= 8 + u32 id; + + /* + * Program threshold interrupt destination among 8 lines per VM, + * per syncpoint. For each group of 64 syncpoints (corresponding to two + * interrupt status registers), direct to one interrupt line, going + * around in a round robin fashion. 
+ */ + for (id = 0; id < host->info->nb_pts; id++) { + u32 reg_offset = id / 64; + u32 irq_index = reg_offset % host->num_syncpt_irqs; + + host1x_sync_writel(host, irq_index, HOST1X_SYNC_SYNCPT_INTR_DEST(id)); + } +#endif return 0; } -static void _host1x_intr_set_syncpt_threshold(struct host1x *host, +static void host1x_intr_set_syncpt_threshold(struct host1x *host, unsigned int id, u32 thresh) { host1x_sync_writel(host, thresh, HOST1X_SYNC_SYNCPT_INT_THRESH(id)); } -static void _host1x_intr_enable_syncpt_intr(struct host1x *host, +static void host1x_intr_enable_syncpt_intr(struct host1x *host, unsigned int id) { - host1x_sync_writel(host, BIT_MASK(id), - HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(BIT_WORD(id))); + host1x_sync_writel(host, BIT(id % 32), + HOST1X_SYNC_SYNCPT_THRESH_INT_ENABLE_CPU0(id / 32)); } -static void _host1x_intr_disable_syncpt_intr(struct host1x *host, +static void host1x_intr_disable_syncpt_intr(struct host1x *host, unsigned int id) { - host1x_sync_writel(host, BIT_MASK(id), - HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(BIT_WORD(id))); - host1x_sync_writel(host, BIT_MASK(id), - HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(BIT_WORD(id))); -} - -static int _host1x_free_syncpt_irq(struct host1x *host) -{ - unsigned int i; - - devm_free_irq(host->dev, host->intr_syncpt_irq, host); - - for (i = 0; i < host->info->nb_pts; i++) - cancel_work_sync(&host->syncpt[i].intr.work); - - return 0; + host1x_sync_writel(host, BIT(id % 32), + HOST1X_SYNC_SYNCPT_THRESH_INT_DISABLE(id / 32)); + host1x_sync_writel(host, BIT(id % 32), + HOST1X_SYNC_SYNCPT_THRESH_CPU0_INT_STATUS(id / 32)); } static const struct host1x_intr_ops host1x_intr_ops = { - .init_host_sync = _host1x_intr_init_host_sync, - .set_syncpt_threshold = _host1x_intr_set_syncpt_threshold, - .enable_syncpt_intr = _host1x_intr_enable_syncpt_intr, - .disable_syncpt_intr = _host1x_intr_disable_syncpt_intr, - .disable_all_syncpt_intrs = _host1x_intr_disable_all_syncpt_intrs, - .free_syncpt_irq = 
_host1x_free_syncpt_irq, + .init_host_sync = host1x_intr_init_host_sync, + .set_syncpt_threshold = host1x_intr_set_syncpt_threshold, + .enable_syncpt_intr = host1x_intr_enable_syncpt_intr, + .disable_syncpt_intr = host1x_intr_disable_syncpt_intr, + .disable_all_syncpt_intrs = host1x_intr_disable_all_syncpt_intrs, + .isr = syncpt_thresh_isr, }; diff --git a/drivers/gpu/host1x/hw/opcodes.h b/drivers/gpu/host1x/hw/opcodes.h new file mode 100644 index 000000000000..649614499b04 --- /dev/null +++ b/drivers/gpu/host1x/hw/opcodes.h @@ -0,0 +1,150 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Tegra host1x opcodes + * + * Copyright (c) 2022 NVIDIA Corporation. + */ + +#ifndef __HOST1X_OPCODES_H +#define __HOST1X_OPCODES_H + +#include <linux/types.h> + +static inline u32 host1x_class_host_wait_syncpt( + unsigned indx, unsigned threshold) +{ + return host1x_uclass_wait_syncpt_indx_f(indx) + | host1x_uclass_wait_syncpt_thresh_f(threshold); +} + +static inline u32 host1x_class_host_load_syncpt_base( + unsigned indx, unsigned threshold) +{ + return host1x_uclass_load_syncpt_base_base_indx_f(indx) + | host1x_uclass_load_syncpt_base_value_f(threshold); +} + +static inline u32 host1x_class_host_wait_syncpt_base( + unsigned indx, unsigned base_indx, unsigned offset) +{ + return host1x_uclass_wait_syncpt_base_indx_f(indx) + | host1x_uclass_wait_syncpt_base_base_indx_f(base_indx) + | host1x_uclass_wait_syncpt_base_offset_f(offset); +} + +static inline u32 host1x_class_host_incr_syncpt_base( + unsigned base_indx, unsigned offset) +{ + return host1x_uclass_incr_syncpt_base_base_indx_f(base_indx) + | host1x_uclass_incr_syncpt_base_offset_f(offset); +} + +static inline u32 host1x_class_host_incr_syncpt( + unsigned cond, unsigned indx) +{ + return host1x_uclass_incr_syncpt_cond_f(cond) + | host1x_uclass_incr_syncpt_indx_f(indx); +} + +static inline u32 host1x_class_host_indoff_reg_write( + unsigned mod_id, unsigned offset, bool auto_inc) +{ + u32 v = 
host1x_uclass_indoff_indbe_f(0xf) + | host1x_uclass_indoff_indmodid_f(mod_id) + | host1x_uclass_indoff_indroffset_f(offset); + if (auto_inc) + v |= host1x_uclass_indoff_autoinc_f(1); + return v; +} + +static inline u32 host1x_class_host_indoff_reg_read( + unsigned mod_id, unsigned offset, bool auto_inc) +{ + u32 v = host1x_uclass_indoff_indmodid_f(mod_id) + | host1x_uclass_indoff_indroffset_f(offset) + | host1x_uclass_indoff_rwn_read_v(); + if (auto_inc) + v |= host1x_uclass_indoff_autoinc_f(1); + return v; +} + +static inline u32 host1x_opcode_setclass( + unsigned class_id, unsigned offset, unsigned mask) +{ + return (0 << 28) | (offset << 16) | (class_id << 6) | mask; +} + +static inline u32 host1x_opcode_incr(unsigned offset, unsigned count) +{ + return (1 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_opcode_nonincr(unsigned offset, unsigned count) +{ + return (2 << 28) | (offset << 16) | count; +} + +static inline u32 host1x_opcode_mask(unsigned offset, unsigned mask) +{ + return (3 << 28) | (offset << 16) | mask; +} + +static inline u32 host1x_opcode_imm(unsigned offset, unsigned value) +{ + return (4 << 28) | (offset << 16) | value; +} + +static inline u32 host1x_opcode_imm_incr_syncpt(unsigned cond, unsigned indx) +{ + return host1x_opcode_imm(host1x_uclass_incr_syncpt_r(), + host1x_class_host_incr_syncpt(cond, indx)); +} + +static inline u32 host1x_opcode_restart(unsigned address) +{ + return (5 << 28) | (address >> 4); +} + +static inline u32 host1x_opcode_gather(unsigned count) +{ + return (6 << 28) | count; +} + +static inline u32 host1x_opcode_gather_nonincr(unsigned offset, unsigned count) +{ + return (6 << 28) | (offset << 16) | BIT(15) | count; +} + +static inline u32 host1x_opcode_gather_incr(unsigned offset, unsigned count) +{ + return (6 << 28) | (offset << 16) | BIT(15) | BIT(14) | count; +} + +static inline u32 host1x_opcode_setstreamid(unsigned streamid) +{ + return (7 << 28) | streamid; +} + +static inline u32 
host1x_opcode_setpayload(unsigned payload) +{ + return (9 << 28) | payload; +} + +static inline u32 host1x_opcode_gather_wide(unsigned count) +{ + return (12 << 28) | count; +} + +static inline u32 host1x_opcode_acquire_mlock(unsigned mlock) +{ + return (14 << 28) | (0 << 24) | mlock; +} + +static inline u32 host1x_opcode_release_mlock(unsigned mlock) +{ + return (14 << 28) | (1 << 24) | mlock; +} + +#define HOST1X_OPCODE_NOP host1x_opcode_nonincr(0, 0) + +#endif diff --git a/drivers/gpu/host1x/hw/syncpt_hw.c b/drivers/gpu/host1x/hw/syncpt_hw.c index c93f74fcce72..8cf35b2eff3d 100644 --- a/drivers/gpu/host1x/hw/syncpt_hw.c +++ b/drivers/gpu/host1x/hw/syncpt_hw.c @@ -1,19 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Tegra host1x Syncpoints * * Copyright (c) 2010-2013, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ #include <linux/io.h> @@ -37,10 +26,12 @@ static void syncpt_restore(struct host1x_syncpt *sp) */ static void syncpt_restore_wait_base(struct host1x_syncpt *sp) { +#if HOST1X_HW < 7 struct host1x *host = sp->host; host1x_sync_writel(host, sp->base_val, HOST1X_SYNC_SYNCPT_BASE(sp->id)); +#endif } /* @@ -48,10 +39,12 @@ static void syncpt_restore_wait_base(struct host1x_syncpt *sp) */ static void syncpt_read_wait_base(struct host1x_syncpt *sp) { +#if HOST1X_HW < 7 struct host1x *host = sp->host; sp->base_val = host1x_sync_readl(host, HOST1X_SYNC_SYNCPT_BASE(sp->id)); +#endif } /* @@ -89,21 +82,52 @@ static int syncpt_cpu_incr(struct host1x_syncpt *sp) host1x_syncpt_idle(sp)) return -EINVAL; - host1x_sync_writel(host, BIT_MASK(sp->id), + host1x_sync_writel(host, BIT(sp->id % 32), HOST1X_SYNC_SYNCPT_CPU_INCR(reg_offset)); wmb(); return 0; } -/* remove a wait pointed to by patch_addr */ -static int syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr) +/** + * syncpt_assign_to_channel() - Assign syncpoint to channel + * @sp: syncpoint + * @ch: channel + * + * On chips with the syncpoint protection feature (Tegra186+), assign @sp to + * @ch, preventing other channels from incrementing the syncpoints. If @ch is + * NULL, unassigns the syncpoint. + * + * On older chips, do nothing. + */ +static void syncpt_assign_to_channel(struct host1x_syncpt *sp, + struct host1x_channel *ch) { - u32 override = host1x_class_host_wait_syncpt(HOST1X_SYNCPT_RESERVED, 0); +#if HOST1X_HW >= 6 + struct host1x *host = sp->host; - *((u32 *)patch_addr) = override; + host1x_sync_writel(host, + HOST1X_SYNC_SYNCPT_CH_APP_CH(ch ? ch->id : 0xff), + HOST1X_SYNC_SYNCPT_CH_APP(sp->id)); +#endif +} - return 0; +/** + * syncpt_enable_protection() - Enable syncpoint protection + * @host: host1x instance + * + * On chips with the syncpoint protection feature (Tegra186+), enable this + * feature. On older chips, do nothing. 
+ */ +static void syncpt_enable_protection(struct host1x *host) +{ +#if HOST1X_HW >= 6 + if (!host->hv_regs) + return; + + host1x_hypervisor_writel(host, HOST1X_HV_SYNCPT_PROT_EN_CH_EN, + HOST1X_HV_SYNCPT_PROT_EN); +#endif } static const struct host1x_syncpt_ops host1x_syncpt_ops = { @@ -112,5 +136,6 @@ static const struct host1x_syncpt_ops host1x_syncpt_ops = { .load_wait_base = syncpt_read_wait_base, .load = syncpt_load, .cpu_incr = syncpt_cpu_incr, - .patch_wait = syncpt_patch_wait, + .assign_to_channel = syncpt_assign_to_channel, + .enable_protection = syncpt_enable_protection, }; diff --git a/drivers/gpu/host1x/intr.c b/drivers/gpu/host1x/intr.c index 8b4fad0ab35d..f77a678949e9 100644 --- a/drivers/gpu/host1x/intr.c +++ b/drivers/gpu/host1x/intr.c @@ -1,303 +1,138 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Tegra host1x Interrupt Management * - * Copyright (c) 2010-2013, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (c) 2010-2021, NVIDIA Corporation. 
*/ #include <linux/clk.h> #include <linux/interrupt.h> -#include <linux/slab.h> -#include <linux/irq.h> - -#include <trace/events/host1x.h> -#include "channel.h" #include "dev.h" +#include "fence.h" #include "intr.h" -/* Wait list management */ - -enum waitlist_state { - WLS_PENDING, - WLS_REMOVED, - WLS_CANCELLED, - WLS_HANDLED -}; - -static void waiter_release(struct kref *kref) -{ - kfree(container_of(kref, struct host1x_waitlist, refcount)); -} - -/* - * add a waiter to a waiter queue, sorted by threshold - * returns true if it was added at the head of the queue - */ -static bool add_waiter_to_queue(struct host1x_waitlist *waiter, - struct list_head *queue) +static void host1x_intr_add_fence_to_list(struct host1x_fence_list *list, + struct host1x_syncpt_fence *fence) { - struct host1x_waitlist *pos; - u32 thresh = waiter->thresh; + struct host1x_syncpt_fence *fence_in_list; - list_for_each_entry_reverse(pos, queue, list) - if ((s32)(pos->thresh - thresh) <= 0) { - list_add(&waiter->list, &pos->list); - return false; + list_for_each_entry_reverse(fence_in_list, &list->list, list) { + if ((s32)(fence_in_list->threshold - fence->threshold) <= 0) { + /* Fence in list is before us, we can insert here */ + list_add(&fence->list, &fence_in_list->list); + return; } + } - list_add(&waiter->list, queue); - return true; + /* Add as first in list */ + list_add(&fence->list, &list->list); } -/* - * run through a waiter queue for a single sync point ID - * and gather all completed waiters into lists by actions - */ -static void remove_completed_waiters(struct list_head *head, u32 sync, - struct list_head completed[HOST1X_INTR_ACTION_COUNT]) +static void host1x_intr_update_hw_state(struct host1x *host, struct host1x_syncpt *sp) { - struct list_head *dest; - struct host1x_waitlist *waiter, *next, *prev; + struct host1x_syncpt_fence *fence; - list_for_each_entry_safe(waiter, next, head, list) { - if ((s32)(waiter->thresh - sync) > 0) - break; + if 
(!list_empty(&sp->fences.list)) { + fence = list_first_entry(&sp->fences.list, struct host1x_syncpt_fence, list); - dest = completed + waiter->action; - - /* consolidate submit cleanups */ - if (waiter->action == HOST1X_INTR_ACTION_SUBMIT_COMPLETE && - !list_empty(dest)) { - prev = list_entry(dest->prev, - struct host1x_waitlist, list); - if (prev->data == waiter->data) { - prev->count++; - dest = NULL; - } - } - - /* PENDING->REMOVED or CANCELLED->HANDLED */ - if (atomic_inc_return(&waiter->state) == WLS_HANDLED || !dest) { - list_del(&waiter->list); - kref_put(&waiter->refcount, waiter_release); - } else - list_move_tail(&waiter->list, dest); + host1x_hw_intr_set_syncpt_threshold(host, sp->id, fence->threshold); + host1x_hw_intr_enable_syncpt_intr(host, sp->id); + } else { + host1x_hw_intr_disable_syncpt_intr(host, sp->id); } } -static void reset_threshold_interrupt(struct host1x *host, - struct list_head *head, - unsigned int id) +void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence) { - u32 thresh = - list_first_entry(head, struct host1x_waitlist, list)->thresh; + struct host1x_fence_list *fence_list = &fence->sp->fences; - host1x_hw_intr_set_syncpt_threshold(host, id, thresh); - host1x_hw_intr_enable_syncpt_intr(host, id); -} + INIT_LIST_HEAD(&fence->list); -static void action_submit_complete(struct host1x_waitlist *waiter) -{ - struct host1x_channel *channel = waiter->data; - - host1x_cdma_update(&channel->cdma); - - /* Add nr_completed to trace */ - trace_host1x_channel_submit_complete(dev_name(channel->dev), - waiter->count, waiter->thresh); - -} - -static void action_wakeup(struct host1x_waitlist *waiter) -{ - wait_queue_head_t *wq = waiter->data; - - wake_up(wq); + host1x_intr_add_fence_to_list(fence_list, fence); + host1x_intr_update_hw_state(host, fence->sp); } -static void action_wakeup_interruptible(struct host1x_waitlist *waiter) +bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *fence) 
{ - wait_queue_head_t *wq = waiter->data; - - wake_up_interruptible(wq); -} + struct host1x_fence_list *fence_list = &fence->sp->fences; + unsigned long irqflags; -typedef void (*action_handler)(struct host1x_waitlist *waiter); + spin_lock_irqsave(&fence_list->lock, irqflags); -static const action_handler action_handlers[HOST1X_INTR_ACTION_COUNT] = { - action_submit_complete, - action_wakeup, - action_wakeup_interruptible, -}; - -static void run_handlers(struct list_head completed[HOST1X_INTR_ACTION_COUNT]) -{ - struct list_head *head = completed; - int i; - - for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i, ++head) { - action_handler handler = action_handlers[i]; - struct host1x_waitlist *waiter, *next; - - list_for_each_entry_safe(waiter, next, head, list) { - list_del(&waiter->list); - handler(waiter); - WARN_ON(atomic_xchg(&waiter->state, WLS_HANDLED) != - WLS_REMOVED); - kref_put(&waiter->refcount, waiter_release); - } + if (list_empty(&fence->list)) { + spin_unlock_irqrestore(&fence_list->lock, irqflags); + return false; } -} - -/* - * Remove & handle all waiters that have completed for the given syncpt - */ -static int process_wait_list(struct host1x *host, - struct host1x_syncpt *syncpt, - u32 threshold) -{ - struct list_head completed[HOST1X_INTR_ACTION_COUNT]; - unsigned int i; - int empty; - - for (i = 0; i < HOST1X_INTR_ACTION_COUNT; ++i) - INIT_LIST_HEAD(completed + i); - spin_lock(&syncpt->intr.lock); + list_del_init(&fence->list); + host1x_intr_update_hw_state(host, fence->sp); - remove_completed_waiters(&syncpt->intr.wait_head, threshold, - completed); + spin_unlock_irqrestore(&fence_list->lock, irqflags); - empty = list_empty(&syncpt->intr.wait_head); - if (empty) - host1x_hw_intr_disable_syncpt_intr(host, syncpt->id); - else - reset_threshold_interrupt(host, &syncpt->intr.wait_head, - syncpt->id); - - spin_unlock(&syncpt->intr.lock); - - run_handlers(completed); - - return empty; -} - -/* - * Sync point threshold interrupt service thread function - 
* Handles sync point threshold triggers, in thread context - */ - -static void syncpt_thresh_work(struct work_struct *work) -{ - struct host1x_syncpt_intr *syncpt_intr = - container_of(work, struct host1x_syncpt_intr, work); - struct host1x_syncpt *syncpt = - container_of(syncpt_intr, struct host1x_syncpt, intr); - unsigned int id = syncpt->id; - struct host1x *host = syncpt->host; - - (void)process_wait_list(host, syncpt, - host1x_syncpt_load(host->syncpt + id)); + return true; } -int host1x_intr_add_action(struct host1x *host, unsigned int id, u32 thresh, - enum host1x_intr_action action, void *data, - struct host1x_waitlist *waiter, void **ref) +void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id) { - struct host1x_syncpt *syncpt; - int queue_was_empty; + struct host1x_syncpt *sp = &host->syncpt[id]; + struct host1x_syncpt_fence *fence, *tmp; + unsigned int value; - if (waiter == NULL) { - pr_warn("%s: NULL waiter\n", __func__); - return -EINVAL; - } - - /* initialize a new waiter */ - INIT_LIST_HEAD(&waiter->list); - kref_init(&waiter->refcount); - if (ref) - kref_get(&waiter->refcount); - waiter->thresh = thresh; - waiter->action = action; - atomic_set(&waiter->state, WLS_PENDING); - waiter->data = data; - waiter->count = 1; - - syncpt = host->syncpt + id; + value = host1x_syncpt_load(sp); - spin_lock(&syncpt->intr.lock); + spin_lock(&sp->fences.lock); - queue_was_empty = list_empty(&syncpt->intr.wait_head); - - if (add_waiter_to_queue(waiter, &syncpt->intr.wait_head)) { - /* added at head of list - new threshold value */ - host1x_hw_intr_set_syncpt_threshold(host, id, thresh); + list_for_each_entry_safe(fence, tmp, &sp->fences.list, list) { + if (((value - fence->threshold) & 0x80000000U) != 0U) { + /* Fence is not yet expired, we are done */ + break; + } - /* added as first waiter - enable interrupt */ - if (queue_was_empty) - host1x_hw_intr_enable_syncpt_intr(host, id); + list_del_init(&fence->list); + host1x_fence_signal(fence); } - 
spin_unlock(&syncpt->intr.lock); + /* Re-enable interrupt if necessary */ + host1x_intr_update_hw_state(host, sp); - if (ref) - *ref = waiter; - return 0; + spin_unlock(&sp->fences.lock); } -void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref) +int host1x_intr_init(struct host1x *host) { - struct host1x_waitlist *waiter = ref; - struct host1x_syncpt *syncpt; - - while (atomic_cmpxchg(&waiter->state, WLS_PENDING, WLS_CANCELLED) == - WLS_REMOVED) - schedule(); + struct host1x_intr_irq_data *irq_data; + unsigned int id; + int i, err; - syncpt = host->syncpt + id; - (void)process_wait_list(host, syncpt, - host1x_syncpt_load(host->syncpt + id)); + for (id = 0; id < host1x_syncpt_nb_pts(host); ++id) { + struct host1x_syncpt *syncpt = &host->syncpt[id]; - kref_put(&waiter->refcount, waiter_release); -} + spin_lock_init(&syncpt->fences.lock); + INIT_LIST_HEAD(&syncpt->fences.list); + } -int host1x_intr_init(struct host1x *host, unsigned int irq_sync) -{ - unsigned int id; - u32 nb_pts = host1x_syncpt_nb_pts(host); + irq_data = devm_kcalloc(host->dev, host->num_syncpt_irqs, sizeof(irq_data[0]), GFP_KERNEL); + if (!irq_data) + return -ENOMEM; - mutex_init(&host->intr_mutex); - host->intr_syncpt_irq = irq_sync; + host1x_hw_intr_disable_all_syncpt_intrs(host); - for (id = 0; id < nb_pts; ++id) { - struct host1x_syncpt *syncpt = host->syncpt + id; + for (i = 0; i < host->num_syncpt_irqs; i++) { + irq_data[i].host = host; + irq_data[i].offset = i; - spin_lock_init(&syncpt->intr.lock); - INIT_LIST_HEAD(&syncpt->intr.wait_head); - snprintf(syncpt->intr.thresh_irq_name, - sizeof(syncpt->intr.thresh_irq_name), - "host1x_sp_%02u", id); + err = devm_request_irq(host->dev, host->syncpt_irqs[i], + host->intr_op->isr, IRQF_SHARED, + "host1x_syncpt", &irq_data[i]); + if (err < 0) + return err; } - host1x_intr_start(host); - return 0; } void host1x_intr_deinit(struct host1x *host) { - host1x_intr_stop(host); } void host1x_intr_start(struct host1x *host) @@ -306,8 
+141,7 @@ void host1x_intr_start(struct host1x *host) int err; mutex_lock(&host->intr_mutex); - err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000), - syncpt_thresh_work); + err = host1x_hw_intr_init_host_sync(host, DIV_ROUND_UP(hz, 1000000)); if (err) { mutex_unlock(&host->intr_mutex); return; @@ -317,36 +151,5 @@ void host1x_intr_start(struct host1x *host) void host1x_intr_stop(struct host1x *host) { - unsigned int id; - struct host1x_syncpt *syncpt = host->syncpt; - u32 nb_pts = host1x_syncpt_nb_pts(host); - - mutex_lock(&host->intr_mutex); - host1x_hw_intr_disable_all_syncpt_intrs(host); - - for (id = 0; id < nb_pts; ++id) { - struct host1x_waitlist *waiter, *next; - - list_for_each_entry_safe(waiter, next, - &syncpt[id].intr.wait_head, list) { - if (atomic_cmpxchg(&waiter->state, - WLS_CANCELLED, WLS_HANDLED) == WLS_CANCELLED) { - list_del(&waiter->list); - kref_put(&waiter->refcount, waiter_release); - } - } - - if (!list_empty(&syncpt[id].intr.wait_head)) { - /* output diagnostics */ - mutex_unlock(&host->intr_mutex); - pr_warn("%s cannot stop syncpt intr id=%u\n", - __func__, id); - return; - } - } - - host1x_hw_intr_free_syncpt_irq(host); - - mutex_unlock(&host->intr_mutex); } diff --git a/drivers/gpu/host1x/intr.h b/drivers/gpu/host1x/intr.h index 1370c2bb75b8..11cdf13e32fe 100644 --- a/drivers/gpu/host1x/intr.h +++ b/drivers/gpu/host1x/intr.h @@ -1,93 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Tegra host1x Interrupt Management * - * Copyright (c) 2010-2013, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (c) 2010-2021, NVIDIA Corporation. */ #ifndef __HOST1X_INTR_H #define __HOST1X_INTR_H -#include <linux/interrupt.h> -#include <linux/workqueue.h> - struct host1x; +struct host1x_syncpt_fence; -enum host1x_intr_action { - /* - * Perform cleanup after a submit has completed. - * 'data' points to a channel - */ - HOST1X_INTR_ACTION_SUBMIT_COMPLETE = 0, - - /* - * Wake up a task. - * 'data' points to a wait_queue_head_t - */ - HOST1X_INTR_ACTION_WAKEUP, - - /* - * Wake up a interruptible task. - * 'data' points to a wait_queue_head_t - */ - HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE, - - HOST1X_INTR_ACTION_COUNT -}; - -struct host1x_syncpt_intr { - spinlock_t lock; - struct list_head wait_head; - char thresh_irq_name[12]; - struct work_struct work; -}; - -struct host1x_waitlist { - struct list_head list; - struct kref refcount; - u32 thresh; - enum host1x_intr_action action; - atomic_t state; - void *data; - int count; +struct host1x_intr_irq_data { + struct host1x *host; + u32 offset; }; -/* - * Schedule an action to be taken when a sync point reaches the given threshold. - * - * @id the sync point - * @thresh the threshold - * @action the action to take - * @data a pointer to extra data depending on action, see above - * @waiter waiter structure - assumes ownership - * @ref must be passed if cancellation is possible, else NULL - * - * This is a non-blocking api. - */ -int host1x_intr_add_action(struct host1x *host, unsigned int id, u32 thresh, - enum host1x_intr_action action, void *data, - struct host1x_waitlist *waiter, void **ref); - -/* - * Unreference an action submitted to host1x_intr_add_action(). - * You must call this if you passed non-NULL as ref. 
- * @ref the ref returned from host1x_intr_add_action() - */ -void host1x_intr_put_ref(struct host1x *host, unsigned int id, void *ref); - /* Initialize host1x sync point interrupt */ -int host1x_intr_init(struct host1x *host, unsigned int irq_sync); +int host1x_intr_init(struct host1x *host); /* Deinitialize host1x sync point interrupt */ void host1x_intr_deinit(struct host1x *host); @@ -98,5 +28,10 @@ void host1x_intr_start(struct host1x *host); /* Disable host1x sync point interrupt */ void host1x_intr_stop(struct host1x *host); -irqreturn_t host1x_syncpt_thresh_fn(void *dev_id); +void host1x_intr_handle_interrupt(struct host1x *host, unsigned int id); + +void host1x_intr_add_fence_locked(struct host1x *host, struct host1x_syncpt_fence *fence); + +bool host1x_intr_remove_fence(struct host1x *host, struct host1x_syncpt_fence *fence); + #endif diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c index bee504406cfc..3ed49e1fd933 100644 --- a/drivers/gpu/host1x/job.c +++ b/drivers/gpu/host1x/job.c @@ -1,24 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Tegra host1x Job * * Copyright (c) 2010-2015, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ #include <linux/dma-mapping.h> #include <linux/err.h> #include <linux/host1x.h> +#include <linux/iommu.h> #include <linux/kref.h> #include <linux/module.h> #include <linux/scatterlist.h> @@ -35,19 +25,24 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, u32 num_cmdbufs, u32 num_relocs, - u32 num_waitchks) + bool skip_firewall) { struct host1x_job *job = NULL; - unsigned int num_unpins = num_cmdbufs + num_relocs; + unsigned int num_unpins = num_relocs; + bool enable_firewall; u64 total; void *mem; + enable_firewall = IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && !skip_firewall; + + if (!enable_firewall) + num_unpins += num_cmdbufs; + /* Check that we're not going to overflow */ total = sizeof(struct host1x_job) + (u64)num_relocs * sizeof(struct host1x_reloc) + (u64)num_unpins * sizeof(struct host1x_job_unpin_data) + - (u64)num_waitchks * sizeof(struct host1x_waitchk) + - (u64)num_cmdbufs * sizeof(struct host1x_job_gather) + + (u64)num_cmdbufs * sizeof(struct host1x_job_cmd) + (u64)num_unpins * sizeof(dma_addr_t) + (u64)num_unpins * sizeof(u32 *); if (total > ULONG_MAX) @@ -57,19 +52,19 @@ struct host1x_job *host1x_job_alloc(struct host1x_channel *ch, if (!job) return NULL; + job->enable_firewall = enable_firewall; + kref_init(&job->ref); job->channel = ch; /* Redistribute memory to the structs */ mem += sizeof(struct host1x_job); - job->relocarray = num_relocs ? mem : NULL; + job->relocs = num_relocs ? mem : NULL; mem += num_relocs * sizeof(struct host1x_reloc); job->unpins = num_unpins ? mem : NULL; mem += num_unpins * sizeof(struct host1x_job_unpin_data); - job->waitchk = num_waitchks ? mem : NULL; - mem += num_waitchks * sizeof(struct host1x_waitchk); - job->gathers = num_cmdbufs ? mem : NULL; - mem += num_cmdbufs * sizeof(struct host1x_job_gather); + job->cmds = num_cmdbufs ? mem : NULL; + mem += num_cmdbufs * sizeof(struct host1x_job_cmd); job->addr_phys = num_unpins ? 
mem : NULL; job->reloc_addr_phys = job->addr_phys; @@ -90,6 +85,22 @@ static void job_free(struct kref *ref) { struct host1x_job *job = container_of(ref, struct host1x_job, ref); + if (job->release) + job->release(job); + + if (job->fence) { + /* + * remove_callback is atomic w.r.t. fence signaling, so + * after the call returns, we know that the callback is not + * in execution, and the fence can be safely freed. + */ + dma_fence_remove_callback(job->fence, &job->fence_cb); + dma_fence_put(job->fence); + } + + if (job->syncpt) + host1x_syncpt_put(job->syncpt); + kfree(job); } @@ -100,95 +111,49 @@ void host1x_job_put(struct host1x_job *job) EXPORT_SYMBOL(host1x_job_put); void host1x_job_add_gather(struct host1x_job *job, struct host1x_bo *bo, - u32 words, u32 offset) + unsigned int words, unsigned int offset) { - struct host1x_job_gather *cur_gather = &job->gathers[job->num_gathers]; + struct host1x_job_gather *gather = &job->cmds[job->num_cmds].gather; - cur_gather->words = words; - cur_gather->bo = bo; - cur_gather->offset = offset; - job->num_gathers++; -} -EXPORT_SYMBOL(host1x_job_add_gather); + gather->words = words; + gather->bo = bo; + gather->offset = offset; -/* - * NULL an already satisfied WAIT_SYNCPT host method, by patching its - * args in the command stream. The method data is changed to reference - * a reserved (never given out or incr) HOST1X_SYNCPT_RESERVED syncpt - * with a matching threshold value of 0, so is guaranteed to be popped - * by the host HW. 
- */ -static void host1x_syncpt_patch_offset(struct host1x_syncpt *sp, - struct host1x_bo *h, u32 offset) -{ - void *patch_addr = NULL; - - /* patch the wait */ - patch_addr = host1x_bo_kmap(h, offset >> PAGE_SHIFT); - if (patch_addr) { - host1x_syncpt_patch_wait(sp, - patch_addr + (offset & ~PAGE_MASK)); - host1x_bo_kunmap(h, offset >> PAGE_SHIFT, patch_addr); - } else - pr_err("Could not map cmdbuf for wait check\n"); + job->num_cmds++; } +EXPORT_SYMBOL(host1x_job_add_gather); -/* - * Check driver supplied waitchk structs for syncpt thresholds - * that have already been satisfied and NULL the comparison (to - * avoid a wrap condition in the HW). - */ -static int do_waitchks(struct host1x_job *job, struct host1x *host, - struct host1x_job_gather *g) +void host1x_job_add_wait(struct host1x_job *job, u32 id, u32 thresh, + bool relative, u32 next_class) { - struct host1x_bo *patch = g->bo; - int i; - - /* compare syncpt vs wait threshold */ - for (i = 0; i < job->num_waitchk; i++) { - struct host1x_waitchk *wait = &job->waitchk[i]; - struct host1x_syncpt *sp = - host1x_syncpt_get(host, wait->syncpt_id); - - /* validate syncpt id */ - if (wait->syncpt_id > host1x_syncpt_nb_pts(host)) - continue; - - /* skip all other gathers */ - if (patch != wait->bo) - continue; + struct host1x_job_cmd *cmd = &job->cmds[job->num_cmds]; - trace_host1x_syncpt_wait_check(wait->bo, wait->offset, - wait->syncpt_id, wait->thresh, - host1x_syncpt_read_min(sp)); + cmd->is_wait = true; + cmd->wait.id = id; + cmd->wait.threshold = thresh; + cmd->wait.next_class = next_class; + cmd->wait.relative = relative; - if (host1x_syncpt_is_expired(sp, wait->thresh)) { - dev_dbg(host->dev, - "drop WAIT id %u (%s) thresh 0x%x, min 0x%x\n", - wait->syncpt_id, sp->name, wait->thresh, - host1x_syncpt_read_min(sp)); - - host1x_syncpt_patch_offset(sp, patch, - g->offset + wait->offset); - } - - wait->bo = NULL; - } - - return 0; + job->num_cmds++; } +EXPORT_SYMBOL(host1x_job_add_wait); static unsigned int 
pin_job(struct host1x *host, struct host1x_job *job) { + unsigned long mask = HOST1X_RELOC_READ | HOST1X_RELOC_WRITE; + struct host1x_client *client = job->client; + struct device *dev = client->dev; + struct host1x_job_gather *g; unsigned int i; int err; job->num_unpins = 0; for (i = 0; i < job->num_relocs; i++) { - struct host1x_reloc *reloc = &job->relocarray[i]; - struct sg_table *sgt; - dma_addr_t phys_addr; + struct host1x_reloc *reloc = &job->relocs[i]; + enum dma_data_direction direction; + struct host1x_bo_mapping *map; + struct host1x_bo *bo; reloc->target.bo = host1x_bo_get(reloc->target.bo); if (!reloc->target.bo) { @@ -196,43 +161,83 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) goto unpin; } - phys_addr = host1x_bo_pin(reloc->target.bo, &sgt); - if (!phys_addr) { + bo = reloc->target.bo; + + switch (reloc->flags & mask) { + case HOST1X_RELOC_READ: + direction = DMA_TO_DEVICE; + break; + + case HOST1X_RELOC_WRITE: + direction = DMA_FROM_DEVICE; + break; + + case HOST1X_RELOC_READ | HOST1X_RELOC_WRITE: + direction = DMA_BIDIRECTIONAL; + break; + + default: + err = -EINVAL; + goto unpin; + } + + map = host1x_bo_pin(dev, bo, direction, NULL); + if (IS_ERR(map)) { + err = PTR_ERR(map); + goto unpin; + } + + /* + * host1x clients are generally not able to do scatter-gather themselves, so fail + * if the buffer is discontiguous and we fail to map its SG table to a single + * contiguous chunk of I/O virtual memory. + */ + if (map->chunks > 1) { err = -EINVAL; goto unpin; } - job->addr_phys[job->num_unpins] = phys_addr; - job->unpins[job->num_unpins].bo = reloc->target.bo; - job->unpins[job->num_unpins].sgt = sgt; + job->addr_phys[job->num_unpins] = map->phys; + job->unpins[job->num_unpins].map = map; job->num_unpins++; } - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + /* + * We will copy gathers BO content later, so there is no need to + * hold and pin them. 
+ */ + if (job->enable_firewall) + return 0; + + for (i = 0; i < job->num_cmds; i++) { + struct host1x_bo_mapping *map; size_t gather_size = 0; struct scatterlist *sg; - struct sg_table *sgt; - dma_addr_t phys_addr; unsigned long shift; struct iova *alloc; unsigned int j; + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; + g->bo = host1x_bo_get(g->bo); if (!g->bo) { err = -EINVAL; goto unpin; } - phys_addr = host1x_bo_pin(g->bo, &sgt); - if (!phys_addr) { - err = -EINVAL; + map = host1x_bo_pin(host->dev, g->bo, DMA_TO_DEVICE, NULL); + if (IS_ERR(map)) { + err = PTR_ERR(map); goto unpin; } - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) { - for_each_sg(sgt->sgl, sg, sgt->nents, j) + if (host->domain) { + for_each_sgtable_sg(map->sgt, sg, j) gather_size += sg->length; + gather_size = iova_align(&host->iova, gather_size); shift = iova_shift(&host->iova); @@ -240,34 +245,32 @@ static unsigned int pin_job(struct host1x *host, struct host1x_job *job) host->iova_end >> shift, true); if (!alloc) { err = -ENOMEM; - goto unpin; + goto put; } - err = iommu_map_sg(host->domain, - iova_dma_addr(&host->iova, alloc), - sgt->sgl, sgt->nents, IOMMU_READ); + err = iommu_map_sgtable(host->domain, iova_dma_addr(&host->iova, alloc), + map->sgt, IOMMU_READ); if (err == 0) { __free_iova(&host->iova, alloc); err = -EINVAL; - goto unpin; + goto put; } - job->addr_phys[job->num_unpins] = - iova_dma_addr(&host->iova, alloc); - job->unpins[job->num_unpins].size = gather_size; - } else { - job->addr_phys[job->num_unpins] = phys_addr; + map->phys = iova_dma_addr(&host->iova, alloc); + map->size = gather_size; } - job->gather_addr_phys[i] = job->addr_phys[job->num_unpins]; - - job->unpins[job->num_unpins].bo = g->bo; - job->unpins[job->num_unpins].sgt = sgt; + job->addr_phys[job->num_unpins] = map->phys; + job->unpins[job->num_unpins].map = map; job->num_unpins++; + + job->gather_addr_phys[i] = map->phys; } return 0; +put: + host1x_bo_put(g->bo); unpin: 
host1x_job_unpin(job); return err; @@ -275,14 +278,13 @@ unpin: static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g) { - int i = 0; - u32 last_page = ~0; - void *cmdbuf_page_addr = NULL; + void *cmdbuf_addr = NULL; struct host1x_bo *cmdbuf = g->bo; + unsigned int i; /* pin & patch the relocs for one gather */ for (i = 0; i < job->num_relocs; i++) { - struct host1x_reloc *reloc = &job->relocarray[i]; + struct host1x_reloc *reloc = &job->relocs[i]; u32 reloc_addr = (job->reloc_addr_phys[i] + reloc->target.offset) >> reloc->shift; u32 *target; @@ -291,35 +293,29 @@ static int do_relocs(struct host1x_job *job, struct host1x_job_gather *g) if (cmdbuf != reloc->cmdbuf.bo) continue; - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { + if (job->enable_firewall) { target = (u32 *)job->gather_copy_mapped + reloc->cmdbuf.offset / sizeof(u32) + g->offset / sizeof(u32); goto patch_reloc; } - if (last_page != reloc->cmdbuf.offset >> PAGE_SHIFT) { - if (cmdbuf_page_addr) - host1x_bo_kunmap(cmdbuf, last_page, - cmdbuf_page_addr); + if (!cmdbuf_addr) { + cmdbuf_addr = host1x_bo_mmap(cmdbuf); - cmdbuf_page_addr = host1x_bo_kmap(cmdbuf, - reloc->cmdbuf.offset >> PAGE_SHIFT); - last_page = reloc->cmdbuf.offset >> PAGE_SHIFT; - - if (unlikely(!cmdbuf_page_addr)) { + if (unlikely(!cmdbuf_addr)) { pr_err("Could not map cmdbuf for relocation\n"); return -ENOMEM; } } - target = cmdbuf_page_addr + (reloc->cmdbuf.offset & ~PAGE_MASK); + target = cmdbuf_addr + reloc->cmdbuf.offset; patch_reloc: *target = reloc_addr; } - if (cmdbuf_page_addr) - host1x_bo_kunmap(cmdbuf, last_page, cmdbuf_page_addr); + if (cmdbuf_addr) + host1x_bo_munmap(cmdbuf, cmdbuf_addr); return 0; } @@ -339,17 +335,6 @@ static bool check_reloc(struct host1x_reloc *reloc, struct host1x_bo *cmdbuf, return true; } -static bool check_wait(struct host1x_waitchk *wait, struct host1x_bo *cmdbuf, - unsigned int offset) -{ - offset *= sizeof(u32); - - if (wait->bo != cmdbuf || wait->offset != offset) - return 
false; - - return true; -} - struct host1x_firewall { struct host1x_job *job; struct device *dev; @@ -357,9 +342,6 @@ struct host1x_firewall { unsigned int num_relocs; struct host1x_reloc *reloc; - unsigned int num_waitchks; - struct host1x_waitchk *waitchk; - struct host1x_bo *cmdbuf; unsigned int offset; @@ -386,20 +368,6 @@ static int check_register(struct host1x_firewall *fw, unsigned long offset) fw->reloc++; } - if (offset == HOST1X_WAIT_SYNCPT_OFFSET) { - if (fw->class != HOST1X_CLASS_HOST1X) - return -EINVAL; - - if (!fw->num_waitchks) - return -EINVAL; - - if (!check_wait(fw->waitchk, fw->cmdbuf, fw->offset)) - return -EINVAL; - - fw->num_waitchks--; - fw->waitchk++; - } - return 0; } @@ -553,23 +521,27 @@ out: return err; } -static inline int copy_gathers(struct host1x_job *job, struct device *dev) +static inline int copy_gathers(struct device *host, struct host1x_job *job, + struct device *dev) { struct host1x_firewall fw; size_t size = 0; size_t offset = 0; - int i; + unsigned int i; fw.job = job; fw.dev = dev; - fw.reloc = job->relocarray; + fw.reloc = job->relocs; fw.num_relocs = job->num_relocs; - fw.waitchk = job->waitchk; - fw.num_waitchks = job->num_waitchk; fw.class = job->class; - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; + + if (job->cmds[i].is_wait) + continue; + + g = &job->cmds[i].gather; size += g->words * sizeof(u32); } @@ -578,12 +550,12 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) * Try a non-blocking allocation from a higher priority pools first, * as awaiting for the allocation here is a major performance hit. 
*/ - job->gather_copy_mapped = dma_alloc_wc(dev, size, &job->gather_copy, + job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy, GFP_NOWAIT); /* the higher priority allocation failed, try the generic-blocking */ if (!job->gather_copy_mapped) - job->gather_copy_mapped = dma_alloc_wc(dev, size, + job->gather_copy_mapped = dma_alloc_wc(host, size, &job->gather_copy, GFP_KERNEL); if (!job->gather_copy_mapped) @@ -591,10 +563,14 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) job->gather_copy_size = size; - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; void *gather; + if (job->cmds[i].is_wait) + continue; + g = &job->cmds[i].gather; + /* Copy the gather */ gather = host1x_bo_mmap(g->bo); memcpy(job->gather_copy_mapped + offset, gather + g->offset, @@ -612,8 +588,8 @@ static inline int copy_gathers(struct host1x_job *job, struct device *dev) offset += g->words * sizeof(u32); } - /* No relocs and waitchks should remain at this point */ - if (fw.num_relocs || fw.num_waitchks) + /* No relocs should remain at this point */ + if (fw.num_relocs) return -EINVAL; return 0; @@ -624,57 +600,45 @@ int host1x_job_pin(struct host1x_job *job, struct device *dev) int err; unsigned int i, j; struct host1x *host = dev_get_drvdata(dev->parent); - DECLARE_BITMAP(waitchk_mask, host1x_syncpt_nb_pts(host)); - - bitmap_zero(waitchk_mask, host1x_syncpt_nb_pts(host)); - for (i = 0; i < job->num_waitchk; i++) { - u32 syncpt_id = job->waitchk[i].syncpt_id; - - if (syncpt_id < host1x_syncpt_nb_pts(host)) - set_bit(syncpt_id, waitchk_mask); - } - - /* get current syncpt values for waitchk */ - for_each_set_bit(i, waitchk_mask, host1x_syncpt_nb_pts(host)) - host1x_syncpt_load(host->syncpt + i); /* pin memory */ err = pin_job(host, job); if (err) goto out; - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) { - err = copy_gathers(job, dev); + if 
(job->enable_firewall) { + err = copy_gathers(host->dev, job, dev); if (err) goto out; } /* patch gathers */ - for (i = 0; i < job->num_gathers; i++) { - struct host1x_job_gather *g = &job->gathers[i]; + for (i = 0; i < job->num_cmds; i++) { + struct host1x_job_gather *g; + + if (job->cmds[i].is_wait) + continue; + g = &job->cmds[i].gather; /* process each gather mem only once */ if (g->handled) continue; /* copy_gathers() sets gathers base if firewall is enabled */ - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) + if (!job->enable_firewall) g->base = job->gather_addr_phys[i]; - for (j = i + 1; j < job->num_gathers; j++) { - if (job->gathers[j].bo == g->bo) { - job->gathers[j].handled = true; - job->gathers[j].base = g->base; + for (j = i + 1; j < job->num_cmds; j++) { + if (!job->cmds[j].is_wait && + job->cmds[j].gather.bo == g->bo) { + job->cmds[j].gather.handled = true; + job->cmds[j].gather.base = g->base; } } err = do_relocs(job, g); if (err) break; - - err = do_waitchks(job, host, g); - if (err) - break; } out: @@ -692,23 +656,22 @@ void host1x_job_unpin(struct host1x_job *job) unsigned int i; for (i = 0; i < job->num_unpins; i++) { - struct host1x_job_unpin_data *unpin = &job->unpins[i]; + struct host1x_bo_mapping *map = job->unpins[i].map; + struct host1x_bo *bo = map->bo; - if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) { - iommu_unmap(host->domain, job->addr_phys[i], - unpin->size); - free_iova(&host->iova, - iova_pfn(&host->iova, job->addr_phys[i])); + if (!job->enable_firewall && map->size && host->domain) { + iommu_unmap(host->domain, job->addr_phys[i], map->size); + free_iova(&host->iova, iova_pfn(&host->iova, job->addr_phys[i])); } - host1x_bo_unpin(unpin->bo, unpin->sgt); - host1x_bo_put(unpin->bo); + host1x_bo_unpin(map); + host1x_bo_put(bo); } job->num_unpins = 0; if (job->gather_copy_size) - dma_free_wc(job->channel->dev, job->gather_copy_size, + dma_free_wc(host->dev, job->gather_copy_size, job->gather_copy_mapped, 
job->gather_copy); } EXPORT_SYMBOL(host1x_job_unpin); @@ -718,7 +681,7 @@ EXPORT_SYMBOL(host1x_job_unpin); */ void host1x_job_dump(struct device *dev, struct host1x_job *job) { - dev_dbg(dev, " SYNCPT_ID %d\n", job->syncpt_id); + dev_dbg(dev, " SYNCPT_ID %d\n", job->syncpt->id); dev_dbg(dev, " SYNCPT_VAL %d\n", job->syncpt_end); dev_dbg(dev, " FIRST_GET 0x%x\n", job->first_get); dev_dbg(dev, " TIMEOUT %d\n", job->timeout); diff --git a/drivers/gpu/host1x/job.h b/drivers/gpu/host1x/job.h index 4bda51d503ec..dad5a1946693 100644 --- a/drivers/gpu/host1x/job.h +++ b/drivers/gpu/host1x/job.h @@ -1,36 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Tegra host1x Job * * Copyright (c) 2011-2013, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ #ifndef __HOST1X_JOB_H #define __HOST1X_JOB_H +#include <linux/dma-direction.h> + struct host1x_job_gather { - u32 words; + unsigned int words; dma_addr_t base; struct host1x_bo *bo; - u32 offset; + unsigned int offset; bool handled; }; +struct host1x_job_wait { + u32 id; + u32 threshold; + u32 next_class; + bool relative; +}; + +struct host1x_job_cmd { + bool is_wait; + + union { + struct host1x_job_gather gather; + struct host1x_job_wait wait; + }; +}; + struct host1x_job_unpin_data { - struct host1x_bo *bo; - struct sg_table *sgt; - size_t size; + struct host1x_bo_mapping *map; }; /* diff --git a/drivers/gpu/host1x/mipi.c b/drivers/gpu/host1x/mipi.c index e00809d996a2..e51b43dd15a3 100644 --- a/drivers/gpu/host1x/mipi.c +++ b/drivers/gpu/host1x/mipi.c @@ -21,9 +21,9 @@ */ #include <linux/clk.h> -#include <linux/delay.h> #include <linux/host1x.h> #include <linux/io.h> +#include <linux/iopoll.h> #include <linux/of_platform.h> #include <linux/platform_device.h> #include <linux/slab.h> @@ -206,9 +206,9 @@ static int tegra_mipi_power_down(struct tegra_mipi *mipi) return 0; } -struct tegra_mipi_device *tegra_mipi_request(struct device *device) +struct tegra_mipi_device *tegra_mipi_request(struct device *device, + struct device_node *np) { - struct device_node *np = device->of_node; struct tegra_mipi_device *dev; struct of_phandle_args args; int err; @@ -293,24 +293,25 @@ int tegra_mipi_disable(struct tegra_mipi_device *dev) } EXPORT_SYMBOL(tegra_mipi_disable); -static int tegra_mipi_wait(struct tegra_mipi *mipi) +int tegra_mipi_finish_calibration(struct tegra_mipi_device *device) { - unsigned long timeout = jiffies + msecs_to_jiffies(250); + struct tegra_mipi *mipi = device->mipi; + void __iomem *status_reg = mipi->regs + (MIPI_CAL_STATUS << 2); u32 value; + int err; - while (time_before(jiffies, timeout)) { - value = tegra_mipi_readl(mipi, MIPI_CAL_STATUS); - if ((value & MIPI_CAL_STATUS_ACTIVE) == 0 && - (value & MIPI_CAL_STATUS_DONE) != 0) - return 0; - - 
usleep_range(10, 50); - } + err = readl_relaxed_poll_timeout(status_reg, value, + !(value & MIPI_CAL_STATUS_ACTIVE) && + (value & MIPI_CAL_STATUS_DONE), 50, + 250000); + mutex_unlock(&device->mipi->lock); + clk_disable(device->mipi->clk); - return -ETIMEDOUT; + return err; } +EXPORT_SYMBOL(tegra_mipi_finish_calibration); -int tegra_mipi_calibrate(struct tegra_mipi_device *device) +int tegra_mipi_start_calibration(struct tegra_mipi_device *device) { const struct tegra_mipi_soc *soc = device->mipi->soc; unsigned int i; @@ -374,14 +375,16 @@ int tegra_mipi_calibrate(struct tegra_mipi_device *device) value |= MIPI_CAL_CTRL_START; tegra_mipi_writel(device->mipi, value, MIPI_CAL_CTRL); - err = tegra_mipi_wait(device->mipi); - - mutex_unlock(&device->mipi->lock); - clk_disable(device->mipi->clk); + /* + * Wait for min 72uS to let calibration logic finish calibration + * sequence codes before waiting for pads idle state to apply the + * results. + */ + usleep_range(75, 80); - return err; + return 0; } -EXPORT_SYMBOL(tegra_mipi_calibrate); +EXPORT_SYMBOL(tegra_mipi_start_calibration); static const struct tegra_mipi_pad tegra114_mipi_pads[] = { { .data = MIPI_CAL_CONFIG_CSIA }, @@ -498,8 +501,6 @@ static int tegra_mipi_probe(struct platform_device *pdev) { const struct of_device_id *match; struct tegra_mipi *mipi; - struct resource *res; - int err; match = of_match_node(tegra_mipi_of_match, pdev->dev.of_node); if (!match) @@ -512,42 +513,27 @@ static int tegra_mipi_probe(struct platform_device *pdev) mipi->soc = match->data; mipi->dev = &pdev->dev; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - mipi->regs = devm_ioremap_resource(&pdev->dev, res); + mipi->regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL); if (IS_ERR(mipi->regs)) return PTR_ERR(mipi->regs); mutex_init(&mipi->lock); - mipi->clk = devm_clk_get(&pdev->dev, NULL); + mipi->clk = devm_clk_get_prepared(&pdev->dev, NULL); if (IS_ERR(mipi->clk)) { dev_err(&pdev->dev, "failed to get clock\n"); 
return PTR_ERR(mipi->clk); } - err = clk_prepare(mipi->clk); - if (err < 0) - return err; - platform_set_drvdata(pdev, mipi); return 0; } -static int tegra_mipi_remove(struct platform_device *pdev) -{ - struct tegra_mipi *mipi = platform_get_drvdata(pdev); - - clk_unprepare(mipi->clk); - - return 0; -} - struct platform_driver tegra_mipi_driver = { .driver = { .name = "tegra-mipi", .of_match_table = tegra_mipi_of_match, }, .probe = tegra_mipi_probe, - .remove = tegra_mipi_remove, }; diff --git a/drivers/gpu/host1x/syncpt.c b/drivers/gpu/host1x/syncpt.c index 048ac9e344ce..acc7d82e0585 100644 --- a/drivers/gpu/host1x/syncpt.c +++ b/drivers/gpu/host1x/syncpt.c @@ -1,23 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Tegra host1x Syncpoints * * Copyright (c) 2010-2015, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ #include <linux/module.h> #include <linux/device.h> +#include <linux/dma-fence.h> #include <linux/slab.h> #include <trace/events/host1x.h> @@ -53,17 +43,32 @@ static void host1x_syncpt_base_free(struct host1x_syncpt_base *base) base->requested = false; } -static struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, - struct device *dev, - unsigned long flags) +/** + * host1x_syncpt_alloc() - allocate a syncpoint + * @host: host1x device data + * @flags: bitfield of HOST1X_SYNCPT_* flags + * @name: name for the syncpoint for use in debug prints + * + * Allocates a hardware syncpoint for the caller's use. The caller then has + * the sole authority to mutate the syncpoint's value until it is freed again. + * + * If no free syncpoints are available, or a NULL name was specified, returns + * NULL. + */ +struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, + unsigned long flags, + const char *name) { - int i; struct host1x_syncpt *sp = host->syncpt; - char *name; + char *full_name; + unsigned int i; + + if (!name) + return NULL; mutex_lock(&host->syncpt_mutex); - for (i = 0; i < host->info->nb_pts && sp->name; i++, sp++) + for (i = 0; i < host->info->nb_pts && kref_read(&sp->ref); i++, sp++) ; if (i >= host->info->nb_pts) @@ -75,19 +80,19 @@ static struct host1x_syncpt *host1x_syncpt_alloc(struct host1x *host, goto unlock; } - name = kasprintf(GFP_KERNEL, "%02u-%s", sp->id, - dev ? 
dev_name(dev) : NULL); - if (!name) + full_name = kasprintf(GFP_KERNEL, "%u-%s", sp->id, name); + if (!full_name) goto free_base; - sp->dev = dev; - sp->name = name; + sp->name = full_name; if (flags & HOST1X_SYNCPT_CLIENT_MANAGED) sp->client_managed = true; else sp->client_managed = false; + kref_init(&sp->ref); + mutex_unlock(&host->syncpt_mutex); return sp; @@ -98,6 +103,7 @@ unlock: mutex_unlock(&host->syncpt_mutex); return NULL; } +EXPORT_SYMBOL(host1x_syncpt_alloc); /** * host1x_syncpt_id() - retrieve syncpoint ID @@ -132,12 +138,21 @@ void host1x_syncpt_restore(struct host1x *host) struct host1x_syncpt *sp_base = host->syncpt; unsigned int i; - for (i = 0; i < host1x_syncpt_nb_pts(host); i++) + for (i = 0; i < host1x_syncpt_nb_pts(host); i++) { + /* + * Unassign syncpt from channels for purposes of Tegra186 + * syncpoint protection. This prevents any channel from + * accessing it until it is reassigned. + */ + host1x_hw_syncpt_assign_to_channel(host, sp_base + i, NULL); host1x_hw_syncpt_restore(host, sp_base + i); + } for (i = 0; i < host1x_syncpt_nb_bases(host); i++) host1x_hw_syncpt_restore_wait_base(host, sp_base + i); + host1x_hw_syncpt_enable_protection(host); + wmb(); } @@ -195,17 +210,6 @@ int host1x_syncpt_incr(struct host1x_syncpt *sp) } EXPORT_SYMBOL(host1x_syncpt_incr); -/* - * Updated sync point form hardware, and returns true if syncpoint is expired, - * false if we may need to wait - */ -static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh) -{ - host1x_hw_syncpt_load(sp->host, sp); - - return host1x_syncpt_is_expired(sp, thresh); -} - /** * host1x_syncpt_wait() - wait for a syncpoint to reach a given value * @sp: host1x syncpoint @@ -216,99 +220,46 @@ static bool syncpt_load_min_is_expired(struct host1x_syncpt *sp, u32 thresh) int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, u32 *value) { - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); - void *ref; - struct host1x_waitlist *waiter; - int err = 0, 
check_count = 0; - u32 val; + struct dma_fence *fence; + long wait_err; - if (value) - *value = 0; + host1x_hw_syncpt_load(sp->host, sp); - /* first check cache */ - if (host1x_syncpt_is_expired(sp, thresh)) { - if (value) - *value = host1x_syncpt_load(sp); + if (value) + *value = host1x_syncpt_load(sp); + if (host1x_syncpt_is_expired(sp, thresh)) return 0; - } - /* try to read from register */ - val = host1x_hw_syncpt_load(sp->host, sp); - if (host1x_syncpt_is_expired(sp, thresh)) { - if (value) - *value = val; - - goto done; - } - - if (!timeout) { - err = -EAGAIN; - goto done; - } - - /* allocate a waiter */ - waiter = kzalloc(sizeof(*waiter), GFP_KERNEL); - if (!waiter) { - err = -ENOMEM; - goto done; - } - - /* schedule a wakeup when the syncpoint value is reached */ - err = host1x_intr_add_action(sp->host, sp->id, thresh, - HOST1X_INTR_ACTION_WAKEUP_INTERRUPTIBLE, - &wq, waiter, &ref); - if (err) - goto done; - - err = -EAGAIN; - /* Caller-specified timeout may be impractically low */ if (timeout < 0) timeout = LONG_MAX; + else if (timeout == 0) + return -EAGAIN; - /* wait for the syncpoint, or timeout, or signal */ - while (timeout) { - long check = min_t(long, SYNCPT_CHECK_PERIOD, timeout); - int remain; - - remain = wait_event_interruptible_timeout(wq, - syncpt_load_min_is_expired(sp, thresh), - check); - if (remain > 0 || host1x_syncpt_is_expired(sp, thresh)) { - if (value) - *value = host1x_syncpt_load(sp); - - err = 0; - - break; - } - - if (remain < 0) { - err = remain; - break; - } - - timeout -= check; - - if (timeout && check_count <= MAX_STUCK_CHECK_COUNT) { - dev_warn(sp->host->dev, - "%s: syncpoint id %u (%s) stuck waiting %d, timeout=%ld\n", - current->comm, sp->id, sp->name, - thresh, timeout); + fence = host1x_fence_create(sp, thresh, false); + if (IS_ERR(fence)) + return PTR_ERR(fence); - host1x_debug_dump_syncpts(sp->host); + wait_err = dma_fence_wait_timeout(fence, true, timeout); + if (wait_err == 0) + host1x_fence_cancel(fence); + 
dma_fence_put(fence); - if (check_count == MAX_STUCK_CHECK_COUNT) - host1x_debug_dump(sp->host); - - check_count++; - } - } - - host1x_intr_put_ref(sp->host, sp->id, ref); + if (value) + *value = host1x_syncpt_load(sp); -done: - return err; + /* + * Don't rely on dma_fence_wait_timeout return value, + * since it returns zero both on timeout and if the + * wait completed with 0 jiffies left. + */ + host1x_hw_syncpt_load(sp->host, sp); + if (wait_err == 0 && !host1x_syncpt_is_expired(sp, thresh)) + return -EAGAIN; + else if (wait_err < 0) + return wait_err; + else + return 0; } EXPORT_SYMBOL(host1x_syncpt_wait); @@ -318,65 +269,12 @@ EXPORT_SYMBOL(host1x_syncpt_wait); bool host1x_syncpt_is_expired(struct host1x_syncpt *sp, u32 thresh) { u32 current_val; - u32 future_val; smp_rmb(); current_val = (u32)atomic_read(&sp->min_val); - future_val = (u32)atomic_read(&sp->max_val); - - /* Note the use of unsigned arithmetic here (mod 1<<32). - * - * c = current_val = min_val = the current value of the syncpoint. - * t = thresh = the value we are checking - * f = future_val = max_val = the value c will reach when all - * outstanding increments have completed. - * - * Note that c always chases f until it reaches f. - * - * Dtf = (f - t) - * Dtc = (c - t) - * - * Consider all cases: - * - * A) .....c..t..f..... Dtf < Dtc need to wait - * B) .....c.....f..t.. Dtf > Dtc expired - * C) ..t..c.....f..... Dtf > Dtc expired (Dct very large) - * - * Any case where f==c: always expired (for any t). Dtf == Dcf - * Any case where t==c: always expired (for any f). Dtf >= Dtc (because Dtc==0) - * Any case where t==f!=c: always wait. Dtf < Dtc (because Dtf==0, - * Dtc!=0) - * - * Other cases: - * - * A) .....t..f..c..... Dtf < Dtc need to wait - * A) .....f..c..t..... Dtf < Dtc need to wait - * A) .....f..t..c..... Dtf > Dtc expired - * - * So: - * Dtf >= Dtc implies EXPIRED (return true) - * Dtf < Dtc implies WAIT (return false) - * - * Note: If t is expired then we *cannot* wait on it. 
We would wait - * forever (hang the system). - * - * Note: do NOT get clever and remove the -thresh from both sides. It - * is NOT the same. - * - * If future valueis zero, we have a client managed sync point. In that - * case we do a direct comparison. - */ - if (!host1x_syncpt_client_managed(sp)) - return future_val - thresh >= current_val - thresh; - else - return (s32)(current_val - thresh) >= 0; -} -/* remove a wait pointed to by patch_addr */ -int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr) -{ - return host1x_hw_syncpt_patch_wait(sp->host, sp, patch_addr); + return ((current_val - thresh) & 0x80000000U) == 0U; } int host1x_syncpt_init(struct host1x *host) @@ -407,62 +305,71 @@ int host1x_syncpt_init(struct host1x *host) host->syncpt = syncpt; host->bases = bases; - host1x_syncpt_restore(host); - /* Allocate sync point to use for clearing waits for expired fences */ - host->nop_sp = host1x_syncpt_alloc(host, NULL, 0); + host->nop_sp = host1x_syncpt_alloc(host, 0, "reserved-nop"); if (!host->nop_sp) return -ENOMEM; + if (host->info->reserve_vblank_syncpts) { + kref_init(&host->syncpt[26].ref); + kref_init(&host->syncpt[27].ref); + } + return 0; } /** * host1x_syncpt_request() - request a syncpoint - * @dev: device requesting the syncpoint + * @client: client requesting the syncpoint * @flags: flags * * host1x client drivers can use this function to allocate a syncpoint for * subsequent use. A syncpoint returned by this function will be reserved for * use by the client exclusively. When no longer using a syncpoint, a host1x - * client driver needs to release it using host1x_syncpt_free(). + * client driver needs to release it using host1x_syncpt_put(). 
*/ -struct host1x_syncpt *host1x_syncpt_request(struct device *dev, +struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client, unsigned long flags) { - struct host1x *host = dev_get_drvdata(dev->parent); + struct host1x *host = dev_get_drvdata(client->host->parent); - return host1x_syncpt_alloc(host, dev, flags); + return host1x_syncpt_alloc(host, flags, dev_name(client->dev)); } EXPORT_SYMBOL(host1x_syncpt_request); -/** - * host1x_syncpt_free() - free a requested syncpoint - * @sp: host1x syncpoint - * - * Release a syncpoint previously allocated using host1x_syncpt_request(). A - * host1x client driver should call this when the syncpoint is no longer in - * use. Note that client drivers must ensure that the syncpoint doesn't remain - * under the control of hardware after calling this function, otherwise two - * clients may end up trying to access the same syncpoint concurrently. - */ -void host1x_syncpt_free(struct host1x_syncpt *sp) +static void syncpt_release(struct kref *ref) { - if (!sp) - return; + struct host1x_syncpt *sp = container_of(ref, struct host1x_syncpt, ref); - mutex_lock(&sp->host->syncpt_mutex); + atomic_set(&sp->max_val, host1x_syncpt_read(sp)); + + sp->locked = false; host1x_syncpt_base_free(sp->base); kfree(sp->name); sp->base = NULL; - sp->dev = NULL; sp->name = NULL; sp->client_managed = false; mutex_unlock(&sp->host->syncpt_mutex); } -EXPORT_SYMBOL(host1x_syncpt_free); + +/** + * host1x_syncpt_put() - free a requested syncpoint + * @sp: host1x syncpoint + * + * Release a syncpoint previously allocated using host1x_syncpt_request(). A + * host1x client driver should call this when the syncpoint is no longer in + * use. 
+ */ +void host1x_syncpt_put(struct host1x_syncpt *sp) +{ + if (!sp) + return; + + kref_put_mutex(&sp->ref, syncpt_release, &sp->host->syncpt_mutex); +} +EXPORT_SYMBOL(host1x_syncpt_put); void host1x_syncpt_deinit(struct host1x *host) { @@ -529,16 +436,48 @@ unsigned int host1x_syncpt_nb_mlocks(struct host1x *host) } /** - * host1x_syncpt_get() - obtain a syncpoint by ID + * host1x_syncpt_get_by_id() - obtain a syncpoint by ID + * @host: host1x controller + * @id: syncpoint ID + */ +struct host1x_syncpt *host1x_syncpt_get_by_id(struct host1x *host, + unsigned int id) +{ + if (id >= host->info->nb_pts) + return NULL; + + if (kref_get_unless_zero(&host->syncpt[id].ref)) + return &host->syncpt[id]; + else + return NULL; +} +EXPORT_SYMBOL(host1x_syncpt_get_by_id); + +/** + * host1x_syncpt_get_by_id_noref() - obtain a syncpoint by ID but don't + * increase the refcount. * @host: host1x controller * @id: syncpoint ID */ -struct host1x_syncpt *host1x_syncpt_get(struct host1x *host, unsigned int id) +struct host1x_syncpt *host1x_syncpt_get_by_id_noref(struct host1x *host, + unsigned int id) { if (id >= host->info->nb_pts) return NULL; - return host->syncpt + id; + return &host->syncpt[id]; +} +EXPORT_SYMBOL(host1x_syncpt_get_by_id_noref); + +/** + * host1x_syncpt_get() - increment syncpoint refcount + * @sp: syncpoint + */ +struct host1x_syncpt *host1x_syncpt_get(struct host1x_syncpt *sp) +{ + kref_get(&sp->ref); + + return sp; } EXPORT_SYMBOL(host1x_syncpt_get); @@ -561,3 +500,31 @@ u32 host1x_syncpt_base_id(struct host1x_syncpt_base *base) return base->id; } EXPORT_SYMBOL(host1x_syncpt_base_id); + +static void do_nothing(struct kref *ref) +{ +} + +/** + * host1x_syncpt_release_vblank_reservation() - Make VBLANK syncpoint + * available for allocation + * + * @client: host1x bus client + * @syncpt_id: syncpoint ID to make available + * + * Makes VBLANK<i> syncpoint available for allocatation if it was + * reserved at initialization time. 
This should be called by the display + * driver after it has ensured that any VBLANK increment programming configured + * by the boot chain has been disabled. + */ +void host1x_syncpt_release_vblank_reservation(struct host1x_client *client, + u32 syncpt_id) +{ + struct host1x *host = dev_get_drvdata(client->host->parent); + + if (!host->info->reserve_vblank_syncpts) + return; + + kref_put(&host->syncpt[syncpt_id].ref, do_nothing); +} +EXPORT_SYMBOL(host1x_syncpt_release_vblank_reservation); diff --git a/drivers/gpu/host1x/syncpt.h b/drivers/gpu/host1x/syncpt.h index f719205105ac..4c3f3b2f0e9c 100644 --- a/drivers/gpu/host1x/syncpt.h +++ b/drivers/gpu/host1x/syncpt.h @@ -1,19 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ /* * Tegra host1x Syncpoints * * Copyright (c) 2010-2013, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ #ifndef __HOST1X_SYNCPT_H @@ -22,8 +11,10 @@ #include <linux/atomic.h> #include <linux/host1x.h> #include <linux/kernel.h> +#include <linux/kref.h> #include <linux/sched.h> +#include "fence.h" #include "intr.h" struct host1x; @@ -37,6 +28,8 @@ struct host1x_syncpt_base { }; struct host1x_syncpt { + struct kref ref; + unsigned int id; atomic_t min_val; atomic_t max_val; @@ -44,11 +37,17 @@ struct host1x_syncpt { const char *name; bool client_managed; struct host1x *host; - struct device *dev; struct host1x_syncpt_base *base; /* interrupt data */ - struct host1x_syncpt_intr intr; + struct host1x_fence_list fences; + + /* + * If a submission incrementing this syncpoint fails, lock it so that + * further submission cannot be made until application has handled the + * failure. + */ + bool locked; }; /* Initialize sync point array */ @@ -124,7 +123,9 @@ static inline int host1x_syncpt_is_valid(struct host1x_syncpt *sp) return sp->id < host1x_syncpt_nb_pts(sp->host); } -/* Patch a wait by replacing it with a wait for syncpt 0 value 0 */ -int host1x_syncpt_patch_wait(struct host1x_syncpt *sp, void *patch_addr); +static inline void host1x_syncpt_set_locked(struct host1x_syncpt *sp) +{ + sp->locked = true; +} #endif |
