summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/ttm/ttm_resource.c
diff options
context:
space:
mode:
authorThomas Hellström <thomas.hellstrom@linux.intel.com>2021-06-02 10:38:10 +0200
committerThomas Hellström <thomas.hellstrom@linux.intel.com>2021-06-07 16:07:08 +0200
commit3bf3710e3718a5aebdf465343bc1125b6e8cca96 (patch)
treefdce250a03876dd8202f69c12c2f22b630f73ab4 /drivers/gpu/drm/ttm/ttm_resource.c
parentc43f2f9842347630f2ebfab05c9972e58b7df9d5 (diff)
drm/ttm: Add a generic TTM memcpy move for page-based iomem
The internal ttm_bo_util memcpy uses ioremap functionality, and while it probably might be possible to use it for copying in- and out of sglist represented io memory, using io_mem_reserve() / io_mem_free() callbacks, that would cause problems with fault(). Instead, implement a method mapping page-by-page using kmap_local() semantics. As an additional benefit we then avoid the occasional global TLB flushes of ioremap() and consuming ioremap space, elimination of a critical point of failure and with a slight change of semantics we could also push the memcpy out async for testing and async driver development purposes. A special linear iomem iterator is introduced internally to mimic the old ioremap behaviour for code-paths that can't immediately be ported over. This adds to the code size and should be considered a temporary solution. Looking at the code we have a lot of checks for iomap tagged pointers. Ideally we should extend the core memremap functions to also accept uncached memory and kmap_local functionality. Then we could strip a lot of code. Cc: Christian König <christian.koenig@amd.com> Signed-off-by: Thomas Hellström <thomas.hellstrom@linux.intel.com> Reviewed-by: Christian König <christian.koenig@amd.com> Link: https://lore.kernel.org/r/20210602083818.241793-4-thomas.hellstrom@linux.intel.com
Diffstat (limited to 'drivers/gpu/drm/ttm/ttm_resource.c')
-rw-r--r--drivers/gpu/drm/ttm/ttm_resource.c193
1 files changed, 193 insertions, 0 deletions
diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c
index 2a68145572cc..2431717376e7 100644
--- a/drivers/gpu/drm/ttm/ttm_resource.c
+++ b/drivers/gpu/drm/ttm/ttm_resource.c
@@ -22,6 +22,10 @@
* Authors: Christian König
*/
+#include <linux/dma-buf-map.h>
+#include <linux/io-mapping.h>
+#include <linux/scatterlist.h>
+
#include <drm/ttm/ttm_resource.h>
#include <drm/ttm/ttm_bo_driver.h>
@@ -154,3 +158,192 @@ void ttm_resource_manager_debug(struct ttm_resource_manager *man,
man->func->debug(man, p);
}
EXPORT_SYMBOL(ttm_resource_manager_debug);
+
+static void ttm_kmap_iter_iomap_map_local(struct ttm_kmap_iter *iter,
+ struct dma_buf_map *dmap,
+ pgoff_t i)
+{
+ struct ttm_kmap_iter_iomap *iter_io =
+ container_of(iter, typeof(*iter_io), base);
+ void __iomem *addr;
+
+retry:
+ while (i >= iter_io->cache.end) {
+ iter_io->cache.sg = iter_io->cache.sg ?
+ sg_next(iter_io->cache.sg) : iter_io->st->sgl;
+ iter_io->cache.i = iter_io->cache.end;
+ iter_io->cache.end += sg_dma_len(iter_io->cache.sg) >>
+ PAGE_SHIFT;
+ iter_io->cache.offs = sg_dma_address(iter_io->cache.sg) -
+ iter_io->start;
+ }
+
+ if (i < iter_io->cache.i) {
+ iter_io->cache.end = 0;
+ iter_io->cache.sg = NULL;
+ goto retry;
+ }
+
+ addr = io_mapping_map_local_wc(iter_io->iomap, iter_io->cache.offs +
+ (((resource_size_t)i - iter_io->cache.i)
+ << PAGE_SHIFT));
+ dma_buf_map_set_vaddr_iomem(dmap, addr);
+}
+
+static void ttm_kmap_iter_iomap_unmap_local(struct ttm_kmap_iter *iter,
+ struct dma_buf_map *map)
+{
+ io_mapping_unmap_local(map->vaddr_iomem);
+}
+
+static const struct ttm_kmap_iter_ops ttm_kmap_iter_io_ops = {
+ .map_local = ttm_kmap_iter_iomap_map_local,
+ .unmap_local = ttm_kmap_iter_iomap_unmap_local,
+ .maps_tt = false,
+};
+
+/**
+ * ttm_kmap_iter_iomap_init - Initialize a struct ttm_kmap_iter_iomap
+ * @iter_io: The struct ttm_kmap_iter_iomap to initialize.
+ * @iomap: The struct io_mapping representing the underlying linear io_memory.
+ * @st: sg_table into @iomap, representing the memory of the struct
+ * ttm_resource.
+ * @start: Offset that needs to be subtracted from @st to make
+ * sg_dma_address(st->sgl) - @start == 0 for @iomap start.
+ *
+ * Return: Pointer to the embedded struct ttm_kmap_iter.
+ */
+struct ttm_kmap_iter *
+ttm_kmap_iter_iomap_init(struct ttm_kmap_iter_iomap *iter_io,
+ struct io_mapping *iomap,
+ struct sg_table *st,
+ resource_size_t start)
+{
+ iter_io->base.ops = &ttm_kmap_iter_io_ops;
+ iter_io->iomap = iomap;
+ iter_io->st = st;
+ iter_io->start = start;
+ memset(&iter_io->cache, 0, sizeof(iter_io->cache));
+
+ return &iter_io->base;
+}
+EXPORT_SYMBOL(ttm_kmap_iter_iomap_init);
+
+/**
+ * DOC: Linear io iterator
+ *
+ * This code should die in the not too near future. Best would be if we could
+ * make io-mapping use memremap for all io memory, and have memremap
+ * implement a kmap_local functionality. We could then strip a huge amount of
+ * code. These linear io iterators are implemented to mimic old functionality,
+ * and they don't use kmap_local semantics at all internally. Rather ioremap or
+ * friends, and at least on 32-bit they add global TLB flushes and points
+ * of failure.
+ */
+
+static void ttm_kmap_iter_linear_io_map_local(struct ttm_kmap_iter *iter,
+ struct dma_buf_map *dmap,
+ pgoff_t i)
+{
+ struct ttm_kmap_iter_linear_io *iter_io =
+ container_of(iter, typeof(*iter_io), base);
+
+ *dmap = iter_io->dmap;
+ dma_buf_map_incr(dmap, i * PAGE_SIZE);
+}
+
+static const struct ttm_kmap_iter_ops ttm_kmap_iter_linear_io_ops = {
+ .map_local = ttm_kmap_iter_linear_io_map_local,
+ .maps_tt = false,
+};
+
+/**
+ * ttm_kmap_iter_linear_io_init - Initialize an iterator for linear io memory
+ * @iter_io: The iterator to initialize
+ * @bdev: The TTM device
+ * @mem: The ttm resource representing the iomap.
+ *
+ * This function is for internal TTM use only. It sets up a memcpy kmap iterator
+ * pointing at a linear chunk of io memory.
+ *
+ * Return: A pointer to the embedded struct ttm_kmap_iter or error pointer on
+ * failure.
+ */
+struct ttm_kmap_iter *
+ttm_kmap_iter_linear_io_init(struct ttm_kmap_iter_linear_io *iter_io,
+ struct ttm_device *bdev,
+ struct ttm_resource *mem)
+{
+ int ret;
+
+ ret = ttm_mem_io_reserve(bdev, mem);
+ if (ret)
+ goto out_err;
+ if (!mem->bus.is_iomem) {
+ ret = -EINVAL;
+ goto out_io_free;
+ }
+
+ if (mem->bus.addr) {
+ dma_buf_map_set_vaddr(&iter_io->dmap, mem->bus.addr);
+ iter_io->needs_unmap = false;
+ } else {
+ size_t bus_size = (size_t)mem->num_pages << PAGE_SHIFT;
+
+ iter_io->needs_unmap = true;
+ memset(&iter_io->dmap, 0, sizeof(iter_io->dmap));
+ if (mem->bus.caching == ttm_write_combined)
+ dma_buf_map_set_vaddr_iomem(&iter_io->dmap,
+ ioremap_wc(mem->bus.offset,
+ bus_size));
+ else if (mem->bus.caching == ttm_cached)
+ dma_buf_map_set_vaddr(&iter_io->dmap,
+ memremap(mem->bus.offset, bus_size,
+ MEMREMAP_WB |
+ MEMREMAP_WT |
+ MEMREMAP_WC));
+
+ /* If uncached requested or if mapping cached or wc failed */
+ if (dma_buf_map_is_null(&iter_io->dmap))
+ dma_buf_map_set_vaddr_iomem(&iter_io->dmap,
+ ioremap(mem->bus.offset,
+ bus_size));
+
+ if (dma_buf_map_is_null(&iter_io->dmap)) {
+ ret = -ENOMEM;
+ goto out_io_free;
+ }
+ }
+
+ iter_io->base.ops = &ttm_kmap_iter_linear_io_ops;
+ return &iter_io->base;
+
+out_io_free:
+ ttm_mem_io_free(bdev, mem);
+out_err:
+ return ERR_PTR(ret);
+}
+
+/**
+ * ttm_kmap_iter_linear_io_fini - Clean up an iterator for linear io memory
+ * @iter_io: The iterator to initialize
+ * @bdev: The TTM device
+ * @mem: The ttm resource representing the iomap.
+ *
+ * This function is for internal TTM use only. It cleans up a memcpy kmap
+ * iterator initialized by ttm_kmap_iter_linear_io_init.
+ */
+void
+ttm_kmap_iter_linear_io_fini(struct ttm_kmap_iter_linear_io *iter_io,
+ struct ttm_device *bdev,
+ struct ttm_resource *mem)
+{
+ if (iter_io->needs_unmap && dma_buf_map_is_set(&iter_io->dmap)) {
+ if (iter_io->dmap.is_iomem)
+ iounmap(iter_io->dmap.vaddr_iomem);
+ else
+ memunmap(iter_io->dmap.vaddr);
+ }
+
+ ttm_mem_io_free(bdev, mem);
+}