Diffstat (limited to 'kernel/dma')
-rw-r--r--  kernel/dma/Kconfig          |  121
-rw-r--r--  kernel/dma/Makefile         |    8
-rw-r--r--  kernel/dma/coherent.c       |  203
-rw-r--r--  kernel/dma/contiguous.c     |  249
-rw-r--r--  kernel/dma/debug.c          |  298
-rw-r--r--  kernel/dma/debug.h          |  130
-rw-r--r--  kernel/dma/direct.c         |  605
-rw-r--r--  kernel/dma/direct.h         |  128
-rw-r--r--  kernel/dma/dummy.c          |    5
-rw-r--r--  kernel/dma/map_benchmark.c  |  358
-rw-r--r--  kernel/dma/mapping.c        |  586
-rw-r--r--  kernel/dma/ops_helpers.c    |   93
-rw-r--r--  kernel/dma/pool.c           |  165
-rw-r--r--  kernel/dma/remap.c          |    7
-rw-r--r--  kernel/dma/swiotlb.c        | 1949
-rw-r--r--  kernel/dma/virt.c           |   59
16 files changed, 3762 insertions, 1202 deletions
diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig
index 1da3f44f2565..d62f5957f36b 100644
--- a/kernel/dma/Kconfig
+++ b/kernel/dma/Kconfig
@@ -1,10 +1,32 @@
# SPDX-License-Identifier: GPL-2.0-only
+config NO_DMA
+ bool
+
config HAS_DMA
bool
depends on !NO_DMA
default y
+config DMA_OPS
+ depends on HAS_DMA
+ bool
+
+#
+# IOMMU drivers that can bypass the IOMMU code and optionally use the direct
+# mapping fast path should select this option and set the dma_ops_bypass
+# flag in struct device where applicable
+#
+config DMA_OPS_BYPASS
+ bool
+
+# Lets the platform IOMMU driver choose between bypass and IOMMU
+config ARCH_HAS_DMA_MAP_DIRECT
+ bool
+
+config NEED_SG_DMA_FLAGS
+ bool
+
config NEED_SG_DMA_LENGTH
bool
@@ -14,21 +36,24 @@ config NEED_DMA_MAP_STATE
config ARCH_DMA_ADDR_T_64BIT
def_bool 64BIT || PHYS_ADDR_T_64BIT
-config ARCH_HAS_DMA_COHERENCE_H
- bool
-
config ARCH_HAS_DMA_SET_MASK
bool
#
# Select this option if the architecture needs special handling for
# DMA_ATTR_WRITE_COMBINE. Normally the "uncached" mapping should be what
-# people thing of when saying write combine, so very few platforms should
+# people think of when saying write combine, so very few platforms should
# need to enable this.
#
config ARCH_HAS_DMA_WRITE_COMBINE
bool
+#
+# Select if the architecture provides the arch_dma_mark_clean hook
+#
+config ARCH_HAS_DMA_MARK_CLEAN
+ bool
+
config DMA_DECLARE_COHERENT
bool
@@ -54,17 +79,47 @@ config ARCH_HAS_DMA_PREP_COHERENT
config ARCH_HAS_FORCE_DMA_UNENCRYPTED
bool
-config DMA_NONCOHERENT_CACHE_SYNC
- bool
-
-config DMA_VIRT_OPS
+#
+# Select this option if the architecture assumes DMA devices are coherent
+# by default.
+#
+config ARCH_DMA_DEFAULT_COHERENT
bool
- depends on HAS_DMA
config SWIOTLB
bool
select NEED_DMA_MAP_STATE
+config SWIOTLB_DYNAMIC
+ bool "Dynamic allocation of DMA bounce buffers"
+ default n
+ depends on SWIOTLB
+ help
+ This enables dynamic resizing of the software IO TLB. The kernel
+ starts with one memory pool at boot and it will allocate additional
+ pools as needed. To reduce run-time kernel memory requirements, you
+ may have to specify a smaller size of the initial pool using
+ "swiotlb=" on the kernel command line.
+
+ If unsure, say N.
+
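As an illustrative example (not taken from this patch), booting with "swiotlb=16384" would start with a 16384-slot initial pool, roughly 32 MiB at the usual 2 KiB IO TLB slot size, and SWIOTLB_DYNAMIC can then grow the bounce-buffer space on demand.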
+config DMA_BOUNCE_UNALIGNED_KMALLOC
+ bool
+ depends on SWIOTLB
+
+config DMA_RESTRICTED_POOL
+ bool "DMA Restricted Pool"
+ depends on OF && OF_RESERVED_MEM && SWIOTLB
+ help
+ This enables support for restricted DMA pools which provide a level of
+ DMA memory protection on systems with limited hardware protection
+ capabilities, such as those lacking an IOMMU.
+
+ For more information see
+ <Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt>
+ and <kernel/dma/swiotlb.c>.
+ If unsure, say "n".
+
#
# Should be selected if we can mmap non-coherent mappings to userspace.
# The only thing that is really required is a way to set an uncached bit
@@ -78,15 +133,25 @@ config DMA_COHERENT_POOL
select GENERIC_ALLOCATOR
bool
-config DMA_REMAP
+config DMA_GLOBAL_POOL
+ select DMA_DECLARE_COHERENT
+ depends on !ARCH_HAS_DMA_SET_UNCACHED
+ depends on !DMA_DIRECT_REMAP
bool
- depends on MMU
- select DMA_NONCOHERENT_MMAP
config DMA_DIRECT_REMAP
bool
- select DMA_REMAP
select DMA_COHERENT_POOL
+ select DMA_NONCOHERENT_MMAP
+
+#
+# Fallback to arch code for DMA allocations. This should eventually go away.
+#
+config ARCH_HAS_DMA_ALLOC
+ depends on !ARCH_HAS_DMA_SET_UNCACHED
+ depends on !DMA_DIRECT_REMAP
+ depends on !DMA_GLOBAL_POOL
+ bool
config DMA_CMA
bool "DMA Contiguous Memory Allocator"
@@ -99,10 +164,22 @@ config DMA_CMA
You can disable CMA by specifying "cma=0" on the kernel's command
line.
- For more information see <include/linux/dma-contiguous.h>.
+ For more information see <kernel/dma/contiguous.c>.
If unsure, say "n".
if DMA_CMA
+
+config DMA_NUMA_CMA
+ bool "Enable separate DMA Contiguous Memory Area for NUMA Node"
+ depends on NUMA
+ help
+ Enable this option to get per-NUMA-node CMA areas so that NUMA devices
+ can get node-local memory through the DMA coherent APIs.
+
+ You can set the size of the per-NUMA CMA by specifying "cma_pernuma=size",
+ or set a node id and the size of its CMA area by specifying "numa_cma=
+ <node>:size[,<node>:size]" on the kernel's command line.
+
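As an illustrative example (not taken from this patch), "numa_cma=0:128M,1:64M" reserves a 128 MiB CMA area on node 0 and a 64 MiB area on node 1, while "cma_pernuma=16M" reserves a 16 MiB area on every online node.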
comment "Default contiguous memory area size:"
config CMA_SIZE_MBYTES
@@ -147,7 +224,7 @@ endchoice
config CMA_ALIGNMENT
int "Maximum PAGE_SIZE order of alignment for contiguous buffers"
- range 4 12
+ range 2 12
default 8
help
DMA mapping framework by default aligns all buffers to the smallest
@@ -174,11 +251,6 @@ config DMA_API_DEBUG
drivers like double-freeing of DMA mappings or freeing mappings that
were never allocated.
- This also attempts to catch cases where a page owned by DMA is
- accessed by the cpu in a way that could cause data corruption. For
- example, this enables cow_user_page() to check that the source page is
- not undergoing DMA.
-
This option causes a performance degradation. Use only if you want to
debug device drivers and dma interactions.
@@ -200,3 +272,12 @@ config DMA_API_DEBUG_SG
is technically out-of-spec.
If unsure, say N.
+
+config DMA_MAP_BENCHMARK
+ bool "Enable benchmarking of streaming DMA mapping"
+ depends on DEBUG_FS
+ help
+ Provides /sys/kernel/debug/dma_map_benchmark that helps with testing
+ performance of dma_(un)map_page.
+
+ See tools/testing/selftests/dma/dma_map_benchmark.c
diff --git a/kernel/dma/Makefile b/kernel/dma/Makefile
index 370f63344e9c..21926e46ef4f 100644
--- a/kernel/dma/Makefile
+++ b/kernel/dma/Makefile
@@ -1,10 +1,12 @@
# SPDX-License-Identifier: GPL-2.0
-obj-$(CONFIG_HAS_DMA) += mapping.o direct.o dummy.o
+obj-$(CONFIG_HAS_DMA) += mapping.o direct.o
+obj-$(CONFIG_DMA_OPS) += ops_helpers.o
+obj-$(CONFIG_DMA_OPS) += dummy.o
obj-$(CONFIG_DMA_CMA) += contiguous.o
obj-$(CONFIG_DMA_DECLARE_COHERENT) += coherent.o
-obj-$(CONFIG_DMA_VIRT_OPS) += virt.o
obj-$(CONFIG_DMA_API_DEBUG) += debug.o
obj-$(CONFIG_SWIOTLB) += swiotlb.o
obj-$(CONFIG_DMA_COHERENT_POOL) += pool.o
-obj-$(CONFIG_DMA_REMAP) += remap.o
+obj-$(CONFIG_MMU) += remap.o
+obj-$(CONFIG_DMA_MAP_BENCHMARK) += map_benchmark.o
diff --git a/kernel/dma/coherent.c b/kernel/dma/coherent.c
index 2a0c4985f38e..ff5683a57f77 100644
--- a/kernel/dma/coherent.c
+++ b/kernel/dma/coherent.c
@@ -7,7 +7,8 @@
#include <linux/slab.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
+#include <linux/dma-map-ops.h>
struct dma_coherent_mem {
void *virt_base;
@@ -19,8 +20,6 @@ struct dma_coherent_mem {
bool use_dev_dma_pfn_offset;
};
-static struct dma_coherent_mem *dma_coherent_default_memory __ro_after_init;
-
static inline struct dma_coherent_mem *dev_get_coherent_memory(struct device *dev)
{
if (dev && dev->dma_mem)
@@ -32,65 +31,56 @@ static inline dma_addr_t dma_get_device_base(struct device *dev,
struct dma_coherent_mem * mem)
{
if (mem->use_dev_dma_pfn_offset)
- return (mem->pfn_base - dev->dma_pfn_offset) << PAGE_SHIFT;
- else
- return mem->device_base;
+ return phys_to_dma(dev, PFN_PHYS(mem->pfn_base));
+ return mem->device_base;
}
-static int dma_init_coherent_memory(phys_addr_t phys_addr,
- dma_addr_t device_addr, size_t size,
- struct dma_coherent_mem **mem)
+static struct dma_coherent_mem *dma_init_coherent_memory(phys_addr_t phys_addr,
+ dma_addr_t device_addr, size_t size, bool use_dma_pfn_offset)
{
- struct dma_coherent_mem *dma_mem = NULL;
- void *mem_base = NULL;
+ struct dma_coherent_mem *dma_mem;
int pages = size >> PAGE_SHIFT;
- int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long);
- int ret;
+ void *mem_base;
- if (!size) {
- ret = -EINVAL;
- goto out;
- }
+ if (!size)
+ return ERR_PTR(-EINVAL);
mem_base = memremap(phys_addr, size, MEMREMAP_WC);
- if (!mem_base) {
- ret = -EINVAL;
- goto out;
- }
+ if (!mem_base)
+ return ERR_PTR(-EINVAL);
+
dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL);
- if (!dma_mem) {
- ret = -ENOMEM;
- goto out;
- }
- dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL);
- if (!dma_mem->bitmap) {
- ret = -ENOMEM;
- goto out;
- }
+ if (!dma_mem)
+ goto out_unmap_membase;
+ dma_mem->bitmap = bitmap_zalloc(pages, GFP_KERNEL);
+ if (!dma_mem->bitmap)
+ goto out_free_dma_mem;
dma_mem->virt_base = mem_base;
dma_mem->device_base = device_addr;
dma_mem->pfn_base = PFN_DOWN(phys_addr);
dma_mem->size = pages;
+ dma_mem->use_dev_dma_pfn_offset = use_dma_pfn_offset;
spin_lock_init(&dma_mem->spinlock);
- *mem = dma_mem;
- return 0;
+ return dma_mem;
-out:
+out_free_dma_mem:
kfree(dma_mem);
- if (mem_base)
- memunmap(mem_base);
- return ret;
+out_unmap_membase:
+ memunmap(mem_base);
+ pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %zd MiB\n",
+ &phys_addr, size / SZ_1M);
+ return ERR_PTR(-ENOMEM);
}
-static void dma_release_coherent_memory(struct dma_coherent_mem *mem)
+static void _dma_release_coherent_memory(struct dma_coherent_mem *mem)
{
if (!mem)
return;
memunmap(mem->virt_base);
- kfree(mem->bitmap);
+ bitmap_free(mem->bitmap);
kfree(mem);
}
@@ -107,22 +97,47 @@ static int dma_assign_coherent_memory(struct device *dev,
return 0;
}
+/*
+ * Declare a region of memory to be handed out by dma_alloc_coherent() when it
+ * is asked for coherent memory for this device. This shall only be used
+ * from platform code, usually based on the device tree description.
+ *
+ * phys_addr is the CPU physical address to which the memory is currently
+ * assigned (this will be ioremapped so the CPU can access the region).
+ *
+ * device_addr is the DMA address the device needs to be programmed with to
+ * actually address this memory (this will be handed out as the dma_addr_t in
+ * dma_alloc_coherent()).
+ *
+ * size is the size of the area (must be a multiple of PAGE_SIZE).
+ *
+ * As a simplification for the platforms, only *one* such region of memory may
+ * be declared per device.
+ */
int dma_declare_coherent_memory(struct device *dev, phys_addr_t phys_addr,
dma_addr_t device_addr, size_t size)
{
struct dma_coherent_mem *mem;
int ret;
- ret = dma_init_coherent_memory(phys_addr, device_addr, size, &mem);
- if (ret)
- return ret;
+ mem = dma_init_coherent_memory(phys_addr, device_addr, size, false);
+ if (IS_ERR(mem))
+ return PTR_ERR(mem);
ret = dma_assign_coherent_memory(dev, mem);
if (ret)
- dma_release_coherent_memory(mem);
+ _dma_release_coherent_memory(mem);
return ret;
}
+void dma_release_coherent_memory(struct device *dev)
+{
+ if (dev) {
+ _dma_release_coherent_memory(dev->dma_mem);
+ dev->dma_mem = NULL;
+ }
+}
+
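To illustrate how the per-device coherent API above is meant to be used, here is a minimal, hypothetical platform-code sketch; example_probe(), example_remove(), the physical address and the size are assumptions made up for the example, while dma_declare_coherent_memory() and the new dma_release_coherent_memory() are the helpers from this file:

#include <linux/device.h>
#include <linux/dma-map-ops.h>
#include <linux/sizes.h>

/* Hypothetical platform glue; 0x90000000 and SZ_1M are illustrative only. */
static int example_probe(struct device *dev)
{
	/* CPU physical and device DMA address happen to be identical here. */
	return dma_declare_coherent_memory(dev, 0x90000000, 0x90000000, SZ_1M);
}

static void example_remove(struct device *dev)
{
	/* Frees the pool declared above and clears dev->dma_mem. */
	dma_release_coherent_memory(dev);
}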
static void *__dma_alloc_from_coherent(struct device *dev,
struct dma_coherent_mem *mem,
ssize_t size, dma_addr_t *dma_handle)
@@ -181,16 +196,6 @@ int dma_alloc_from_dev_coherent(struct device *dev, ssize_t size,
return 1;
}
-void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size,
- dma_addr_t *dma_handle)
-{
- if (!dma_coherent_default_memory)
- return NULL;
-
- return __dma_alloc_from_coherent(dev, dma_coherent_default_memory, size,
- dma_handle);
-}
-
static int __dma_release_from_coherent(struct dma_coherent_mem *mem,
int order, void *vaddr)
{
@@ -226,15 +231,6 @@ int dma_release_from_dev_coherent(struct device *dev, int order, void *vaddr)
return __dma_release_from_coherent(mem, order, vaddr);
}
-int dma_release_from_global_coherent(int order, void *vaddr)
-{
- if (!dma_coherent_default_memory)
- return 0;
-
- return __dma_release_from_coherent(dma_coherent_default_memory, order,
- vaddr);
-}
-
static int __dma_mmap_from_coherent(struct dma_coherent_mem *mem,
struct vm_area_struct *vma, void *vaddr, size_t size, int *ret)
{
@@ -280,6 +276,28 @@ int dma_mmap_from_dev_coherent(struct device *dev, struct vm_area_struct *vma,
return __dma_mmap_from_coherent(mem, vma, vaddr, size, ret);
}
+#ifdef CONFIG_DMA_GLOBAL_POOL
+static struct dma_coherent_mem *dma_coherent_default_memory __ro_after_init;
+
+void *dma_alloc_from_global_coherent(struct device *dev, ssize_t size,
+ dma_addr_t *dma_handle)
+{
+ if (!dma_coherent_default_memory)
+ return NULL;
+
+ return __dma_alloc_from_coherent(dev, dma_coherent_default_memory, size,
+ dma_handle);
+}
+
+int dma_release_from_global_coherent(int order, void *vaddr)
+{
+ if (!dma_coherent_default_memory)
+ return 0;
+
+ return __dma_release_from_coherent(dma_coherent_default_memory, order,
+ vaddr);
+}
+
int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr,
size_t size, int *ret)
{
@@ -290,6 +308,19 @@ int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr,
vaddr, size, ret);
}
+int dma_init_global_coherent(phys_addr_t phys_addr, size_t size)
+{
+ struct dma_coherent_mem *mem;
+
+ mem = dma_init_coherent_memory(phys_addr, phys_addr, size, true);
+ if (IS_ERR(mem))
+ return PTR_ERR(mem);
+ dma_coherent_default_memory = mem;
+ pr_info("DMA: default coherent area is set\n");
+ return 0;
+}
+#endif /* CONFIG_DMA_GLOBAL_POOL */
+
/*
* Support for reserved memory regions defined in device tree
*/
@@ -298,25 +329,22 @@ int dma_mmap_from_global_coherent(struct vm_area_struct *vma, void *vaddr,
#include <linux/of_fdt.h>
#include <linux/of_reserved_mem.h>
+#ifdef CONFIG_DMA_GLOBAL_POOL
static struct reserved_mem *dma_reserved_default_memory __initdata;
+#endif
static int rmem_dma_device_init(struct reserved_mem *rmem, struct device *dev)
{
- struct dma_coherent_mem *mem = rmem->priv;
- int ret;
-
- if (!mem) {
- ret = dma_init_coherent_memory(rmem->base, rmem->base,
- rmem->size, &mem);
- if (ret) {
- pr_err("Reserved memory: failed to init DMA memory pool at %pa, size %ld MiB\n",
- &rmem->base, (unsigned long)rmem->size / SZ_1M);
- return ret;
- }
+ if (!rmem->priv) {
+ struct dma_coherent_mem *mem;
+
+ mem = dma_init_coherent_memory(rmem->base, rmem->base,
+ rmem->size, true);
+ if (IS_ERR(mem))
+ return PTR_ERR(mem);
+ rmem->priv = mem;
}
- mem->use_dev_dma_pfn_offset = true;
- rmem->priv = mem;
- dma_assign_coherent_memory(dev, mem);
+ dma_assign_coherent_memory(dev, rmem->priv);
return 0;
}
@@ -344,7 +372,9 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem)
pr_err("Reserved memory: regions without no-map are not yet supported\n");
return -EINVAL;
}
+#endif
+#ifdef CONFIG_DMA_GLOBAL_POOL
if (of_get_flat_dt_prop(node, "linux,dma-default", NULL)) {
WARN(dma_reserved_default_memory,
"Reserved memory: region for default DMA coherent area is redefined\n");
@@ -358,31 +388,16 @@ static int __init rmem_dma_setup(struct reserved_mem *rmem)
return 0;
}
+#ifdef CONFIG_DMA_GLOBAL_POOL
static int __init dma_init_reserved_memory(void)
{
- const struct reserved_mem_ops *ops;
- int ret;
-
if (!dma_reserved_default_memory)
return -ENOMEM;
-
- ops = dma_reserved_default_memory->ops;
-
- /*
- * We rely on rmem_dma_device_init() does not propagate error of
- * dma_assign_coherent_memory() for "NULL" device.
- */
- ret = ops->device_init(dma_reserved_default_memory, NULL);
-
- if (!ret) {
- dma_coherent_default_memory = dma_reserved_default_memory->priv;
- pr_info("DMA: default coherent area is set\n");
- }
-
- return ret;
+ return dma_init_global_coherent(dma_reserved_default_memory->base,
+ dma_reserved_default_memory->size);
}
-
core_initcall(dma_init_reserved_memory);
+#endif /* CONFIG_DMA_GLOBAL_POOL */
RESERVEDMEM_OF_DECLARE(dma, "shared-dma-pool", rmem_dma_setup);
#endif
diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 15bc5026c485..055da410ac71 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -5,24 +5,46 @@
* Written by:
* Marek Szyprowski <m.szyprowski@samsung.com>
* Michal Nazarewicz <mina86@mina86.com>
+ *
+ * Contiguous Memory Allocator
+ *
+ * The Contiguous Memory Allocator (CMA) makes it possible to
+ * allocate big contiguous chunks of memory after the system has
+ * booted.
+ *
+ * Why is it needed?
+ *
+ * Various devices on embedded systems have no scatter-gather and/or
+ * IO map support and require contiguous blocks of memory to
+ * operate. They include devices such as cameras, hardware video
+ * coders, etc.
+ *
+ * Such devices often require big memory buffers (a full HD frame
+ * is, for instance, more than 2 mega pixels large, i.e. more than 6
+ * MB of memory), which makes mechanisms such as kmalloc() or
+ * alloc_page() ineffective.
+ *
+ * At the same time, a solution where a big memory region is
+ * reserved for a device is suboptimal since often more memory is
+ * reserved than strictly required and, moreover, the memory is
+ * inaccessible to the page allocator even if device drivers don't use it.
+ *
+ * CMA tries to solve this issue by operating on memory regions
+ * where only movable pages can be allocated from. This way, the kernel
+ * can use the memory for the pagecache and, when a device driver requests
+ * it, the allocated pages can be migrated.
*/
#define pr_fmt(fmt) "cma: " fmt
-#ifdef CONFIG_CMA_DEBUG
-#ifndef DEBUG
-# define DEBUG
-#endif
-#endif
-
#include <asm/page.h>
-#include <asm/dma-contiguous.h>
#include <linux/memblock.h>
#include <linux/err.h>
#include <linux/sizes.h>
-#include <linux/dma-contiguous.h>
+#include <linux/dma-map-ops.h>
#include <linux/cma.h>
+#include <linux/nospec.h>
#ifdef CONFIG_CMA_SIZE_MBYTES
#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES
@@ -69,20 +91,57 @@ static int __init early_cma(char *p)
}
early_param("cma", early_cma);
+#ifdef CONFIG_DMA_NUMA_CMA
+
+static struct cma *dma_contiguous_numa_area[MAX_NUMNODES];
+static phys_addr_t numa_cma_size[MAX_NUMNODES] __initdata;
+static struct cma *dma_contiguous_pernuma_area[MAX_NUMNODES];
+static phys_addr_t pernuma_size_bytes __initdata;
+
+static int __init early_numa_cma(char *p)
+{
+ int nid, count = 0;
+ unsigned long tmp;
+ char *s = p;
+
+ while (*s) {
+ if (sscanf(s, "%lu%n", &tmp, &count) != 1)
+ break;
+
+ if (s[count] == ':') {
+ if (tmp >= MAX_NUMNODES)
+ break;
+ nid = array_index_nospec(tmp, MAX_NUMNODES);
+
+ s += count + 1;
+ tmp = memparse(s, &s);
+ numa_cma_size[nid] = tmp;
+
+ if (*s == ',')
+ s++;
+ else
+ break;
+ } else
+ break;
+ }
+
+ return 0;
+}
+early_param("numa_cma", early_numa_cma);
+
+static int __init early_cma_pernuma(char *p)
+{
+ pernuma_size_bytes = memparse(p, &p);
+ return 0;
+}
+early_param("cma_pernuma", early_cma_pernuma);
+#endif
+
#ifdef CONFIG_CMA_SIZE_PERCENTAGE
static phys_addr_t __init __maybe_unused cma_early_percent_memory(void)
{
- struct memblock_region *reg;
- unsigned long total_pages = 0;
-
- /*
- * We cannot use memblock_phys_mem_size() here, because
- * memblock_analyze() has not been called yet.
- */
- for_each_memblock(memory, reg)
- total_pages += memblock_region_memory_end_pfn(reg) -
- memblock_region_memory_base_pfn(reg);
+ unsigned long total_pages = PHYS_PFN(memblock_phys_mem_size());
return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT;
}
@@ -96,6 +155,51 @@ static inline __maybe_unused phys_addr_t cma_early_percent_memory(void)
#endif
+#ifdef CONFIG_DMA_NUMA_CMA
+static void __init dma_numa_cma_reserve(void)
+{
+ int nid;
+
+ for_each_node(nid) {
+ int ret;
+ char name[CMA_MAX_NAME];
+ struct cma **cma;
+
+ if (!node_online(nid)) {
+ if (pernuma_size_bytes || numa_cma_size[nid])
+ pr_warn("invalid node %d specified\n", nid);
+ continue;
+ }
+
+ if (pernuma_size_bytes) {
+
+ cma = &dma_contiguous_pernuma_area[nid];
+ snprintf(name, sizeof(name), "pernuma%d", nid);
+ ret = cma_declare_contiguous_nid(0, pernuma_size_bytes, 0, 0,
+ 0, false, name, cma, nid);
+ if (ret)
+ pr_warn("%s: reservation failed: err %d, node %d", __func__,
+ ret, nid);
+ }
+
+ if (numa_cma_size[nid]) {
+
+ cma = &dma_contiguous_numa_area[nid];
+ snprintf(name, sizeof(name), "numa%d", nid);
+ ret = cma_declare_contiguous_nid(0, numa_cma_size[nid], 0, 0, 0, false,
+ name, cma, nid);
+ if (ret)
+ pr_warn("%s: reservation failed: err %d, node %d", __func__,
+ ret, nid);
+ }
+ }
+}
+#else
+static inline void __init dma_numa_cma_reserve(void)
+{
+}
+#endif
+
/**
* dma_contiguous_reserve() - reserve area(s) for contiguous memory handling
* @limit: End address of the reserved memory (optional, 0 for any).
@@ -112,6 +216,8 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
phys_addr_t selected_limit = limit;
bool fixed = false;
+ dma_numa_cma_reserve();
+
pr_debug("%s(limit %08lx)\n", __func__, (unsigned long)limit);
if (size_cmdline != -1) {
@@ -143,6 +249,11 @@ void __init dma_contiguous_reserve(phys_addr_t limit)
}
}
+void __weak
+dma_contiguous_early_fixup(phys_addr_t base, unsigned long size)
+{
+}
+
/**
* dma_contiguous_reserve_area() - reserve custom contiguous area
* @size: Size of the reserved area (in bytes),
@@ -215,40 +326,65 @@ bool dma_release_from_contiguous(struct device *dev, struct page *pages,
return cma_release(dev_get_cma_area(dev), pages, count);
}
+static struct page *cma_alloc_aligned(struct cma *cma, size_t size, gfp_t gfp)
+{
+ unsigned int align = min(get_order(size), CONFIG_CMA_ALIGNMENT);
+
+ return cma_alloc(cma, size >> PAGE_SHIFT, align, gfp & __GFP_NOWARN);
+}
+
/**
* dma_alloc_contiguous() - allocate contiguous pages
* @dev: Pointer to device for which the allocation is performed.
* @size: Requested allocation size.
* @gfp: Allocation flags.
*
- * This function allocates contiguous memory buffer for specified device. It
- * tries to use device specific contiguous memory area if available, or the
- * default global one.
+ * This function tries the device-specific contiguous memory area first, then
+ * the per-NUMA CMA area; if those allocations fail, it falls back to the
+ * default global area.
*
- * Note that it byapss one-page size of allocations from the global area as
- * the addresses within one page are always contiguous, so there is no need
- * to waste CMA pages for that kind; it also helps reduce fragmentations.
+ * Note that it bypasses single-page allocations from the per-NUMA and
+ * global areas, as the addresses within one page are always contiguous, so
+ * there is no need to waste CMA pages on them; it also helps reduce
+ * fragmentation.
*/
struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp)
{
- size_t count = size >> PAGE_SHIFT;
- struct page *page = NULL;
- struct cma *cma = NULL;
-
- if (dev && dev->cma_area)
- cma = dev->cma_area;
- else if (count > 1)
- cma = dma_contiguous_default_area;
+#ifdef CONFIG_DMA_NUMA_CMA
+ int nid = dev_to_node(dev);
+#endif
/* CMA can be used only in the context which permits sleeping */
- if (cma && gfpflags_allow_blocking(gfp)) {
- size_t align = get_order(size);
- size_t cma_align = min_t(size_t, align, CONFIG_CMA_ALIGNMENT);
-
- page = cma_alloc(cma, count, cma_align, gfp & __GFP_NOWARN);
+ if (!gfpflags_allow_blocking(gfp))
+ return NULL;
+ if (dev->cma_area)
+ return cma_alloc_aligned(dev->cma_area, size, gfp);
+ if (size <= PAGE_SIZE)
+ return NULL;
+
+#ifdef CONFIG_DMA_NUMA_CMA
+ if (nid != NUMA_NO_NODE && !(gfp & (GFP_DMA | GFP_DMA32))) {
+ struct cma *cma = dma_contiguous_pernuma_area[nid];
+ struct page *page;
+
+ if (cma) {
+ page = cma_alloc_aligned(cma, size, gfp);
+ if (page)
+ return page;
+ }
+
+ cma = dma_contiguous_numa_area[nid];
+ if (cma) {
+ page = cma_alloc_aligned(cma, size, gfp);
+ if (page)
+ return page;
+ }
}
+#endif
+ if (!dma_contiguous_default_area)
+ return NULL;
- return page;
+ return cma_alloc_aligned(dma_contiguous_default_area, size, gfp);
}
/**
@@ -264,9 +400,30 @@ struct page *dma_alloc_contiguous(struct device *dev, size_t size, gfp_t gfp)
*/
void dma_free_contiguous(struct device *dev, struct page *page, size_t size)
{
- if (!cma_release(dev_get_cma_area(dev), page,
- PAGE_ALIGN(size) >> PAGE_SHIFT))
- __free_pages(page, get_order(size));
+ unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ /* if dev has its own cma, free page from there */
+ if (dev->cma_area) {
+ if (cma_release(dev->cma_area, page, count))
+ return;
+ } else {
+ /*
+ * otherwise, page is from either per-numa cma or default cma
+ */
+#ifdef CONFIG_DMA_NUMA_CMA
+ if (cma_release(dma_contiguous_pernuma_area[page_to_nid(page)],
+ page, count))
+ return;
+ if (cma_release(dma_contiguous_numa_area[page_to_nid(page)],
+ page, count))
+ return;
+#endif
+ if (cma_release(dma_contiguous_default_area, page, count))
+ return;
+ }
+
+ /* not in any cma, free from buddy */
+ __free_pages(page, get_order(size));
}
/*
@@ -282,14 +439,14 @@ void dma_free_contiguous(struct device *dev, struct page *page, size_t size)
static int rmem_cma_device_init(struct reserved_mem *rmem, struct device *dev)
{
- dev_set_cma_area(dev, rmem->priv);
+ dev->cma_area = rmem->priv;
return 0;
}
static void rmem_cma_device_release(struct reserved_mem *rmem,
struct device *dev)
{
- dev_set_cma_area(dev, NULL);
+ dev->cma_area = NULL;
}
static const struct reserved_mem_ops rmem_cma_ops = {
@@ -299,8 +456,6 @@ static const struct reserved_mem_ops rmem_cma_ops = {
static int __init rmem_cma_setup(struct reserved_mem *rmem)
{
- phys_addr_t align = PAGE_SIZE << max(MAX_ORDER - 1, pageblock_order);
- phys_addr_t mask = align - 1;
unsigned long node = rmem->fdt_node;
bool default_cma = of_get_flat_dt_prop(node, "linux,cma-default", NULL);
struct cma *cma;
@@ -316,7 +471,7 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem)
of_get_flat_dt_prop(node, "no-map", NULL))
return -EINVAL;
- if ((rmem->base & mask) || (rmem->size & mask)) {
+ if (!IS_ALIGNED(rmem->base | rmem->size, CMA_MIN_ALIGNMENT_BYTES)) {
pr_err("Reserved memory: incorrect alignment of CMA region\n");
return -EINVAL;
}
@@ -330,7 +485,7 @@ static int __init rmem_cma_setup(struct reserved_mem *rmem)
dma_contiguous_early_fixup(rmem->base, rmem->size);
if (default_cma)
- dma_contiguous_set_default(cma);
+ dma_contiguous_default_area = cma;
rmem->ops = &rmem_cma_ops;
rmem->priv = cma;
diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c
index 36c962a86bf2..a6e3792b15f8 100644
--- a/kernel/dma/debug.c
+++ b/kernel/dma/debug.c
@@ -9,10 +9,9 @@
#include <linux/sched/task_stack.h>
#include <linux/scatterlist.h>
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
#include <linux/sched/task.h>
#include <linux/stacktrace.h>
-#include <linux/dma-debug.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/debugfs.h>
@@ -24,8 +23,8 @@
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/slab.h>
-
#include <asm/sections.h>
+#include "debug.h"
#define HASH_SIZE 16384ULL
#define HASH_FN_SHIFT 13
@@ -54,6 +53,7 @@ enum map_err_types {
* struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping
* @list: node on pre-allocated free_entries list
* @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent
+ * @dev_addr: dma address
* @size: length of the mapping
* @type: single, page, sg, coherent
* @direction: enum dma_data_direction
@@ -62,7 +62,8 @@ enum map_err_types {
* @pfn: page frame of the start address
* @offset: offset of mapping relative to pfn
* @map_err_type: track whether dma_mapping_error() was checked
- * @stacktrace: support backtraces when a violation is detected
+ * @stack_len: number of backtrace entries in @stack_entries
+ * @stack_entries: stack of backtrace history
*/
struct dma_debug_entry {
struct list_head list;
@@ -139,13 +140,17 @@ static const char *const maperr2str[] = {
static const char *type2name[] = {
[dma_debug_single] = "single",
- [dma_debug_sg] = "scather-gather",
+ [dma_debug_sg] = "scatter-gather",
[dma_debug_coherent] = "coherent",
[dma_debug_resource] = "resource",
};
-static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE",
- "DMA_FROM_DEVICE", "DMA_NONE" };
+static const char *dir2name[] = {
+ [DMA_BIDIRECTIONAL] = "DMA_BIDIRECTIONAL",
+ [DMA_TO_DEVICE] = "DMA_TO_DEVICE",
+ [DMA_FROM_DEVICE] = "DMA_FROM_DEVICE",
+ [DMA_NONE] = "DMA_NONE",
+};
/*
* The access to some variables in this macro is racy. We can't use atomic_t
@@ -347,11 +352,10 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
unsigned long *flags)
{
- unsigned int max_range = dma_get_max_seg_size(ref->dev);
struct dma_debug_entry *entry, index = *ref;
- unsigned int range = 0;
+ int limit = min(HASH_SIZE, (index.dev_addr >> HASH_FN_SHIFT) + 1);
- while (range <= max_range) {
+ for (int i = 0; i < limit; i++) {
entry = __hash_bucket_find(*bucket, ref, containing_match);
if (entry)
@@ -361,7 +365,6 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket,
* Nothing found, go back a hash bucket
*/
put_hash_bucket(*bucket, *flags);
- range += (1 << HASH_FN_SHIFT);
index.dev_addr -= (1 << HASH_FN_SHIFT);
*bucket = get_hash_bucket(&index, flags);
}
@@ -395,37 +398,6 @@ static unsigned long long phys_addr(struct dma_debug_entry *entry)
}
/*
- * Dump mapping entries for debugging purposes
- */
-void debug_dma_dump_mappings(struct device *dev)
-{
- int idx;
-
- for (idx = 0; idx < HASH_SIZE; idx++) {
- struct hash_bucket *bucket = &dma_entry_hash[idx];
- struct dma_debug_entry *entry;
- unsigned long flags;
-
- spin_lock_irqsave(&bucket->lock, flags);
-
- list_for_each_entry(entry, &bucket->list, list) {
- if (!dev || dev == entry->dev) {
- dev_info(entry->dev,
- "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n",
- type2name[entry->type], idx,
- phys_addr(entry), entry->pfn,
- entry->dev_addr, entry->size,
- dir2name[entry->direction],
- maperr2str[entry->map_err_type]);
- }
- }
-
- spin_unlock_irqrestore(&bucket->lock, flags);
- cond_resched();
- }
-}
-
-/*
* For each mapping (initial cacheline in the case of
* dma_alloc_coherent/dma_map_page, initial cacheline in each page of a
* scatterlist, or the cacheline specified in dma_map_single) insert
@@ -444,11 +416,8 @@ void debug_dma_dump_mappings(struct device *dev)
* dma_active_cacheline entry to track per event. dma_map_sg(), on the
* other hand, consumes a single dma_debug_entry, but inserts 'nents'
* entries into the tree.
- *
- * At any time debug_dma_assert_idle() can be called to trigger a
- * warning if any cachelines in the given page are in the active set.
*/
-static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT);
+static RADIX_TREE(dma_active_cacheline, GFP_ATOMIC);
static DEFINE_SPINLOCK(radix_lock);
#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1)
#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT)
@@ -493,10 +462,7 @@ static void active_cacheline_inc_overlap(phys_addr_t cln)
overlap = active_cacheline_set_overlap(cln, ++overlap);
/* If we overflowed the overlap counter then we're potentially
- * leaking dma-mappings. Otherwise, if maps and unmaps are
- * balanced then this overflow may cause false negatives in
- * debug_dma_assert_idle() as the cacheline may be marked idle
- * prematurely.
+ * leaking dma-mappings.
*/
WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP,
pr_fmt("exceeded %d overlapping mappings of cacheline %pa\n"),
@@ -551,58 +517,75 @@ static void active_cacheline_remove(struct dma_debug_entry *entry)
spin_unlock_irqrestore(&radix_lock, flags);
}
-/**
- * debug_dma_assert_idle() - assert that a page is not undergoing dma
- * @page: page to lookup in the dma_active_cacheline tree
- *
- * Place a call to this routine in cases where the cpu touching the page
- * before the dma completes (page is dma_unmapped) will lead to data
- * corruption.
+/*
+ * Dump mapping entries to the kernel log for debugging purposes
*/
-void debug_dma_assert_idle(struct page *page)
+void debug_dma_dump_mappings(struct device *dev)
{
- static struct dma_debug_entry *ents[CACHELINES_PER_PAGE];
- struct dma_debug_entry *entry = NULL;
- void **results = (void **) &ents;
- unsigned int nents, i;
- unsigned long flags;
+ int idx;
phys_addr_t cln;
- if (dma_debug_disabled())
- return;
-
- if (!page)
- return;
+ for (idx = 0; idx < HASH_SIZE; idx++) {
+ struct hash_bucket *bucket = &dma_entry_hash[idx];
+ struct dma_debug_entry *entry;
+ unsigned long flags;
- cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT;
- spin_lock_irqsave(&radix_lock, flags);
- nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln,
- CACHELINES_PER_PAGE);
- for (i = 0; i < nents; i++) {
- phys_addr_t ent_cln = to_cacheline_number(ents[i]);
+ spin_lock_irqsave(&bucket->lock, flags);
+ list_for_each_entry(entry, &bucket->list, list) {
+ if (!dev || dev == entry->dev) {
+ cln = to_cacheline_number(entry);
+ dev_info(entry->dev,
+ "%s idx %d P=%llx N=%lx D=%llx L=%llx cln=%pa %s %s\n",
+ type2name[entry->type], idx,
+ phys_addr(entry), entry->pfn,
+ entry->dev_addr, entry->size,
+ &cln, dir2name[entry->direction],
+ maperr2str[entry->map_err_type]);
+ }
+ }
+ spin_unlock_irqrestore(&bucket->lock, flags);
- if (ent_cln == cln) {
- entry = ents[i];
- break;
- } else if (ent_cln >= cln + CACHELINES_PER_PAGE)
- break;
+ cond_resched();
}
- spin_unlock_irqrestore(&radix_lock, flags);
+}
- if (!entry)
- return;
+/*
+ * Dump mapping entries to user space via debugfs
+ */
+static int dump_show(struct seq_file *seq, void *v)
+{
+ int idx;
+ phys_addr_t cln;
- cln = to_cacheline_number(entry);
- err_printk(entry->dev, entry,
- "cpu touching an active dma mapped cacheline [cln=%pa]\n",
- &cln);
+ for (idx = 0; idx < HASH_SIZE; idx++) {
+ struct hash_bucket *bucket = &dma_entry_hash[idx];
+ struct dma_debug_entry *entry;
+ unsigned long flags;
+
+ spin_lock_irqsave(&bucket->lock, flags);
+ list_for_each_entry(entry, &bucket->list, list) {
+ cln = to_cacheline_number(entry);
+ seq_printf(seq,
+ "%s %s %s idx %d P=%llx N=%lx D=%llx L=%llx cln=%pa %s %s\n",
+ dev_driver_string(entry->dev),
+ dev_name(entry->dev),
+ type2name[entry->type], idx,
+ phys_addr(entry), entry->pfn,
+ entry->dev_addr, entry->size,
+ &cln, dir2name[entry->direction],
+ maperr2str[entry->map_err_type]);
+ }
+ spin_unlock_irqrestore(&bucket->lock, flags);
+ }
+ return 0;
}
+DEFINE_SHOW_ATTRIBUTE(dump);
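With this change the dump is also reachable from user space: when CONFIG_DMA_API_DEBUG is enabled and debugfs is mounted, reading /sys/kernel/debug/dma-api/dump (the file registered by dma_debug_fs_init() below) prints one line per active mapping in the format shown above.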
/*
* Wrapper function for adding an entry to the hash.
* This function takes care of locking itself.
*/
-static void add_dma_entry(struct dma_debug_entry *entry)
+static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs)
{
struct hash_bucket *bucket;
unsigned long flags;
@@ -614,13 +597,12 @@ static void add_dma_entry(struct dma_debug_entry *entry)
rc = active_cacheline_insert(entry);
if (rc == -ENOMEM) {
- pr_err("cacheline tracking ENOMEM, dma-debug disabled\n");
+ pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n");
global_disable = true;
+ } else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
+ err_printk(entry->dev, entry,
+ "cacheline tracking EEXIST, overlapping mappings aren't supported\n");
}
-
- /* TODO: report -EEXIST errors here as overlapping mappings are
- * not supported by the DMA API
- */
}
static int dma_debug_create_entries(gfp_t gfp)
@@ -656,15 +638,19 @@ static struct dma_debug_entry *__dma_entry_alloc(void)
return entry;
}
-static void __dma_entry_alloc_check_leak(void)
+/*
+ * This should be called outside of free_entries_lock scope to avoid potential
+ * deadlocks with serial consoles that use DMA.
+ */
+static void __dma_entry_alloc_check_leak(u32 nr_entries)
{
- u32 tmp = nr_total_entries % nr_prealloc_entries;
+ u32 tmp = nr_entries % nr_prealloc_entries;
/* Shout each time we tick over some multiple of the initial pool */
if (tmp < DMA_DEBUG_DYNAMIC_ENTRIES) {
pr_info("dma_debug_entry pool grown to %u (%u00%%)\n",
- nr_total_entries,
- (nr_total_entries / nr_prealloc_entries));
+ nr_entries,
+ (nr_entries / nr_prealloc_entries));
}
}
@@ -675,8 +661,10 @@ static void __dma_entry_alloc_check_leak(void)
*/
static struct dma_debug_entry *dma_entry_alloc(void)
{
+ bool alloc_check_leak = false;
struct dma_debug_entry *entry;
unsigned long flags;
+ u32 nr_entries;
spin_lock_irqsave(&free_entries_lock, flags);
if (num_free_entries == 0) {
@@ -686,13 +674,17 @@ static struct dma_debug_entry *dma_entry_alloc(void)
pr_err("debugging out of memory - disabling\n");
return NULL;
}
- __dma_entry_alloc_check_leak();
+ alloc_check_leak = true;
+ nr_entries = nr_total_entries;
}
entry = __dma_entry_alloc();
spin_unlock_irqrestore(&free_entries_lock, flags);
+ if (alloc_check_leak)
+ __dma_entry_alloc_check_leak(nr_entries);
+
#ifdef CONFIG_STACKTRACE
entry->stack_len = stack_trace_save(entry->stack_entries,
ARRAY_SIZE(entry->stack_entries),
@@ -817,34 +809,7 @@ static const struct file_operations filter_fops = {
.llseek = default_llseek,
};
-static int dump_show(struct seq_file *seq, void *v)
-{
- int idx;
-
- for (idx = 0; idx < HASH_SIZE; idx++) {
- struct hash_bucket *bucket = &dma_entry_hash[idx];
- struct dma_debug_entry *entry;
- unsigned long flags;
-
- spin_lock_irqsave(&bucket->lock, flags);
- list_for_each_entry(entry, &bucket->list, list) {
- seq_printf(seq,
- "%s %s %s idx %d P=%llx N=%lx D=%llx L=%llx %s %s\n",
- dev_name(entry->dev),
- dev_driver_string(entry->dev),
- type2name[entry->type], idx,
- phys_addr(entry), entry->pfn,
- entry->dev_addr, entry->size,
- dir2name[entry->direction],
- maperr2str[entry->map_err_type]);
- }
- spin_unlock_irqrestore(&bucket->lock, flags);
- }
- return 0;
-}
-DEFINE_SHOW_ATTRIBUTE(dump);
-
-static void dma_debug_fs_init(void)
+static int __init dma_debug_fs_init(void)
{
struct dentry *dentry = debugfs_create_dir("dma-api", NULL);
@@ -857,7 +822,10 @@ static void dma_debug_fs_init(void)
debugfs_create_u32("nr_total_entries", 0444, dentry, &nr_total_entries);
debugfs_create_file("driver_filter", 0644, dentry, NULL, &filter_fops);
debugfs_create_file("dump", 0444, dentry, NULL, &dump_fops);
+
+ return 0;
}
+core_initcall_sync(dma_debug_fs_init);
static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry)
{
@@ -882,7 +850,7 @@ static int device_dma_allocations(struct device *dev, struct dma_debug_entry **o
static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data)
{
struct device *dev = data;
- struct dma_debug_entry *uninitialized_var(entry);
+ struct dma_debug_entry *entry;
int count;
if (dma_debug_disabled())
@@ -909,7 +877,7 @@ static int dma_debug_device_change(struct notifier_block *nb, unsigned long acti
return 0;
}
-void dma_debug_add_bus(struct bus_type *bus)
+void dma_debug_add_bus(const struct bus_type *bus)
{
struct notifier_block *nb;
@@ -942,8 +910,6 @@ static int dma_debug_init(void)
spin_lock_init(&dma_entry_hash[i].lock);
}
- dma_debug_fs_init();
-
nr_pages = DIV_ROUND_UP(nr_prealloc_entries, DMA_DEBUG_DYNAMIC_ENTRIES);
for (i = 0; i < nr_pages; ++i)
dma_debug_create_entries(GFP_KERNEL);
@@ -977,7 +943,7 @@ static __init int dma_debug_cmdline(char *str)
global_disable = true;
}
- return 0;
+ return 1;
}
static __init int dma_debug_entries_cmdline(char *str)
@@ -986,7 +952,7 @@ static __init int dma_debug_entries_cmdline(char *str)
return -EINVAL;
if (!get_option(&str, &nr_prealloc_entries))
nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES;
- return 0;
+ return 1;
}
__setup("dma_debug=", dma_debug_cmdline);
@@ -1071,7 +1037,7 @@ static void check_unmap(struct dma_debug_entry *ref)
/*
* Drivers should use dma_mapping_error() to check the returned
* addresses of dma_map_single() and dma_map_page().
- * If not, print this warning message. See Documentation/DMA-API.txt.
+ * If not, print this warning message. See Documentation/core-api/dma-api.rst.
*/
if (entry->map_err_type == MAP_ERR_NOT_CHECKED) {
err_printk(ref->dev, entry,
@@ -1116,20 +1082,10 @@ static void check_for_stack(struct device *dev,
}
}
-static inline bool overlap(void *addr, unsigned long len, void *start, void *end)
-{
- unsigned long a1 = (unsigned long)addr;
- unsigned long b1 = a1 + len;
- unsigned long a2 = (unsigned long)start;
- unsigned long b2 = (unsigned long)end;
-
- return !(b1 <= a2 || a1 >= b2);
-}
-
static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len)
{
- if (overlap(addr, len, _stext, _etext) ||
- overlap(addr, len, __start_rodata, __end_rodata))
+ if (memory_intersects(_stext, _etext, addr, len) ||
+ memory_intersects(__start_rodata, __end_rodata, addr, len))
err_printk(dev, NULL, "device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len);
}
@@ -1251,7 +1207,8 @@ void debug_dma_map_single(struct device *dev, const void *addr,
EXPORT_SYMBOL(debug_dma_map_single);
void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
- size_t size, int direction, dma_addr_t dma_addr)
+ size_t size, int direction, dma_addr_t dma_addr,
+ unsigned long attrs)
{
struct dma_debug_entry *entry;
@@ -1268,7 +1225,7 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
entry->dev = dev;
entry->type = dma_debug_single;
entry->pfn = page_to_pfn(page);
- entry->offset = offset,
+ entry->offset = offset;
entry->dev_addr = dma_addr;
entry->size = size;
entry->direction = direction;
@@ -1282,9 +1239,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset,
check_for_illegal_area(dev, addr, size);
}
- add_dma_entry(entry);
+ add_dma_entry(entry, attrs);
}
-EXPORT_SYMBOL(debug_dma_map_page);
void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
{
@@ -1324,13 +1280,13 @@ void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
}
EXPORT_SYMBOL(debug_dma_mapping_error);
-void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
+void debug_dma_unmap_page(struct device *dev, dma_addr_t dma_addr,
size_t size, int direction)
{
struct dma_debug_entry ref = {
.type = dma_debug_single,
.dev = dev,
- .dev_addr = addr,
+ .dev_addr = dma_addr,
.size = size,
.direction = direction,
};
@@ -1339,10 +1295,10 @@ void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
return;
check_unmap(&ref);
}
-EXPORT_SYMBOL(debug_dma_unmap_page);
void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
- int nents, int mapped_ents, int direction)
+ int nents, int mapped_ents, int direction,
+ unsigned long attrs)
{
struct dma_debug_entry *entry;
struct scatterlist *s;
@@ -1351,6 +1307,12 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
if (unlikely(dma_debug_disabled()))
return;
+ for_each_sg(sg, s, nents, i) {
+ check_for_stack(dev, sg_page(s), s->offset);
+ if (!PageHighMem(sg_page(s)))
+ check_for_illegal_area(dev, sg_virt(s), s->length);
+ }
+
for_each_sg(sg, s, mapped_ents, i) {
entry = dma_entry_alloc();
if (!entry)
@@ -1359,25 +1321,18 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
entry->type = dma_debug_sg;
entry->dev = dev;
entry->pfn = page_to_pfn(sg_page(s));
- entry->offset = s->offset,
+ entry->offset = s->offset;
entry->size = sg_dma_len(s);
entry->dev_addr = sg_dma_address(s);
entry->direction = direction;
entry->sg_call_ents = nents;
entry->sg_mapped_ents = mapped_ents;
- check_for_stack(dev, sg_page(s), s->offset);
-
- if (!PageHighMem(sg_page(s))) {
- check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s));
- }
-
check_sg_segment(dev, s);
- add_dma_entry(entry);
+ add_dma_entry(entry, attrs);
}
}
-EXPORT_SYMBOL(debug_dma_map_sg);
static int get_nr_mapped_entries(struct device *dev,
struct dma_debug_entry *ref)
@@ -1429,10 +1384,10 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
check_unmap(&ref);
}
}
-EXPORT_SYMBOL(debug_dma_unmap_sg);
void debug_dma_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t dma_addr, void *virt)
+ dma_addr_t dma_addr, void *virt,
+ unsigned long attrs)
{
struct dma_debug_entry *entry;
@@ -1462,17 +1417,17 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
else
entry->pfn = page_to_pfn(virt_to_page(virt));
- add_dma_entry(entry);
+ add_dma_entry(entry, attrs);
}
void debug_dma_free_coherent(struct device *dev, size_t size,
- void *virt, dma_addr_t addr)
+ void *virt, dma_addr_t dma_addr)
{
struct dma_debug_entry ref = {
.type = dma_debug_coherent,
.dev = dev,
.offset = offset_in_page(virt),
- .dev_addr = addr,
+ .dev_addr = dma_addr,
.size = size,
.direction = DMA_BIDIRECTIONAL,
};
@@ -1493,7 +1448,8 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
}
void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size,
- int direction, dma_addr_t dma_addr)
+ int direction, dma_addr_t dma_addr,
+ unsigned long attrs)
{
struct dma_debug_entry *entry;
@@ -1513,9 +1469,8 @@ void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size,
entry->direction = direction;
entry->map_err_type = MAP_ERR_NOT_CHECKED;
- add_dma_entry(entry);
+ add_dma_entry(entry, attrs);
}
-EXPORT_SYMBOL(debug_dma_map_resource);
void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
size_t size, int direction)
@@ -1533,7 +1488,6 @@ void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
check_unmap(&ref);
}
-EXPORT_SYMBOL(debug_dma_unmap_resource);
void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
size_t size, int direction)
@@ -1552,7 +1506,6 @@ void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle,
check_sync(dev, &ref, true);
}
-EXPORT_SYMBOL(debug_dma_sync_single_for_cpu);
void debug_dma_sync_single_for_device(struct device *dev,
dma_addr_t dma_handle, size_t size,
@@ -1572,7 +1525,6 @@ void debug_dma_sync_single_for_device(struct device *dev,
check_sync(dev, &ref, false);
}
-EXPORT_SYMBOL(debug_dma_sync_single_for_device);
void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
int nelems, int direction)
@@ -1605,7 +1557,6 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
check_sync(dev, &ref, true);
}
}
-EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu);
void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
int nelems, int direction)
@@ -1637,7 +1588,6 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
check_sync(dev, &ref, false);
}
}
-EXPORT_SYMBOL(debug_dma_sync_sg_for_device);
static int __init dma_debug_driver_setup(char *str)
{
diff --git a/kernel/dma/debug.h b/kernel/dma/debug.h
new file mode 100644
index 000000000000..f525197d3cae
--- /dev/null
+++ b/kernel/dma/debug.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2008 Advanced Micro Devices, Inc.
+ *
+ * Author: Joerg Roedel <joerg.roedel@amd.com>
+ */
+
+#ifndef _KERNEL_DMA_DEBUG_H
+#define _KERNEL_DMA_DEBUG_H
+
+#ifdef CONFIG_DMA_API_DEBUG
+extern void debug_dma_map_page(struct device *dev, struct page *page,
+ size_t offset, size_t size,
+ int direction, dma_addr_t dma_addr,
+ unsigned long attrs);
+
+extern void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
+ size_t size, int direction);
+
+extern void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, int mapped_ents, int direction,
+ unsigned long attrs);
+
+extern void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist,
+ int nelems, int dir);
+
+extern void debug_dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t dma_addr, void *virt,
+ unsigned long attrs);
+
+extern void debug_dma_free_coherent(struct device *dev, size_t size,
+ void *virt, dma_addr_t addr);
+
+extern void debug_dma_map_resource(struct device *dev, phys_addr_t addr,
+ size_t size, int direction,
+ dma_addr_t dma_addr,
+ unsigned long attrs);
+
+extern void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr,
+ size_t size, int direction);
+
+extern void debug_dma_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dma_handle, size_t size,
+ int direction);
+
+extern void debug_dma_sync_single_for_device(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size, int direction);
+
+extern void debug_dma_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sg,
+ int nelems, int direction);
+
+extern void debug_dma_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sg,
+ int nelems, int direction);
+#else /* CONFIG_DMA_API_DEBUG */
+static inline void debug_dma_map_page(struct device *dev, struct page *page,
+ size_t offset, size_t size,
+ int direction, dma_addr_t dma_addr,
+ unsigned long attrs)
+{
+}
+
+static inline void debug_dma_unmap_page(struct device *dev, dma_addr_t addr,
+ size_t size, int direction)
+{
+}
+
+static inline void debug_dma_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, int mapped_ents, int direction,
+ unsigned long attrs)
+{
+}
+
+static inline void debug_dma_unmap_sg(struct device *dev,
+ struct scatterlist *sglist,
+ int nelems, int dir)
+{
+}
+
+static inline void debug_dma_alloc_coherent(struct device *dev, size_t size,
+ dma_addr_t dma_addr, void *virt,
+ unsigned long attrs)
+{
+}
+
+static inline void debug_dma_free_coherent(struct device *dev, size_t size,
+ void *virt, dma_addr_t addr)
+{
+}
+
+static inline void debug_dma_map_resource(struct device *dev, phys_addr_t addr,
+ size_t size, int direction,
+ dma_addr_t dma_addr,
+ unsigned long attrs)
+{
+}
+
+static inline void debug_dma_unmap_resource(struct device *dev,
+ dma_addr_t dma_addr, size_t size,
+ int direction)
+{
+}
+
+static inline void debug_dma_sync_single_for_cpu(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+}
+
+static inline void debug_dma_sync_single_for_device(struct device *dev,
+ dma_addr_t dma_handle,
+ size_t size, int direction)
+{
+}
+
+static inline void debug_dma_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sg,
+ int nelems, int direction)
+{
+}
+
+static inline void debug_dma_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sg,
+ int nelems, int direction)
+{
+}
+#endif /* CONFIG_DMA_API_DEBUG */
+#endif /* _KERNEL_DMA_DEBUG_H */
diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c
index 67f060b86a73..4d543b1e9d57 100644
--- a/kernel/dma/direct.c
+++ b/kernel/dma/direct.c
@@ -1,23 +1,22 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (C) 2018 Christoph Hellwig.
+ * Copyright (C) 2018-2020 Christoph Hellwig.
*
* DMA operations that map physical memory directly without using an IOMMU.
*/
#include <linux/memblock.h> /* for max_pfn */
#include <linux/export.h>
#include <linux/mm.h>
-#include <linux/dma-direct.h>
+#include <linux/dma-map-ops.h>
#include <linux/scatterlist.h>
-#include <linux/dma-contiguous.h>
-#include <linux/dma-noncoherent.h>
#include <linux/pfn.h>
#include <linux/vmalloc.h>
#include <linux/set_memory.h>
-#include <linux/swiotlb.h>
+#include <linux/slab.h>
+#include "direct.h"
/*
- * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use it
+ * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use
* it for entirely different regions. In that case the arch code needs to
* override the variable below for dma-direct to work properly.
*/
@@ -27,7 +26,7 @@ static inline dma_addr_t phys_to_dma_direct(struct device *dev,
phys_addr_t phys)
{
if (force_dma_unencrypted(dev))
- return __phys_to_dma(dev, phys);
+ return phys_to_dma_unencrypted(dev, phys);
return phys_to_dma(dev, phys);
}
@@ -45,15 +44,11 @@ u64 dma_direct_get_required_mask(struct device *dev)
return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
}
-gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
- u64 *phys_limit)
+static gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 *phys_limit)
{
- u64 dma_limit = min_not_zero(dma_mask, dev->bus_dma_limit);
-
- if (force_dma_unencrypted(dev))
- *phys_limit = __dma_to_phys(dev, dma_limit);
- else
- *phys_limit = dma_to_phys(dev, dma_limit);
+ u64 dma_limit = min_not_zero(
+ dev->coherent_dma_mask,
+ dev->bus_dma_limit);
/*
* Optimistically try the zone that the physical address mask falls
@@ -63,6 +58,7 @@ gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
* Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding
* zones.
*/
+ *phys_limit = dma_to_phys(dev, dma_limit);
if (*phys_limit <= DMA_BIT_MASK(zone_dma_bits))
return GFP_DMA;
if (*phys_limit <= DMA_BIT_MASK(32))
@@ -72,45 +68,55 @@ gfp_t dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
{
- return phys_to_dma_direct(dev, phys) + size - 1 <=
- min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
+ dma_addr_t dma_addr = phys_to_dma_direct(dev, phys);
+
+ if (dma_addr == DMA_MAPPING_ERROR)
+ return false;
+ return dma_addr + size - 1 <=
+ min_not_zero(dev->coherent_dma_mask, dev->bus_dma_limit);
}
-/*
- * Decrypting memory is allowed to block, so if this device requires
- * unencrypted memory it must come from atomic pools.
- */
-static inline bool dma_should_alloc_from_pool(struct device *dev, gfp_t gfp,
- unsigned long attrs)
+static int dma_set_decrypted(struct device *dev, void *vaddr, size_t size)
{
- if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL))
- return false;
- if (gfpflags_allow_blocking(gfp))
- return false;
- if (force_dma_unencrypted(dev))
- return true;
- if (!IS_ENABLED(CONFIG_DMA_DIRECT_REMAP))
- return false;
- if (dma_alloc_need_uncached(dev, attrs))
- return true;
- return false;
+ if (!force_dma_unencrypted(dev))
+ return 0;
+ return set_memory_decrypted((unsigned long)vaddr, PFN_UP(size));
}
-static inline bool dma_should_free_from_pool(struct device *dev,
- unsigned long attrs)
+static int dma_set_encrypted(struct device *dev, void *vaddr, size_t size)
{
- if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL))
- return true;
- if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
- !force_dma_unencrypted(dev))
- return false;
- if (IS_ENABLED(CONFIG_DMA_DIRECT_REMAP))
- return true;
- return false;
+ int ret;
+
+ if (!force_dma_unencrypted(dev))
+ return 0;
+ ret = set_memory_encrypted((unsigned long)vaddr, PFN_UP(size));
+ if (ret)
+ pr_warn_ratelimited("leaking DMA memory that can't be re-encrypted\n");
+ return ret;
+}
+
+static void __dma_direct_free_pages(struct device *dev, struct page *page,
+ size_t size)
+{
+ if (swiotlb_free(dev, page, size))
+ return;
+ dma_free_contiguous(dev, page, size);
+}
+
+static struct page *dma_direct_alloc_swiotlb(struct device *dev, size_t size)
+{
+ struct page *page = swiotlb_alloc(dev, size);
+
+ if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
+ swiotlb_free(dev, page, size);
+ return NULL;
+ }
+
+ return page;
}
static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
- gfp_t gfp, unsigned long attrs)
+ gfp_t gfp, bool allow_highmem)
{
int node = dev_to_node(dev);
struct page *page = NULL;
@@ -118,17 +124,17 @@ static struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
WARN_ON_ONCE(!PAGE_ALIGNED(size));
- if (attrs & DMA_ATTR_NO_WARN)
- gfp |= __GFP_NOWARN;
+ if (is_swiotlb_for_alloc(dev))
+ return dma_direct_alloc_swiotlb(dev, size);
- /* we always manually zero the memory once we are done: */
- gfp &= ~__GFP_ZERO;
- gfp |= dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
- &phys_limit);
+ gfp |= dma_direct_optimal_gfp_mask(dev, &phys_limit);
page = dma_alloc_contiguous(dev, size, gfp);
- if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
- dma_free_contiguous(dev, page, size);
- page = NULL;
+ if (page) {
+ if (!dma_coherent_ok(dev, page_to_phys(page), size) ||
+ (!allow_highmem && PageHighMem(page))) {
+ dma_free_contiguous(dev, page, size);
+ page = NULL;
+ }
}
again:
if (!page)
@@ -153,170 +159,242 @@ again:
return page;
}
-void *dma_direct_alloc_pages(struct device *dev, size_t size,
+/*
+ * Check if a potentially blocking operation needs to dip into the atomic
+ * pools for the given device/gfp.
+ */
+static bool dma_direct_use_pool(struct device *dev, gfp_t gfp)
+{
+ return !gfpflags_allow_blocking(gfp) && !is_swiotlb_for_alloc(dev);
+}
+
+static void *dma_direct_alloc_from_pool(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp)
+{
+ struct page *page;
+ u64 phys_limit;
+ void *ret;
+
+ if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_DMA_COHERENT_POOL)))
+ return NULL;
+
+ gfp |= dma_direct_optimal_gfp_mask(dev, &phys_limit);
+ page = dma_alloc_from_pool(dev, size, &ret, gfp, dma_coherent_ok);
+ if (!page)
+ return NULL;
+ *dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
+ return ret;
+}
+
+static void *dma_direct_alloc_no_mapping(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp)
+{
+ struct page *page;
+
+ page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO, true);
+ if (!page)
+ return NULL;
+
+ /* remove any dirty cache lines on the kernel alias */
+ if (!PageHighMem(page))
+ arch_dma_prep_coherent(page, size);
+
+ /* return the page pointer as the opaque cookie */
+ *dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
+ return page;
+}
+
+void *dma_direct_alloc(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
+ bool remap = false, set_uncached = false;
struct page *page;
void *ret;
- int err;
size = PAGE_ALIGN(size);
+ if (attrs & DMA_ATTR_NO_WARN)
+ gfp |= __GFP_NOWARN;
- if (dma_should_alloc_from_pool(dev, gfp, attrs)) {
- ret = dma_alloc_from_pool(dev, size, &page, gfp);
- if (!ret)
+ if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
+ !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev))
+ return dma_direct_alloc_no_mapping(dev, size, dma_handle, gfp);
+
+ if (!dev_is_dma_coherent(dev)) {
+ if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_ALLOC) &&
+ !is_swiotlb_for_alloc(dev))
+ return arch_dma_alloc(dev, size, dma_handle, gfp,
+ attrs);
+
+ /*
+ * If there is a global pool, always allocate from it for
+ * non-coherent devices.
+ */
+ if (IS_ENABLED(CONFIG_DMA_GLOBAL_POOL))
+ return dma_alloc_from_global_coherent(dev, size,
+ dma_handle);
+
+ /*
+ * Otherwise we require the architecture to either be able to
+ * mark arbitrary parts of the kernel direct mapping uncached,
+ * or remap it uncached.
+ */
+ set_uncached = IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED);
+ remap = IS_ENABLED(CONFIG_DMA_DIRECT_REMAP);
+ if (!set_uncached && !remap) {
+ pr_warn_once("coherent DMA allocations not supported on this platform.\n");
return NULL;
- goto done;
+ }
}
- page = __dma_direct_alloc_pages(dev, size, gfp, attrs);
+ /*
+ * Remapping or decrypting memory may block, so allocate the memory from
+ * the atomic pools instead if we aren't allowed to block.
+ */
+ if ((remap || force_dma_unencrypted(dev)) &&
+ dma_direct_use_pool(dev, gfp))
+ return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
+
+ /* we always manually zero the memory once we are done */
+ page = __dma_direct_alloc_pages(dev, size, gfp & ~__GFP_ZERO, true);
if (!page)
return NULL;
- if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
- !force_dma_unencrypted(dev)) {
- /* remove any dirty cache lines on the kernel alias */
- if (!PageHighMem(page))
- arch_dma_prep_coherent(page, size);
- /* return the page pointer as the opaque cookie */
- ret = page;
- goto done;
+ /*
+ * dma_alloc_contiguous can return highmem pages depending on a
+ * combination of the cma= arguments and per-arch setup. These need to be
+ * remapped to return a kernel virtual address.
+ */
+ if (PageHighMem(page)) {
+ remap = true;
+ set_uncached = false;
}
- if ((IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
- dma_alloc_need_uncached(dev, attrs)) ||
- (IS_ENABLED(CONFIG_DMA_REMAP) && PageHighMem(page))) {
+ if (remap) {
+ pgprot_t prot = dma_pgprot(dev, PAGE_KERNEL, attrs);
+
+ if (force_dma_unencrypted(dev))
+ prot = pgprot_decrypted(prot);
+
/* remove any dirty cache lines on the kernel alias */
arch_dma_prep_coherent(page, size);
/* create a coherent mapping */
- ret = dma_common_contiguous_remap(page, size,
- dma_pgprot(dev, PAGE_KERNEL, attrs),
+ ret = dma_common_contiguous_remap(page, size, prot,
__builtin_return_address(0));
if (!ret)
goto out_free_pages;
- if (force_dma_unencrypted(dev)) {
- err = set_memory_decrypted((unsigned long)ret,
- 1 << get_order(size));
- if (err)
- goto out_free_pages;
- }
- memset(ret, 0, size);
- goto done;
- }
-
- if (PageHighMem(page)) {
- /*
- * Depending on the cma= arguments and per-arch setup
- * dma_alloc_contiguous could return highmem pages.
- * Without remapping there is no way to return them here,
- * so log an error and fail.
- */
- dev_info(dev, "Rejecting highmem page from CMA.\n");
- goto out_free_pages;
- }
-
- ret = page_address(page);
- if (force_dma_unencrypted(dev)) {
- err = set_memory_decrypted((unsigned long)ret,
- 1 << get_order(size));
- if (err)
- goto out_free_pages;
+ } else {
+ ret = page_address(page);
+ if (dma_set_decrypted(dev, ret, size))
+ goto out_leak_pages;
}
memset(ret, 0, size);
- if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
- dma_alloc_need_uncached(dev, attrs)) {
+ if (set_uncached) {
arch_dma_prep_coherent(page, size);
ret = arch_dma_set_uncached(ret, size);
if (IS_ERR(ret))
goto out_encrypt_pages;
}
-done:
- if (force_dma_unencrypted(dev))
- *dma_handle = __phys_to_dma(dev, page_to_phys(page));
- else
- *dma_handle = phys_to_dma(dev, page_to_phys(page));
+
+ *dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
return ret;
out_encrypt_pages:
- if (force_dma_unencrypted(dev)) {
- err = set_memory_encrypted((unsigned long)page_address(page),
- 1 << get_order(size));
- /* If memory cannot be re-encrypted, it must be leaked */
- if (err)
- return NULL;
- }
+ if (dma_set_encrypted(dev, page_address(page), size))
+ return NULL;
out_free_pages:
- dma_free_contiguous(dev, page, size);
+ __dma_direct_free_pages(dev, page, size);
+ return NULL;
+out_leak_pages:
return NULL;
}
-void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
- dma_addr_t dma_addr, unsigned long attrs)
+void dma_direct_free(struct device *dev, size_t size,
+ void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
{
unsigned int page_order = get_order(size);
- /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */
- if (dma_should_free_from_pool(dev, attrs) &&
- dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
- return;
-
if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
- !force_dma_unencrypted(dev)) {
+ !force_dma_unencrypted(dev) && !is_swiotlb_for_alloc(dev)) {
/* cpu_addr is a struct page cookie, not a kernel address */
dma_free_contiguous(dev, cpu_addr, size);
return;
}
- if (force_dma_unencrypted(dev))
- set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
+ if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_ALLOC) &&
+ !dev_is_dma_coherent(dev) &&
+ !is_swiotlb_for_alloc(dev)) {
+ arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
+ return;
+ }
- if (IS_ENABLED(CONFIG_DMA_REMAP) && is_vmalloc_addr(cpu_addr))
+ if (IS_ENABLED(CONFIG_DMA_GLOBAL_POOL) &&
+ !dev_is_dma_coherent(dev)) {
+ if (!dma_release_from_global_coherent(page_order, cpu_addr))
+ WARN_ON_ONCE(1);
+ return;
+ }
+
+ /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */
+ if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
+ dma_free_from_pool(dev, cpu_addr, PAGE_ALIGN(size)))
+ return;
+
+ if (is_vmalloc_addr(cpu_addr)) {
vunmap(cpu_addr);
- else if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED))
- arch_dma_clear_uncached(cpu_addr, size);
+ } else {
+ if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED))
+ arch_dma_clear_uncached(cpu_addr, size);
+ if (dma_set_encrypted(dev, cpu_addr, size))
+ return;
+ }
- dma_free_contiguous(dev, dma_direct_to_page(dev, dma_addr), size);
+ __dma_direct_free_pages(dev, dma_direct_to_page(dev, dma_addr), size);
}
-void *dma_direct_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
+struct page *dma_direct_alloc_pages(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp)
{
- if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
- !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
- dma_alloc_need_uncached(dev, attrs))
- return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
- return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
-}
+ struct page *page;
+ void *ret;
-void dma_direct_free(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
-{
- if (!IS_ENABLED(CONFIG_ARCH_HAS_DMA_SET_UNCACHED) &&
- !IS_ENABLED(CONFIG_DMA_DIRECT_REMAP) &&
- dma_alloc_need_uncached(dev, attrs))
- arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
- else
- dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
+ if (force_dma_unencrypted(dev) && dma_direct_use_pool(dev, gfp))
+ return dma_direct_alloc_from_pool(dev, size, dma_handle, gfp);
+
+ page = __dma_direct_alloc_pages(dev, size, gfp, false);
+ if (!page)
+ return NULL;
+
+ ret = page_address(page);
+ if (dma_set_decrypted(dev, ret, size))
+ goto out_leak_pages;
+ memset(ret, 0, size);
+ *dma_handle = phys_to_dma_direct(dev, page_to_phys(page));
+ return page;
+out_leak_pages:
+ return NULL;
}
-#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
- defined(CONFIG_SWIOTLB)
-void dma_direct_sync_single_for_device(struct device *dev,
- dma_addr_t addr, size_t size, enum dma_data_direction dir)
+void dma_direct_free_pages(struct device *dev, size_t size,
+ struct page *page, dma_addr_t dma_addr,
+ enum dma_data_direction dir)
{
- phys_addr_t paddr = dma_to_phys(dev, addr);
+ void *vaddr = page_address(page);
- if (unlikely(is_swiotlb_buffer(paddr)))
- swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
+ /* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */
+ if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
+ dma_free_from_pool(dev, vaddr, size))
+ return;
- if (!dev_is_dma_coherent(dev))
- arch_sync_dma_for_device(paddr, size, dir);
+ if (dma_set_encrypted(dev, vaddr, size))
+ return;
+ __dma_direct_free_pages(dev, page, size);
}
-EXPORT_SYMBOL(dma_direct_sync_single_for_device);
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
+ defined(CONFIG_SWIOTLB)
void dma_direct_sync_sg_for_device(struct device *dev,
struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
@@ -326,36 +404,20 @@ void dma_direct_sync_sg_for_device(struct device *dev,
for_each_sg(sgl, sg, nents, i) {
phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
- if (unlikely(is_swiotlb_buffer(paddr)))
- swiotlb_tbl_sync_single(dev, paddr, sg->length,
- dir, SYNC_FOR_DEVICE);
+ if (unlikely(is_swiotlb_buffer(dev, paddr)))
+ swiotlb_sync_single_for_device(dev, paddr, sg->length,
+ dir);
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_device(paddr, sg->length,
dir);
}
}
-EXPORT_SYMBOL(dma_direct_sync_sg_for_device);
#endif
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
defined(CONFIG_SWIOTLB)
-void dma_direct_sync_single_for_cpu(struct device *dev,
- dma_addr_t addr, size_t size, enum dma_data_direction dir)
-{
- phys_addr_t paddr = dma_to_phys(dev, addr);
-
- if (!dev_is_dma_coherent(dev)) {
- arch_sync_dma_for_cpu(paddr, size, dir);
- arch_sync_dma_for_cpu_all();
- }
-
- if (unlikely(is_swiotlb_buffer(paddr)))
- swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
-}
-EXPORT_SYMBOL(dma_direct_sync_single_for_cpu);
-
void dma_direct_sync_sg_for_cpu(struct device *dev,
struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
@@ -368,79 +430,72 @@ void dma_direct_sync_sg_for_cpu(struct device *dev,
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_cpu(paddr, sg->length, dir);
- if (unlikely(is_swiotlb_buffer(paddr)))
- swiotlb_tbl_sync_single(dev, paddr, sg->length, dir,
- SYNC_FOR_CPU);
+ if (unlikely(is_swiotlb_buffer(dev, paddr)))
+ swiotlb_sync_single_for_cpu(dev, paddr, sg->length,
+ dir);
+
+ if (dir == DMA_FROM_DEVICE)
+ arch_dma_mark_clean(paddr, sg->length);
}
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_cpu_all();
}
-EXPORT_SYMBOL(dma_direct_sync_sg_for_cpu);
-
-void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
- size_t size, enum dma_data_direction dir, unsigned long attrs)
-{
- phys_addr_t phys = dma_to_phys(dev, addr);
-
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- dma_direct_sync_single_for_cpu(dev, addr, size, dir);
-
- if (unlikely(is_swiotlb_buffer(phys)))
- swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs);
-}
-EXPORT_SYMBOL(dma_direct_unmap_page);
+/*
+ * Unmaps segments, except for ones marked as pci_p2pdma which do not
+ * require any further action as they contain a bus address.
+ */
void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
int nents, enum dma_data_direction dir, unsigned long attrs)
{
struct scatterlist *sg;
int i;
- for_each_sg(sgl, sg, nents, i)
- dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
- attrs);
-}
-EXPORT_SYMBOL(dma_direct_unmap_sg);
-#endif
-
-dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size, enum dma_data_direction dir,
- unsigned long attrs)
-{
- phys_addr_t phys = page_to_phys(page) + offset;
- dma_addr_t dma_addr = phys_to_dma(dev, phys);
-
- if (unlikely(swiotlb_force == SWIOTLB_FORCE))
- return swiotlb_map(dev, phys, size, dir, attrs);
-
- if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
- if (swiotlb_force != SWIOTLB_NO_FORCE)
- return swiotlb_map(dev, phys, size, dir, attrs);
-
- dev_WARN_ONCE(dev, 1,
- "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
- &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
- return DMA_MAPPING_ERROR;
+ for_each_sg(sgl, sg, nents, i) {
+ if (sg_dma_is_bus_address(sg))
+ sg_dma_unmark_bus_address(sg);
+ else
+ dma_direct_unmap_page(dev, sg->dma_address,
+ sg_dma_len(sg), dir, attrs);
}
-
- if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
- arch_sync_dma_for_device(phys, size, dir);
- return dma_addr;
}
-EXPORT_SYMBOL(dma_direct_map_page);
+#endif
int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
enum dma_data_direction dir, unsigned long attrs)
{
- int i;
+ struct pci_p2pdma_map_state p2pdma_state = {};
+ enum pci_p2pdma_map_type map;
struct scatterlist *sg;
+ int i, ret;
for_each_sg(sgl, sg, nents, i) {
+ if (is_pci_p2pdma_page(sg_page(sg))) {
+ map = pci_p2pdma_map_segment(&p2pdma_state, dev, sg);
+ switch (map) {
+ case PCI_P2PDMA_MAP_BUS_ADDR:
+ continue;
+ case PCI_P2PDMA_MAP_THRU_HOST_BRIDGE:
+ /*
+ * Any P2P mapping that traverses the PCI
+ * host bridge must be mapped with a CPU physical
+ * address and not a PCI bus address. This is
+ * done with dma_direct_map_page() below.
+ */
+ break;
+ default:
+ ret = -EREMOTEIO;
+ goto out_unmap;
+ }
+ }
+
sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
sg->offset, sg->length, dir, attrs);
- if (sg->dma_address == DMA_MAPPING_ERROR)
+ if (sg->dma_address == DMA_MAPPING_ERROR) {
+ ret = -EIO;
goto out_unmap;
+ }
sg_dma_len(sg) = sg->length;
}
@@ -448,9 +503,8 @@ int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
out_unmap:
dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
- return 0;
+ return ret;
}
-EXPORT_SYMBOL(dma_direct_map_sg);
dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
size_t size, enum dma_data_direction dir, unsigned long attrs)
@@ -467,7 +521,6 @@ dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
return dma_addr;
}
-EXPORT_SYMBOL(dma_direct_map_resource);
int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
@@ -498,9 +551,13 @@ int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
int ret = -ENXIO;
vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
+ if (force_dma_unencrypted(dev))
+ vma->vm_page_prot = pgprot_decrypted(vma->vm_page_prot);
if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
return ret;
+ if (dma_mmap_from_global_coherent(vma, cpu_addr, size, &ret))
+ return ret;
if (vma->vm_pgoff >= count || user_count > count - vma->vm_pgoff)
return -ENXIO;
@@ -522,20 +579,60 @@ int dma_direct_supported(struct device *dev, u64 mask)
return 1;
/*
- * This check needs to be against the actual bit mask value, so
- * use __phys_to_dma() here so that the SME encryption mask isn't
+ * This check needs to be against the actual bit mask value, so use
+ * phys_to_dma_unencrypted() here so that the SME encryption mask isn't
* part of the check.
*/
if (IS_ENABLED(CONFIG_ZONE_DMA))
min_mask = min_t(u64, min_mask, DMA_BIT_MASK(zone_dma_bits));
- return mask >= __phys_to_dma(dev, min_mask);
+ return mask >= phys_to_dma_unencrypted(dev, min_mask);
+}
+
+/*
+ * Check whether all RAM resource ranges are covered by the dma_range_map.
+ * Returns 0 when further checking is needed,
+ * returns 1 if some RAM range can't be covered by the dma_range_map.
+ */
+static int check_ram_in_range_map(unsigned long start_pfn,
+ unsigned long nr_pages, void *data)
+{
+ unsigned long end_pfn = start_pfn + nr_pages;
+ const struct bus_dma_region *bdr = NULL;
+ const struct bus_dma_region *m;
+ struct device *dev = data;
+
+ while (start_pfn < end_pfn) {
+ for (m = dev->dma_range_map; PFN_DOWN(m->size); m++) {
+ unsigned long cpu_start_pfn = PFN_DOWN(m->cpu_start);
+
+ if (start_pfn >= cpu_start_pfn &&
+ start_pfn - cpu_start_pfn < PFN_DOWN(m->size)) {
+ bdr = m;
+ break;
+ }
+ }
+ if (!bdr)
+ return 1;
+
+ start_pfn = PFN_DOWN(bdr->cpu_start) + PFN_DOWN(bdr->size);
+ }
+
+ return 0;
+}
+
+bool dma_direct_all_ram_mapped(struct device *dev)
+{
+ if (!dev->dma_range_map)
+ return true;
+ return !walk_system_ram_range(0, PFN_DOWN(ULONG_MAX) + 1, dev,
+ check_ram_in_range_map);
}
size_t dma_direct_max_mapping_size(struct device *dev)
{
/* If SWIOTLB is active, use its maximum mapping size */
- if (is_swiotlb_active() &&
- (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE))
+ if (is_swiotlb_active(dev) &&
+ (dma_addressing_limited(dev) || is_swiotlb_force_bounce(dev)))
return swiotlb_max_mapping_size(dev);
return SIZE_MAX;
}
@@ -543,5 +640,45 @@ size_t dma_direct_max_mapping_size(struct device *dev)
bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr)
{
return !dev_is_dma_coherent(dev) ||
- is_swiotlb_buffer(dma_to_phys(dev, dma_addr));
+ is_swiotlb_buffer(dev, dma_to_phys(dev, dma_addr));
+}
+
+/**
+ * dma_direct_set_offset - Assign scalar offset for a single DMA range.
+ * @dev: device pointer; needed to "own" the allocated memory.
+ * @cpu_start: beginning of memory region covered by this offset.
+ * @dma_start: beginning of DMA/PCI region covered by this offset.
+ * @size: size of the region.
+ *
+ * This is for the simple case of a uniform offset which cannot
+ * be discovered by "dma-ranges".
+ *
+ * It returns -ENOMEM if out of memory, -EINVAL if a map
+ * already exists, 0 otherwise.
+ *
+ * Note: any call to this from a driver is a bug. The mapping needs
+ * to be described by the device tree or other firmware interfaces.
+ */
+int dma_direct_set_offset(struct device *dev, phys_addr_t cpu_start,
+ dma_addr_t dma_start, u64 size)
+{
+ struct bus_dma_region *map;
+ u64 offset = (u64)cpu_start - (u64)dma_start;
+
+ if (dev->dma_range_map) {
+ dev_err(dev, "attempt to add DMA range to existing map\n");
+ return -EINVAL;
+ }
+
+ if (!offset)
+ return 0;
+
+ map = kcalloc(2, sizeof(*map), GFP_KERNEL);
+ if (!map)
+ return -ENOMEM;
+ map[0].cpu_start = cpu_start;
+ map[0].dma_start = dma_start;
+ map[0].size = size;
+ dev->dma_range_map = map;
+ return 0;
}
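
As a worked illustration of the kernel-doc above, here is a minimal sketch of how arch or firmware glue (never a driver, per the note) might register a fixed CPU-to-DMA offset. The wrapper function and the 1 GiB window are hypothetical; only dma_direct_set_offset() itself and SZ_1G (<linux/sizes.h>) are real.

static int example_board_set_dma_offset(struct device *dev)
{
	/*
	 * Hypothetical layout: RAM lives at CPU address 0x80000000 but the
	 * bus sees it at DMA address 0x0, for a 1 GiB window.
	 */
	return dma_direct_set_offset(dev, 0x80000000, 0x00000000, SZ_1G);
}
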
diff --git a/kernel/dma/direct.h b/kernel/dma/direct.h
new file mode 100644
index 000000000000..18d346118fe8
--- /dev/null
+++ b/kernel/dma/direct.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2018 Christoph Hellwig.
+ *
+ * DMA operations that map physical memory directly without using an IOMMU.
+ */
+#ifndef _KERNEL_DMA_DIRECT_H
+#define _KERNEL_DMA_DIRECT_H
+
+#include <linux/dma-direct.h>
+#include <linux/memremap.h>
+
+int dma_direct_get_sgtable(struct device *dev, struct sg_table *sgt,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs);
+bool dma_direct_can_mmap(struct device *dev);
+int dma_direct_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs);
+bool dma_direct_need_sync(struct device *dev, dma_addr_t dma_addr);
+int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
+ enum dma_data_direction dir, unsigned long attrs);
+bool dma_direct_all_ram_mapped(struct device *dev);
+size_t dma_direct_max_mapping_size(struct device *dev);
+
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
+ defined(CONFIG_SWIOTLB)
+void dma_direct_sync_sg_for_device(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir);
+#else
+static inline void dma_direct_sync_sg_for_device(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+}
+#endif
+
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
+ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
+ defined(CONFIG_SWIOTLB)
+void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
+ int nents, enum dma_data_direction dir, unsigned long attrs);
+void dma_direct_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir);
+#else
+static inline void dma_direct_unmap_sg(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+}
+static inline void dma_direct_sync_sg_for_cpu(struct device *dev,
+ struct scatterlist *sgl, int nents, enum dma_data_direction dir)
+{
+}
+#endif
+
+static inline void dma_direct_sync_single_for_device(struct device *dev,
+ dma_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+ phys_addr_t paddr = dma_to_phys(dev, addr);
+
+ if (unlikely(is_swiotlb_buffer(dev, paddr)))
+ swiotlb_sync_single_for_device(dev, paddr, size, dir);
+
+ if (!dev_is_dma_coherent(dev))
+ arch_sync_dma_for_device(paddr, size, dir);
+}
+
+static inline void dma_direct_sync_single_for_cpu(struct device *dev,
+ dma_addr_t addr, size_t size, enum dma_data_direction dir)
+{
+ phys_addr_t paddr = dma_to_phys(dev, addr);
+
+ if (!dev_is_dma_coherent(dev)) {
+ arch_sync_dma_for_cpu(paddr, size, dir);
+ arch_sync_dma_for_cpu_all();
+ }
+
+ if (unlikely(is_swiotlb_buffer(dev, paddr)))
+ swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
+
+ if (dir == DMA_FROM_DEVICE)
+ arch_dma_mark_clean(paddr, size);
+}
+
+static inline dma_addr_t dma_direct_map_page(struct device *dev,
+ struct page *page, unsigned long offset, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ phys_addr_t phys = page_to_phys(page) + offset;
+ dma_addr_t dma_addr = phys_to_dma(dev, phys);
+
+ if (is_swiotlb_force_bounce(dev)) {
+ if (is_pci_p2pdma_page(page))
+ return DMA_MAPPING_ERROR;
+ return swiotlb_map(dev, phys, size, dir, attrs);
+ }
+
+ if (unlikely(!dma_capable(dev, dma_addr, size, true)) ||
+ dma_kmalloc_needs_bounce(dev, size, dir)) {
+ if (is_pci_p2pdma_page(page))
+ return DMA_MAPPING_ERROR;
+ if (is_swiotlb_active(dev))
+ return swiotlb_map(dev, phys, size, dir, attrs);
+
+ dev_WARN_ONCE(dev, 1,
+ "DMA addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
+ &dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
+ return DMA_MAPPING_ERROR;
+ }
+
+ if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ arch_sync_dma_for_device(phys, size, dir);
+ return dma_addr;
+}
+
+static inline void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ phys_addr_t phys = dma_to_phys(dev, addr);
+
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
+ dma_direct_sync_single_for_cpu(dev, addr, size, dir);
+
+ if (unlikely(is_swiotlb_buffer(dev, phys)))
+ swiotlb_tbl_unmap_single(dev, phys, size, dir,
+ attrs | DMA_ATTR_SKIP_CPU_SYNC);
+}
+#endif /* _KERNEL_DMA_DIRECT_H */
diff --git a/kernel/dma/dummy.c b/kernel/dma/dummy.c
index 05607642c888..b492d59ac77e 100644
--- a/kernel/dma/dummy.c
+++ b/kernel/dma/dummy.c
@@ -2,7 +2,7 @@
/*
* Dummy DMA ops that always fail.
*/
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
static int dma_dummy_mmap(struct device *dev, struct vm_area_struct *vma,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
@@ -22,7 +22,7 @@ static int dma_dummy_map_sg(struct device *dev, struct scatterlist *sgl,
int nelems, enum dma_data_direction dir,
unsigned long attrs)
{
- return 0;
+ return -EINVAL;
}
static int dma_dummy_supported(struct device *hwdev, u64 mask)
@@ -36,4 +36,3 @@ const struct dma_map_ops dma_dummy_ops = {
.map_sg = dma_dummy_map_sg,
.dma_supported = dma_dummy_supported,
};
-EXPORT_SYMBOL(dma_dummy_ops);
diff --git a/kernel/dma/map_benchmark.c b/kernel/dma/map_benchmark.c
new file mode 100644
index 000000000000..02205ab53b7e
--- /dev/null
+++ b/kernel/dma/map_benchmark.c
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 HiSilicon Limited.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/debugfs.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/dma-mapping.h>
+#include <linux/kernel.h>
+#include <linux/kthread.h>
+#include <linux/map_benchmark.h>
+#include <linux/math64.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/timekeeping.h>
+
+struct map_benchmark_data {
+ struct map_benchmark bparam;
+ struct device *dev;
+ struct dentry *debugfs;
+ enum dma_data_direction dir;
+ atomic64_t sum_map_100ns;
+ atomic64_t sum_unmap_100ns;
+ atomic64_t sum_sq_map;
+ atomic64_t sum_sq_unmap;
+ atomic64_t loops;
+};
+
+static int map_benchmark_thread(void *data)
+{
+ void *buf;
+ dma_addr_t dma_addr;
+ struct map_benchmark_data *map = data;
+ int npages = map->bparam.granule;
+ u64 size = npages * PAGE_SIZE;
+ int ret = 0;
+
+ buf = alloc_pages_exact(size, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ while (!kthread_should_stop()) {
+ u64 map_100ns, unmap_100ns, map_sq, unmap_sq;
+ ktime_t map_stime, map_etime, unmap_stime, unmap_etime;
+ ktime_t map_delta, unmap_delta;
+
+ /*
+ * For a non-coherent device, if we don't dirty the buffer in the
+ * cache, this will give an underestimate of the real-world
+ * overhead of BIDIRECTIONAL or TO_DEVICE mappings;
+ * 66 means everything goes well! 66 is lucky.
+ */
+ if (map->dir != DMA_FROM_DEVICE)
+ memset(buf, 0x66, size);
+
+ map_stime = ktime_get();
+ dma_addr = dma_map_single(map->dev, buf, size, map->dir);
+ if (unlikely(dma_mapping_error(map->dev, dma_addr))) {
+ pr_err("dma_map_single failed on %s\n",
+ dev_name(map->dev));
+ ret = -ENOMEM;
+ goto out;
+ }
+ map_etime = ktime_get();
+ map_delta = ktime_sub(map_etime, map_stime);
+
+ /* Pretend DMA is transmitting */
+ ndelay(map->bparam.dma_trans_ns);
+
+ unmap_stime = ktime_get();
+ dma_unmap_single(map->dev, dma_addr, size, map->dir);
+ unmap_etime = ktime_get();
+ unmap_delta = ktime_sub(unmap_etime, unmap_stime);
+
+ /* calculate sum and sum of squares */
+
+ map_100ns = div64_ul(map_delta, 100);
+ unmap_100ns = div64_ul(unmap_delta, 100);
+ map_sq = map_100ns * map_100ns;
+ unmap_sq = unmap_100ns * unmap_100ns;
+
+ atomic64_add(map_100ns, &map->sum_map_100ns);
+ atomic64_add(unmap_100ns, &map->sum_unmap_100ns);
+ atomic64_add(map_sq, &map->sum_sq_map);
+ atomic64_add(unmap_sq, &map->sum_sq_unmap);
+ atomic64_inc(&map->loops);
+ }
+
+out:
+ free_pages_exact(buf, size);
+ return ret;
+}
+
+static int do_map_benchmark(struct map_benchmark_data *map)
+{
+ struct task_struct **tsk;
+ int threads = map->bparam.threads;
+ int node = map->bparam.node;
+ const cpumask_t *cpu_mask = cpumask_of_node(node);
+ u64 loops;
+ int ret = 0;
+ int i;
+
+ tsk = kmalloc_array(threads, sizeof(*tsk), GFP_KERNEL);
+ if (!tsk)
+ return -ENOMEM;
+
+ get_device(map->dev);
+
+ for (i = 0; i < threads; i++) {
+ tsk[i] = kthread_create_on_node(map_benchmark_thread, map,
+ map->bparam.node, "dma-map-benchmark/%d", i);
+ if (IS_ERR(tsk[i])) {
+ pr_err("create dma_map thread failed\n");
+ ret = PTR_ERR(tsk[i]);
+ goto out;
+ }
+
+ if (node != NUMA_NO_NODE)
+ kthread_bind_mask(tsk[i], cpu_mask);
+ }
+
+ /* clear the old values from the previous benchmark */
+ atomic64_set(&map->sum_map_100ns, 0);
+ atomic64_set(&map->sum_unmap_100ns, 0);
+ atomic64_set(&map->sum_sq_map, 0);
+ atomic64_set(&map->sum_sq_unmap, 0);
+ atomic64_set(&map->loops, 0);
+
+ for (i = 0; i < threads; i++) {
+ get_task_struct(tsk[i]);
+ wake_up_process(tsk[i]);
+ }
+
+ msleep_interruptible(map->bparam.seconds * 1000);
+
+ /* wait for the completion of benchmark threads */
+ for (i = 0; i < threads; i++) {
+ ret = kthread_stop(tsk[i]);
+ if (ret)
+ goto out;
+ }
+
+ loops = atomic64_read(&map->loops);
+ if (likely(loops > 0)) {
+ u64 map_variance, unmap_variance;
+ u64 sum_map = atomic64_read(&map->sum_map_100ns);
+ u64 sum_unmap = atomic64_read(&map->sum_unmap_100ns);
+ u64 sum_sq_map = atomic64_read(&map->sum_sq_map);
+ u64 sum_sq_unmap = atomic64_read(&map->sum_sq_unmap);
+
+ /* average latency */
+ map->bparam.avg_map_100ns = div64_u64(sum_map, loops);
+ map->bparam.avg_unmap_100ns = div64_u64(sum_unmap, loops);
+
+ /* standard deviation of latency */
+ map_variance = div64_u64(sum_sq_map, loops) -
+ map->bparam.avg_map_100ns *
+ map->bparam.avg_map_100ns;
+ unmap_variance = div64_u64(sum_sq_unmap, loops) -
+ map->bparam.avg_unmap_100ns *
+ map->bparam.avg_unmap_100ns;
+ map->bparam.map_stddev = int_sqrt64(map_variance);
+ map->bparam.unmap_stddev = int_sqrt64(unmap_variance);
+ }
+
+out:
+ for (i = 0; i < threads; i++)
+ put_task_struct(tsk[i]);
+ put_device(map->dev);
+ kfree(tsk);
+ return ret;
+}
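
For intuition about the statistics above: with two map samples of 3 and 5 (in units of 100 ns), sum_map_100ns = 8 and sum_sq_map = 9 + 25 = 34, so with loops = 2 the average is 4 and the variance is 34/2 - 4*4 = 1, i.e. a standard deviation of 1. This is the usual E[x^2] - E[x]^2 identity, computed here with integer division and int_sqrt64().
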
+
+static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ struct map_benchmark_data *map = file->private_data;
+ void __user *argp = (void __user *)arg;
+ u64 old_dma_mask;
+ int ret;
+
+ if (copy_from_user(&map->bparam, argp, sizeof(map->bparam)))
+ return -EFAULT;
+
+ switch (cmd) {
+ case DMA_MAP_BENCHMARK:
+ if (map->bparam.threads == 0 ||
+ map->bparam.threads > DMA_MAP_MAX_THREADS) {
+ pr_err("invalid thread number\n");
+ return -EINVAL;
+ }
+
+ if (map->bparam.seconds == 0 ||
+ map->bparam.seconds > DMA_MAP_MAX_SECONDS) {
+ pr_err("invalid duration seconds\n");
+ return -EINVAL;
+ }
+
+ if (map->bparam.dma_trans_ns > DMA_MAP_MAX_TRANS_DELAY) {
+ pr_err("invalid transmission delay\n");
+ return -EINVAL;
+ }
+
+ if (map->bparam.node != NUMA_NO_NODE &&
+ !node_possible(map->bparam.node)) {
+ pr_err("invalid numa node\n");
+ return -EINVAL;
+ }
+
+ if (map->bparam.granule < 1 || map->bparam.granule > 1024) {
+ pr_err("invalid granule size\n");
+ return -EINVAL;
+ }
+
+ switch (map->bparam.dma_dir) {
+ case DMA_MAP_BIDIRECTIONAL:
+ map->dir = DMA_BIDIRECTIONAL;
+ break;
+ case DMA_MAP_FROM_DEVICE:
+ map->dir = DMA_FROM_DEVICE;
+ break;
+ case DMA_MAP_TO_DEVICE:
+ map->dir = DMA_TO_DEVICE;
+ break;
+ default:
+ pr_err("invalid DMA direction\n");
+ return -EINVAL;
+ }
+
+ old_dma_mask = dma_get_mask(map->dev);
+
+ ret = dma_set_mask(map->dev,
+ DMA_BIT_MASK(map->bparam.dma_bits));
+ if (ret) {
+ pr_err("failed to set dma_mask on device %s\n",
+ dev_name(map->dev));
+ return -EINVAL;
+ }
+
+ ret = do_map_benchmark(map);
+
+ /*
+ * Restore the original dma_mask, as many devices have their dma_mask
+ * set by architectures, ACPI or buses. When we bind them back to
+ * their original drivers, those drivers shouldn't see a dma_mask
+ * changed by the benchmark.
+ */
+ dma_set_mask(map->dev, old_dma_mask);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (copy_to_user(argp, &map->bparam, sizeof(map->bparam)))
+ return -EFAULT;
+
+ return ret;
+}
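
The ioctl above copies the parameters in, optionally overrides the DMA mask, runs the benchmark and returns the timing results through the same structure. A minimal userspace sketch of that round trip, to be run after a device has been bound to the dma_map_benchmark driver, might look as follows; the local include and the numeric value used for the bidirectional direction are assumptions, while the field names and the debugfs path match the driver above.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include "map_benchmark.h"	/* assumed copy of struct map_benchmark + DMA_MAP_BENCHMARK */

int main(void)
{
	struct map_benchmark bp = {
		.threads	= 1,
		.seconds	= 10,
		.node		= -1,	/* NUMA_NO_NODE: no CPU binding */
		.dma_bits	= 32,
		.dma_dir	= 0,	/* assumed value of DMA_MAP_BIDIRECTIONAL */
		.granule	= 1,	/* one page per map/unmap iteration */
	};
	int fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);

	if (fd < 0 || ioctl(fd, DMA_MAP_BENCHMARK, &bp) < 0) {
		perror("dma_map_benchmark");
		return 1;
	}
	printf("map: avg %llu stddev %llu, unmap: avg %llu stddev %llu (x100ns)\n",
	       (unsigned long long)bp.avg_map_100ns,
	       (unsigned long long)bp.map_stddev,
	       (unsigned long long)bp.avg_unmap_100ns,
	       (unsigned long long)bp.unmap_stddev);
	return 0;
}
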
+
+static const struct file_operations map_benchmark_fops = {
+ .open = simple_open,
+ .unlocked_ioctl = map_benchmark_ioctl,
+};
+
+static void map_benchmark_remove_debugfs(void *data)
+{
+ struct map_benchmark_data *map = (struct map_benchmark_data *)data;
+
+ debugfs_remove(map->debugfs);
+}
+
+static int __map_benchmark_probe(struct device *dev)
+{
+ struct dentry *entry;
+ struct map_benchmark_data *map;
+ int ret;
+
+ map = devm_kzalloc(dev, sizeof(*map), GFP_KERNEL);
+ if (!map)
+ return -ENOMEM;
+ map->dev = dev;
+
+ ret = devm_add_action(dev, map_benchmark_remove_debugfs, map);
+ if (ret) {
+ pr_err("Can't add debugfs remove action\n");
+ return ret;
+ }
+
+ /*
+ * We only permit one device to be bound to this driver; a second
+ * probe will fail.
+ */
+ entry = debugfs_create_file("dma_map_benchmark", 0600, NULL, map,
+ &map_benchmark_fops);
+ if (IS_ERR(entry))
+ return PTR_ERR(entry);
+ map->debugfs = entry;
+
+ return 0;
+}
+
+static int map_benchmark_platform_probe(struct platform_device *pdev)
+{
+ return __map_benchmark_probe(&pdev->dev);
+}
+
+static struct platform_driver map_benchmark_platform_driver = {
+ .driver = {
+ .name = "dma_map_benchmark",
+ },
+ .probe = map_benchmark_platform_probe,
+};
+
+static int
+map_benchmark_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ return __map_benchmark_probe(&pdev->dev);
+}
+
+static struct pci_driver map_benchmark_pci_driver = {
+ .name = "dma_map_benchmark",
+ .probe = map_benchmark_pci_probe,
+};
+
+static int __init map_benchmark_init(void)
+{
+ int ret;
+
+ ret = pci_register_driver(&map_benchmark_pci_driver);
+ if (ret)
+ return ret;
+
+ ret = platform_driver_register(&map_benchmark_platform_driver);
+ if (ret) {
+ pci_unregister_driver(&map_benchmark_pci_driver);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void __exit map_benchmark_cleanup(void)
+{
+ platform_driver_unregister(&map_benchmark_platform_driver);
+ pci_unregister_driver(&map_benchmark_pci_driver);
+}
+
+module_init(map_benchmark_init);
+module_exit(map_benchmark_cleanup);
+
+MODULE_AUTHOR("Barry Song <song.bao.hua@hisilicon.com>");
+MODULE_DESCRIPTION("dma_map benchmark driver");
diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c
index a8c18c9a796f..58db8fd70471 100644
--- a/kernel/dma/mapping.c
+++ b/kernel/dma/mapping.c
@@ -7,13 +7,21 @@
*/
#include <linux/memblock.h> /* for max_pfn */
#include <linux/acpi.h>
-#include <linux/dma-direct.h>
-#include <linux/dma-noncoherent.h>
+#include <linux/dma-map-ops.h>
#include <linux/export.h>
#include <linux/gfp.h>
+#include <linux/kmsan.h>
#include <linux/of_device.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
+#include "debug.h"
+#include "direct.h"
+
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
+ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
+ defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL)
+bool dma_default_coherent = IS_ENABLED(CONFIG_ARCH_DMA_DEFAULT_COHERENT);
+#endif
/*
* Managed DMA API
@@ -105,21 +113,277 @@ void *dmam_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
}
EXPORT_SYMBOL(dmam_alloc_attrs);
+static bool dma_go_direct(struct device *dev, dma_addr_t mask,
+ const struct dma_map_ops *ops)
+{
+ if (likely(!ops))
+ return true;
+#ifdef CONFIG_DMA_OPS_BYPASS
+ if (dev->dma_ops_bypass)
+ return min_not_zero(mask, dev->bus_dma_limit) >=
+ dma_direct_get_required_mask(dev);
+#endif
+ return false;
+}
+
+
/*
- * Create scatter-list for the already allocated DMA buffer.
+ * Check if the device uses a direct mapping for streaming DMA operations.
+ * This allows IOMMU drivers to set a bypass mode if the DMA mask is large
+ * enough.
*/
-int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
- void *cpu_addr, dma_addr_t dma_addr, size_t size,
- unsigned long attrs)
+static inline bool dma_alloc_direct(struct device *dev,
+ const struct dma_map_ops *ops)
+{
+ return dma_go_direct(dev, dev->coherent_dma_mask, ops);
+}
+
+static inline bool dma_map_direct(struct device *dev,
+ const struct dma_map_ops *ops)
+{
+ return dma_go_direct(dev, *dev->dma_mask, ops);
+}
+
+dma_addr_t dma_map_page_attrs(struct device *dev, struct page *page,
+ size_t offset, size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ dma_addr_t addr;
+
+ BUG_ON(!valid_dma_direction(dir));
+
+ if (WARN_ON_ONCE(!dev->dma_mask))
+ return DMA_MAPPING_ERROR;
+
+ if (dma_map_direct(dev, ops) ||
+ arch_dma_map_page_direct(dev, page_to_phys(page) + offset + size))
+ addr = dma_direct_map_page(dev, page, offset, size, dir, attrs);
+ else
+ addr = ops->map_page(dev, page, offset, size, dir, attrs);
+ kmsan_handle_dma(page, offset, size, dir);
+ debug_dma_map_page(dev, page, offset, size, dir, addr, attrs);
+
+ return addr;
+}
+EXPORT_SYMBOL(dma_map_page_attrs);
+
+void dma_unmap_page_attrs(struct device *dev, dma_addr_t addr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ BUG_ON(!valid_dma_direction(dir));
+ if (dma_map_direct(dev, ops) ||
+ arch_dma_unmap_page_direct(dev, addr + size))
+ dma_direct_unmap_page(dev, addr, size, dir, attrs);
+ else if (ops->unmap_page)
+ ops->unmap_page(dev, addr, size, dir, attrs);
+ debug_dma_unmap_page(dev, addr, size, dir);
+}
+EXPORT_SYMBOL(dma_unmap_page_attrs);
+
+static int __dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir, unsigned long attrs)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ int ents;
+
+ BUG_ON(!valid_dma_direction(dir));
+
+ if (WARN_ON_ONCE(!dev->dma_mask))
+ return 0;
+
+ if (dma_map_direct(dev, ops) ||
+ arch_dma_map_sg_direct(dev, sg, nents))
+ ents = dma_direct_map_sg(dev, sg, nents, dir, attrs);
+ else
+ ents = ops->map_sg(dev, sg, nents, dir, attrs);
+
+ if (ents > 0) {
+ kmsan_handle_dma_sg(sg, nents, dir);
+ debug_dma_map_sg(dev, sg, nents, ents, dir, attrs);
+ } else if (WARN_ON_ONCE(ents != -EINVAL && ents != -ENOMEM &&
+ ents != -EIO && ents != -EREMOTEIO)) {
+ return -EIO;
+ }
+
+ return ents;
+}
+
+/**
+ * dma_map_sg_attrs - Map the given buffer for DMA
+ * @dev: The device for which to perform the DMA operation
+ * @sg: The scatterlist describing the buffer
+ * @nents: Number of entries to map
+ * @dir: DMA direction
+ * @attrs: Optional DMA attributes for the map operation
+ *
+ * Maps a buffer described by a scatterlist passed in the sg argument with
+ * nents segments for the @dir DMA operation by the @dev device.
+ *
+ * Returns the number of mapped entries (which can be less than nents)
+ * on success. Zero is returned for any error.
+ *
+ * dma_unmap_sg_attrs() should be used to unmap the buffer with the
+ * original sg and original nents (not the value returned by this function).
+ */
+unsigned int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir, unsigned long attrs)
{
- struct page *page = virt_to_page(cpu_addr);
int ret;
- ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
- if (!ret)
- sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
+ ret = __dma_map_sg_attrs(dev, sg, nents, dir, attrs);
+ if (ret < 0)
+ return 0;
return ret;
}
+EXPORT_SYMBOL(dma_map_sg_attrs);
+
+/**
+ * dma_map_sgtable - Map the given buffer for DMA
+ * @dev: The device for which to perform the DMA operation
+ * @sgt: The sg_table object describing the buffer
+ * @dir: DMA direction
+ * @attrs: Optional DMA attributes for the map operation
+ *
+ * Maps a buffer described by a scatterlist stored in the given sg_table
+ * object for the @dir DMA operation by the @dev device. After success, the
+ * ownership for the buffer is transferred to the DMA domain. One has to
+ * call dma_sync_sgtable_for_cpu() or dma_unmap_sgtable() to move the
+ * ownership of the buffer back to the CPU domain before the CPU
+ * touches the buffer.
+ *
+ * Returns 0 on success or a negative error code on error. The following
+ * error codes are supported with the given meaning:
+ *
+ * -EINVAL An invalid argument, unaligned access or other error
+ * in usage. Will not succeed if retried.
+ * -ENOMEM Insufficient resources (like memory or IOVA space) to
+ * complete the mapping. Should succeed if retried later.
+ * -EIO Legacy error code with an unknown meaning, e.g. this is
+ * returned if a lower level call returned
+ * DMA_MAPPING_ERROR.
+ * -EREMOTEIO The DMA device cannot access P2PDMA memory specified
+ * in the sg_table. This will not succeed if retried.
+ */
+int dma_map_sgtable(struct device *dev, struct sg_table *sgt,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ int nents;
+
+ nents = __dma_map_sg_attrs(dev, sgt->sgl, sgt->orig_nents, dir, attrs);
+ if (nents < 0)
+ return nents;
+ sgt->nents = nents;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(dma_map_sgtable);
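
A minimal, hypothetical consumer sketch of the ownership rules described in the kernel-doc above; the helper name and the chosen direction are invented, while dma_map_sgtable(), dma_sync_sgtable_for_cpu() and dma_unmap_sgtable() are the real entry points.

static int example_xfer(struct device *dev, struct sg_table *sgt)
{
	int ret;

	ret = dma_map_sgtable(dev, sgt, DMA_FROM_DEVICE, 0);
	if (ret)
		return ret;	/* -EINVAL, -ENOMEM, -EIO or -EREMOTEIO */

	/* ... let the device fill the sgt->nents mapped segments ... */

	/* hand ownership back to the CPU before reading the data */
	dma_sync_sgtable_for_cpu(dev, sgt, DMA_FROM_DEVICE);
	/* ... the CPU may now look at the buffer ... */

	dma_unmap_sgtable(dev, sgt, DMA_FROM_DEVICE, 0);
	return 0;
}
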
+
+void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ BUG_ON(!valid_dma_direction(dir));
+ debug_dma_unmap_sg(dev, sg, nents, dir);
+ if (dma_map_direct(dev, ops) ||
+ arch_dma_unmap_sg_direct(dev, sg, nents))
+ dma_direct_unmap_sg(dev, sg, nents, dir, attrs);
+ else if (ops->unmap_sg)
+ ops->unmap_sg(dev, sg, nents, dir, attrs);
+}
+EXPORT_SYMBOL(dma_unmap_sg_attrs);
+
+dma_addr_t dma_map_resource(struct device *dev, phys_addr_t phys_addr,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ dma_addr_t addr = DMA_MAPPING_ERROR;
+
+ BUG_ON(!valid_dma_direction(dir));
+
+ if (WARN_ON_ONCE(!dev->dma_mask))
+ return DMA_MAPPING_ERROR;
+
+ if (dma_map_direct(dev, ops))
+ addr = dma_direct_map_resource(dev, phys_addr, size, dir, attrs);
+ else if (ops->map_resource)
+ addr = ops->map_resource(dev, phys_addr, size, dir, attrs);
+
+ debug_dma_map_resource(dev, phys_addr, size, dir, addr, attrs);
+ return addr;
+}
+EXPORT_SYMBOL(dma_map_resource);
+
+void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ BUG_ON(!valid_dma_direction(dir));
+ if (!dma_map_direct(dev, ops) && ops->unmap_resource)
+ ops->unmap_resource(dev, addr, size, dir, attrs);
+ debug_dma_unmap_resource(dev, addr, size, dir);
+}
+EXPORT_SYMBOL(dma_unmap_resource);
+
+void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size,
+ enum dma_data_direction dir)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ BUG_ON(!valid_dma_direction(dir));
+ if (dma_map_direct(dev, ops))
+ dma_direct_sync_single_for_cpu(dev, addr, size, dir);
+ else if (ops->sync_single_for_cpu)
+ ops->sync_single_for_cpu(dev, addr, size, dir);
+ debug_dma_sync_single_for_cpu(dev, addr, size, dir);
+}
+EXPORT_SYMBOL(dma_sync_single_for_cpu);
+
+void dma_sync_single_for_device(struct device *dev, dma_addr_t addr,
+ size_t size, enum dma_data_direction dir)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ BUG_ON(!valid_dma_direction(dir));
+ if (dma_map_direct(dev, ops))
+ dma_direct_sync_single_for_device(dev, addr, size, dir);
+ else if (ops->sync_single_for_device)
+ ops->sync_single_for_device(dev, addr, size, dir);
+ debug_dma_sync_single_for_device(dev, addr, size, dir);
+}
+EXPORT_SYMBOL(dma_sync_single_for_device);
+
+void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg,
+ int nelems, enum dma_data_direction dir)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ BUG_ON(!valid_dma_direction(dir));
+ if (dma_map_direct(dev, ops))
+ dma_direct_sync_sg_for_cpu(dev, sg, nelems, dir);
+ else if (ops->sync_sg_for_cpu)
+ ops->sync_sg_for_cpu(dev, sg, nelems, dir);
+ debug_dma_sync_sg_for_cpu(dev, sg, nelems, dir);
+}
+EXPORT_SYMBOL(dma_sync_sg_for_cpu);
+
+void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg,
+ int nelems, enum dma_data_direction dir)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ BUG_ON(!valid_dma_direction(dir));
+ if (dma_map_direct(dev, ops))
+ dma_direct_sync_sg_for_device(dev, sg, nelems, dir);
+ else if (ops->sync_sg_for_device)
+ ops->sync_sg_for_device(dev, sg, nelems, dir);
+ debug_dma_sync_sg_for_device(dev, sg, nelems, dir);
+}
+EXPORT_SYMBOL(dma_sync_sg_for_device);
/*
* The whole dma_get_sgtable() idea is fundamentally unsafe - it seems
@@ -138,7 +402,7 @@ int dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt,
{
const struct dma_map_ops *ops = get_dma_ops(dev);
- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
return dma_direct_get_sgtable(dev, sgt, cpu_addr, dma_addr,
size, attrs);
if (!ops->get_sgtable)
@@ -154,11 +418,7 @@ EXPORT_SYMBOL(dma_get_sgtable_attrs);
*/
pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs)
{
- if (force_dma_unencrypted(dev))
- prot = pgprot_decrypted(prot);
- if (dev_is_dma_coherent(dev) ||
- (IS_ENABLED(CONFIG_DMA_NONCOHERENT_CACHE_SYNC) &&
- (attrs & DMA_ATTR_NON_CONSISTENT)))
+ if (dev_is_dma_coherent(dev))
return prot;
#ifdef CONFIG_ARCH_HAS_DMA_WRITE_COMBINE
if (attrs & DMA_ATTR_WRITE_COMBINE)
@@ -168,35 +428,6 @@ pgprot_t dma_pgprot(struct device *dev, pgprot_t prot, unsigned long attrs)
}
#endif /* CONFIG_MMU */
-/*
- * Create userspace mapping for the DMA-coherent memory.
- */
-int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t dma_addr, size_t size,
- unsigned long attrs)
-{
-#ifdef CONFIG_MMU
- unsigned long user_count = vma_pages(vma);
- unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
- unsigned long off = vma->vm_pgoff;
- int ret = -ENXIO;
-
- vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
-
- if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
- return ret;
-
- if (off >= count || user_count > count - off)
- return -ENXIO;
-
- return remap_pfn_range(vma, vma->vm_start,
- page_to_pfn(virt_to_page(cpu_addr)) + vma->vm_pgoff,
- user_count << PAGE_SHIFT, vma->vm_page_prot);
-#else
- return -ENXIO;
-#endif /* CONFIG_MMU */
-}
-
/**
* dma_can_mmap - check if a given device supports dma_mmap_*
* @dev: device to check
@@ -208,7 +439,7 @@ bool dma_can_mmap(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
return dma_direct_can_mmap(dev);
return ops->mmap != NULL;
}
@@ -233,7 +464,7 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma,
{
const struct dma_map_ops *ops = get_dma_ops(dev);
- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
return dma_direct_mmap(dev, vma, cpu_addr, dma_addr, size,
attrs);
if (!ops->mmap)
@@ -246,7 +477,7 @@ u64 dma_get_required_mask(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
return dma_direct_get_required_mask(dev);
if (ops->get_required_mask)
return ops->get_required_mask(dev);
@@ -271,20 +502,28 @@ void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle,
WARN_ON_ONCE(!dev->coherent_dma_mask);
+ /*
+ * DMA allocations can never be turned back into a page pointer, so
+ * requesting compound pages doesn't make sense (and can't even be
+ * supported at all by various backends).
+ */
+ if (WARN_ON_ONCE(flag & __GFP_COMP))
+ return NULL;
+
if (dma_alloc_from_dev_coherent(dev, size, dma_handle, &cpu_addr))
return cpu_addr;
/* let the implementation decide on the zone to allocate from: */
flag &= ~(__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM);
- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
cpu_addr = dma_direct_alloc(dev, size, dma_handle, flag, attrs);
else if (ops->alloc)
cpu_addr = ops->alloc(dev, size, dma_handle, flag, attrs);
else
return NULL;
- debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr);
+ debug_dma_alloc_coherent(dev, size, *dma_handle, cpu_addr, attrs);
return cpu_addr;
}
EXPORT_SYMBOL(dma_alloc_attrs);
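
For reference, the usual way this path is entered is through dma_alloc_coherent(); a hypothetical caller sketch (the wrapper name is invented):

static void *example_alloc_ring(struct device *dev, size_t size,
				dma_addr_t *dma)
{
	/*
	 * Plain GFP_KERNEL: zone modifiers are stripped and __GFP_COMP is
	 * rejected by dma_alloc_attrs() as shown above.
	 */
	return dma_alloc_coherent(dev, size, dma, GFP_KERNEL);
}
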
@@ -309,30 +548,217 @@ void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr,
return;
debug_dma_free_coherent(dev, size, cpu_addr, dma_handle);
- if (dma_is_direct(ops))
+ if (dma_alloc_direct(dev, ops))
dma_direct_free(dev, size, cpu_addr, dma_handle, attrs);
else if (ops->free)
ops->free(dev, size, cpu_addr, dma_handle, attrs);
}
EXPORT_SYMBOL(dma_free_attrs);
-int dma_supported(struct device *dev, u64 mask)
+static struct page *__dma_alloc_pages(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ if (WARN_ON_ONCE(!dev->coherent_dma_mask))
+ return NULL;
+ if (WARN_ON_ONCE(gfp & (__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM)))
+ return NULL;
+ if (WARN_ON_ONCE(gfp & __GFP_COMP))
+ return NULL;
+
+ size = PAGE_ALIGN(size);
+ if (dma_alloc_direct(dev, ops))
+ return dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp);
+ if (!ops->alloc_pages)
+ return NULL;
+ return ops->alloc_pages(dev, size, dma_handle, dir, gfp);
+}
+
+struct page *dma_alloc_pages(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp)
+{
+ struct page *page = __dma_alloc_pages(dev, size, dma_handle, dir, gfp);
+
+ if (page)
+ debug_dma_map_page(dev, page, 0, size, dir, *dma_handle, 0);
+ return page;
+}
+EXPORT_SYMBOL_GPL(dma_alloc_pages);
+
+static void __dma_free_pages(struct device *dev, size_t size, struct page *page,
+ dma_addr_t dma_handle, enum dma_data_direction dir)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ size = PAGE_ALIGN(size);
+ if (dma_alloc_direct(dev, ops))
+ dma_direct_free_pages(dev, size, page, dma_handle, dir);
+ else if (ops->free_pages)
+ ops->free_pages(dev, size, page, dma_handle, dir);
+}
+
+void dma_free_pages(struct device *dev, size_t size, struct page *page,
+ dma_addr_t dma_handle, enum dma_data_direction dir)
+{
+ debug_dma_unmap_page(dev, dma_handle, size, dir);
+ __dma_free_pages(dev, size, page, dma_handle, dir);
+}
+EXPORT_SYMBOL_GPL(dma_free_pages);
+
+int dma_mmap_pages(struct device *dev, struct vm_area_struct *vma,
+ size_t size, struct page *page)
+{
+ unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ if (vma->vm_pgoff >= count || vma_pages(vma) > count - vma->vm_pgoff)
+ return -ENXIO;
+ return remap_pfn_range(vma, vma->vm_start,
+ page_to_pfn(page) + vma->vm_pgoff,
+ vma_pages(vma) << PAGE_SHIFT, vma->vm_page_prot);
+}
+EXPORT_SYMBOL_GPL(dma_mmap_pages);
+
+static struct sg_table *alloc_single_sgt(struct device *dev, size_t size,
+ enum dma_data_direction dir, gfp_t gfp)
+{
+ struct sg_table *sgt;
+ struct page *page;
+
+ sgt = kmalloc(sizeof(*sgt), gfp);
+ if (!sgt)
+ return NULL;
+ if (sg_alloc_table(sgt, 1, gfp))
+ goto out_free_sgt;
+ page = __dma_alloc_pages(dev, size, &sgt->sgl->dma_address, dir, gfp);
+ if (!page)
+ goto out_free_table;
+ sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
+ sg_dma_len(sgt->sgl) = sgt->sgl->length;
+ return sgt;
+out_free_table:
+ sg_free_table(sgt);
+out_free_sgt:
+ kfree(sgt);
+ return NULL;
+}
+
+struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size,
+ enum dma_data_direction dir, gfp_t gfp, unsigned long attrs)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ struct sg_table *sgt;
+
+ if (WARN_ON_ONCE(attrs & ~DMA_ATTR_ALLOC_SINGLE_PAGES))
+ return NULL;
+ if (WARN_ON_ONCE(gfp & __GFP_COMP))
+ return NULL;
+
+ if (ops && ops->alloc_noncontiguous)
+ sgt = ops->alloc_noncontiguous(dev, size, dir, gfp, attrs);
+ else
+ sgt = alloc_single_sgt(dev, size, dir, gfp);
+
+ if (sgt) {
+ sgt->nents = 1;
+ debug_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir, attrs);
+ }
+ return sgt;
+}
+EXPORT_SYMBOL_GPL(dma_alloc_noncontiguous);
+
+static void free_single_sgt(struct device *dev, size_t size,
+ struct sg_table *sgt, enum dma_data_direction dir)
+{
+ __dma_free_pages(dev, size, sg_page(sgt->sgl), sgt->sgl->dma_address,
+ dir);
+ sg_free_table(sgt);
+ kfree(sgt);
+}
+
+void dma_free_noncontiguous(struct device *dev, size_t size,
+ struct sg_table *sgt, enum dma_data_direction dir)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ debug_dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir);
+ if (ops && ops->free_noncontiguous)
+ ops->free_noncontiguous(dev, size, sgt, dir);
+ else
+ free_single_sgt(dev, size, sgt, dir);
+}
+EXPORT_SYMBOL_GPL(dma_free_noncontiguous);
+
+void *dma_vmap_noncontiguous(struct device *dev, size_t size,
+ struct sg_table *sgt)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ if (ops && ops->alloc_noncontiguous)
+ return vmap(sgt_handle(sgt)->pages, count, VM_MAP, PAGE_KERNEL);
+ return page_address(sg_page(sgt->sgl));
+}
+EXPORT_SYMBOL_GPL(dma_vmap_noncontiguous);
+
+void dma_vunmap_noncontiguous(struct device *dev, void *vaddr)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ if (ops && ops->alloc_noncontiguous)
+ vunmap(vaddr);
+}
+EXPORT_SYMBOL_GPL(dma_vunmap_noncontiguous);
+
+int dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma,
+ size_t size, struct sg_table *sgt)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
- if (dma_is_direct(ops))
+ if (ops && ops->alloc_noncontiguous) {
+ unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ if (vma->vm_pgoff >= count ||
+ vma_pages(vma) > count - vma->vm_pgoff)
+ return -ENXIO;
+ return vm_map_pages(vma, sgt_handle(sgt)->pages, count);
+ }
+ return dma_mmap_pages(dev, vma, size, sg_page(sgt->sgl));
+}
+EXPORT_SYMBOL_GPL(dma_mmap_noncontiguous);
+
+static int dma_supported(struct device *dev, u64 mask)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ /*
+ * ->dma_supported sets the bypass flag, so we must always call
+ * into the method here unless the device is truly direct mapped.
+ */
+ if (!ops)
return dma_direct_supported(dev, mask);
if (!ops->dma_supported)
return 1;
return ops->dma_supported(dev, mask);
}
-EXPORT_SYMBOL(dma_supported);
-#ifdef CONFIG_ARCH_HAS_DMA_SET_MASK
-void arch_dma_set_mask(struct device *dev, u64 mask);
-#else
-#define arch_dma_set_mask(dev, mask) do { } while (0)
-#endif
+bool dma_pci_p2pdma_supported(struct device *dev)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ /* if ops is not set, dma-direct will be used, which supports P2PDMA */
+ if (!ops)
+ return true;
+
+ /*
+ * Note: dma_ops_bypass is not checked here because P2PDMA should
+ * not be used with dma mapping ops that do not support it, even
+ * if the specific device is bypassing them.
+ */
+
+ return ops->flags & DMA_F_PCI_P2PDMA_SUPPORTED;
+}
+EXPORT_SYMBOL_GPL(dma_pci_p2pdma_supported);
int dma_set_mask(struct device *dev, u64 mask)
{
@@ -351,7 +777,6 @@ int dma_set_mask(struct device *dev, u64 mask)
}
EXPORT_SYMBOL(dma_set_mask);
-#ifndef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK
int dma_set_coherent_mask(struct device *dev, u64 mask)
{
/*
@@ -367,28 +792,35 @@ int dma_set_coherent_mask(struct device *dev, u64 mask)
return 0;
}
EXPORT_SYMBOL(dma_set_coherent_mask);
-#endif
-void dma_cache_sync(struct device *dev, void *vaddr, size_t size,
- enum dma_data_direction dir)
+/**
+ * dma_addressing_limited - return if the device is addressing limited
+ * @dev: device to check
+ *
+ * Return %true if the devices DMA mask is too small to address all memory in
+ * the system, else %false. Lack of addressing bits is the prime reason for
+ * bounce buffering, but might not be the only one.
+ */
+bool dma_addressing_limited(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
- BUG_ON(!valid_dma_direction(dir));
+ if (min_not_zero(dma_get_mask(dev), dev->bus_dma_limit) <
+ dma_get_required_mask(dev))
+ return true;
- if (dma_is_direct(ops))
- arch_dma_cache_sync(dev, vaddr, size, dir);
- else if (ops->cache_sync)
- ops->cache_sync(dev, vaddr, size, dir);
+ if (unlikely(ops))
+ return false;
+ return !dma_direct_all_ram_mapped(dev);
}
-EXPORT_SYMBOL(dma_cache_sync);
+EXPORT_SYMBOL_GPL(dma_addressing_limited);
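
A hypothetical caller sketch: a driver can use the helper at probe time to log or adapt when bounce buffering is likely (the function name is invented; dma_addressing_limited() and dev_info() are real):

static void example_warn_if_limited(struct device *dev)
{
	if (dma_addressing_limited(dev))
		dev_info(dev, "DMA mask cannot reach all memory, bounce buffering likely\n");
}
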
size_t dma_max_mapping_size(struct device *dev)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
size_t size = SIZE_MAX;
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
size = dma_direct_max_mapping_size(dev);
else if (ops && ops->max_mapping_size)
size = ops->max_mapping_size(dev);
@@ -397,11 +829,23 @@ size_t dma_max_mapping_size(struct device *dev)
}
EXPORT_SYMBOL_GPL(dma_max_mapping_size);
+size_t dma_opt_mapping_size(struct device *dev)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ size_t size = SIZE_MAX;
+
+ if (ops && ops->opt_mapping_size)
+ size = ops->opt_mapping_size();
+
+ return min(dma_max_mapping_size(dev), size);
+}
+EXPORT_SYMBOL_GPL(dma_opt_mapping_size);
+
bool dma_need_sync(struct device *dev, dma_addr_t dma_addr)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
- if (dma_is_direct(ops))
+ if (dma_map_direct(dev, ops))
return dma_direct_need_sync(dev, dma_addr);
return ops->sync_single_for_cpu || ops->sync_single_for_device;
}
diff --git a/kernel/dma/ops_helpers.c b/kernel/dma/ops_helpers.c
new file mode 100644
index 000000000000..af4a6ef48ce0
--- /dev/null
+++ b/kernel/dma/ops_helpers.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Helpers for DMA ops implementations. These generally rely on the fact that
+ * the allocated memory contains normal pages in the direct kernel mapping.
+ */
+#include <linux/dma-map-ops.h>
+
+static struct page *dma_common_vaddr_to_page(void *cpu_addr)
+{
+ if (is_vmalloc_addr(cpu_addr))
+ return vmalloc_to_page(cpu_addr);
+ return virt_to_page(cpu_addr);
+}
+
+/*
+ * Create scatter-list for the already allocated DMA buffer.
+ */
+int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs)
+{
+ struct page *page = dma_common_vaddr_to_page(cpu_addr);
+ int ret;
+
+ ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+ if (!ret)
+ sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
+ return ret;
+}
+
+/*
+ * Create userspace mapping for the DMA-coherent memory.
+ */
+int dma_common_mmap(struct device *dev, struct vm_area_struct *vma,
+ void *cpu_addr, dma_addr_t dma_addr, size_t size,
+ unsigned long attrs)
+{
+#ifdef CONFIG_MMU
+ unsigned long user_count = vma_pages(vma);
+ unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+ unsigned long off = vma->vm_pgoff;
+ struct page *page = dma_common_vaddr_to_page(cpu_addr);
+ int ret = -ENXIO;
+
+ vma->vm_page_prot = dma_pgprot(dev, vma->vm_page_prot, attrs);
+
+ if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
+ return ret;
+
+ if (off >= count || user_count > count - off)
+ return -ENXIO;
+
+ return remap_pfn_range(vma, vma->vm_start,
+ page_to_pfn(page) + vma->vm_pgoff,
+ user_count << PAGE_SHIFT, vma->vm_page_prot);
+#else
+ return -ENXIO;
+#endif /* CONFIG_MMU */
+}
+
+struct page *dma_common_alloc_pages(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ struct page *page;
+
+ page = dma_alloc_contiguous(dev, size, gfp);
+ if (!page)
+ page = alloc_pages_node(dev_to_node(dev), gfp, get_order(size));
+ if (!page)
+ return NULL;
+
+ *dma_handle = ops->map_page(dev, page, 0, size, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ if (*dma_handle == DMA_MAPPING_ERROR) {
+ dma_free_contiguous(dev, page, size);
+ return NULL;
+ }
+
+ memset(page_address(page), 0, size);
+ return page;
+}
+
+void dma_common_free_pages(struct device *dev, size_t size, struct page *page,
+ dma_addr_t dma_handle, enum dma_data_direction dir)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ if (ops->unmap_page)
+ ops->unmap_page(dev, dma_handle, size, dir,
+ DMA_ATTR_SKIP_CPU_SYNC);
+ dma_free_contiguous(dev, page, size);
+}
diff --git a/kernel/dma/pool.c b/kernel/dma/pool.c
index 6bc74a2d5127..d10613eb0f63 100644
--- a/kernel/dma/pool.c
+++ b/kernel/dma/pool.c
@@ -3,9 +3,10 @@
* Copyright (C) 2012 ARM Ltd.
* Copyright (C) 2020 Google LLC
*/
+#include <linux/cma.h>
#include <linux/debugfs.h>
+#include <linux/dma-map-ops.h>
#include <linux/dma-direct.h>
-#include <linux/dma-noncoherent.h>
#include <linux/init.h>
#include <linux/genalloc.h>
#include <linux/set_memory.h>
@@ -37,9 +38,6 @@ static void __init dma_atomic_pool_debugfs_init(void)
struct dentry *root;
root = debugfs_create_dir("dma_pools", NULL);
- if (IS_ERR_OR_NULL(root))
- return;
-
debugfs_create_ulong("pool_size_dma", 0400, root, &pool_size_dma);
debugfs_create_ulong("pool_size_dma32", 0400, root, &pool_size_dma32);
debugfs_create_ulong("pool_size_kernel", 0400, root, &pool_size_kernel);
@@ -55,20 +53,47 @@ static void dma_atomic_pool_size_add(gfp_t gfp, size_t size)
pool_size_kernel += size;
}
+static bool cma_in_zone(gfp_t gfp)
+{
+ unsigned long size;
+ phys_addr_t end;
+ struct cma *cma;
+
+ cma = dev_get_cma_area(NULL);
+ if (!cma)
+ return false;
+
+ size = cma_get_size(cma);
+ if (!size)
+ return false;
+
+ /* CMA can't cross zone boundaries, see cma_activate_area() */
+ end = cma_get_base(cma) + size - 1;
+ if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp & GFP_DMA))
+ return end <= DMA_BIT_MASK(zone_dma_bits);
+ if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
+ return end <= DMA_BIT_MASK(32);
+ return true;
+}
+
static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
gfp_t gfp)
{
unsigned int order;
- struct page *page;
+ struct page *page = NULL;
void *addr;
int ret = -ENOMEM;
- /* Cannot allocate larger than MAX_ORDER-1 */
- order = min(get_order(pool_size), MAX_ORDER-1);
+ /* Cannot allocate larger than MAX_PAGE_ORDER */
+ order = min(get_order(pool_size), MAX_PAGE_ORDER);
do {
pool_size = 1 << (PAGE_SHIFT + order);
- page = alloc_pages(gfp, order);
+ if (cma_in_zone(gfp))
+ page = dma_alloc_from_contiguous(NULL, 1 << order,
+ order, false);
+ if (!page)
+ page = alloc_pages(gfp, order);
} while (!page && order-- > 0);
if (!page)
goto out;
@@ -86,7 +111,7 @@ static int atomic_pool_expand(struct gen_pool *pool, size_t pool_size,
#endif
/*
* Memory in the atomic DMA pools must be unencrypted, the pools do not
- * shrink so no re-encryption occurs in dma_direct_free_pages().
+ * shrink so no re-encryption occurs in dma_direct_free().
*/
ret = set_memory_decrypted((unsigned long)page_to_virt(page),
1 << order);
@@ -110,9 +135,9 @@ encrypt_mapping:
remove_mapping:
#ifdef CONFIG_DMA_DIRECT_REMAP
dma_common_free_remap(addr, pool_size);
-#endif
-free_page: __maybe_unused
+free_page:
__free_pages(page, order);
+#endif
out:
return ret;
}
@@ -165,7 +190,7 @@ static int __init dma_atomic_pool_init(void)
/*
* If coherent_pool was not used on the command line, default the pool
- * sizes to 128KB per 1GB of memory, min 128KB, max MAX_ORDER-1.
+ * sizes to 128KB per 1GB of memory, min 128KB, max MAX_PAGE_ORDER.
*/
if (!atomic_pool_size) {
unsigned long pages = totalram_pages() / (SZ_1G / SZ_128K);
@@ -178,7 +203,7 @@ static int __init dma_atomic_pool_init(void)
GFP_KERNEL);
if (!atomic_pool_kernel)
ret = -ENOMEM;
- if (IS_ENABLED(CONFIG_ZONE_DMA)) {
+ if (has_managed_dma()) {
atomic_pool_dma = __dma_atomic_pool_init(atomic_pool_size,
GFP_KERNEL | GFP_DMA);
if (!atomic_pool_dma)
@@ -196,93 +221,75 @@ static int __init dma_atomic_pool_init(void)
}
postcore_initcall(dma_atomic_pool_init);
-static inline struct gen_pool *dma_guess_pool_from_device(struct device *dev)
+static inline struct gen_pool *dma_guess_pool(struct gen_pool *prev, gfp_t gfp)
{
- u64 phys_mask;
- gfp_t gfp;
-
- gfp = dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
- &phys_mask);
- if (IS_ENABLED(CONFIG_ZONE_DMA) && gfp == GFP_DMA)
+ if (prev == NULL) {
+ if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp & GFP_DMA32))
+ return atomic_pool_dma32;
+ if (atomic_pool_dma && (gfp & GFP_DMA))
+ return atomic_pool_dma;
+ return atomic_pool_kernel;
+ }
+ if (prev == atomic_pool_kernel)
+ return atomic_pool_dma32 ? atomic_pool_dma32 : atomic_pool_dma;
+ if (prev == atomic_pool_dma32)
return atomic_pool_dma;
- if (IS_ENABLED(CONFIG_ZONE_DMA32) && gfp == GFP_DMA32)
- return atomic_pool_dma32;
- return atomic_pool_kernel;
+ return NULL;
}
-static inline struct gen_pool *dma_get_safer_pool(struct gen_pool *bad_pool)
+static struct page *__dma_alloc_from_pool(struct device *dev, size_t size,
+ struct gen_pool *pool, void **cpu_addr,
+ bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t))
{
- if (bad_pool == atomic_pool_kernel)
- return atomic_pool_dma32 ? : atomic_pool_dma;
+ unsigned long addr;
+ phys_addr_t phys;
- if (bad_pool == atomic_pool_dma32)
- return atomic_pool_dma;
+ addr = gen_pool_alloc(pool, size);
+ if (!addr)
+ return NULL;
- return NULL;
-}
+ phys = gen_pool_virt_to_phys(pool, addr);
+ if (phys_addr_ok && !phys_addr_ok(dev, phys, size)) {
+ gen_pool_free(pool, addr, size);
+ return NULL;
+ }
-static inline struct gen_pool *dma_guess_pool(struct device *dev,
- struct gen_pool *bad_pool)
-{
- if (bad_pool)
- return dma_get_safer_pool(bad_pool);
+ if (gen_pool_avail(pool) < atomic_pool_size)
+ schedule_work(&atomic_pool_work);
- return dma_guess_pool_from_device(dev);
+ *cpu_addr = (void *)addr;
+ memset(*cpu_addr, 0, size);
+ return pfn_to_page(__phys_to_pfn(phys));
}
-void *dma_alloc_from_pool(struct device *dev, size_t size,
- struct page **ret_page, gfp_t flags)
+struct page *dma_alloc_from_pool(struct device *dev, size_t size,
+ void **cpu_addr, gfp_t gfp,
+ bool (*phys_addr_ok)(struct device *, phys_addr_t, size_t))
{
struct gen_pool *pool = NULL;
- unsigned long val = 0;
- void *ptr = NULL;
- phys_addr_t phys;
-
- while (1) {
- pool = dma_guess_pool(dev, pool);
- if (!pool) {
- WARN(1, "Failed to get suitable pool for %s\n",
- dev_name(dev));
- break;
- }
-
- val = gen_pool_alloc(pool, size);
- if (!val)
- continue;
-
- phys = gen_pool_virt_to_phys(pool, val);
- if (dma_coherent_ok(dev, phys, size))
- break;
-
- gen_pool_free(pool, val, size);
- val = 0;
- }
-
-
- if (val) {
- *ret_page = pfn_to_page(__phys_to_pfn(phys));
- ptr = (void *)val;
- memset(ptr, 0, size);
+ struct page *page;
- if (gen_pool_avail(pool) < atomic_pool_size)
- schedule_work(&atomic_pool_work);
+ while ((pool = dma_guess_pool(pool, gfp))) {
+ page = __dma_alloc_from_pool(dev, size, pool, cpu_addr,
+ phys_addr_ok);
+ if (page)
+ return page;
}
- return ptr;
+ WARN(1, "Failed to get suitable pool for %s\n", dev_name(dev));
+ return NULL;
}
bool dma_free_from_pool(struct device *dev, void *start, size_t size)
{
struct gen_pool *pool = NULL;
- while (1) {
- pool = dma_guess_pool(dev, pool);
- if (!pool)
- return false;
-
- if (gen_pool_has_addr(pool, (unsigned long)start, size)) {
- gen_pool_free(pool, (unsigned long)start, size);
- return true;
- }
+ while ((pool = dma_guess_pool(pool, 0))) {
+ if (!gen_pool_has_addr(pool, (unsigned long)start, size))
+ continue;
+ gen_pool_free(pool, (unsigned long)start, size);
+ return true;
}
+
+ return false;
}
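
The reworked dma_alloc_from_pool() now hands the suitability check to the caller via a predicate. A hedged sketch of a caller, loosely modelled on the dma-direct atomic allocation path (not a verbatim copy of it), looks like this:

#include <linux/dma-direct.h>
#include <linux/dma-map-ops.h>

/* Illustrative atomic allocation using the new pool interface. */
static void *example_alloc_atomic(struct device *dev, size_t size,
				  dma_addr_t *dma_handle, gfp_t gfp)
{
	struct page *page;
	void *vaddr;

	/* dma_coherent_ok() rejects pages the device cannot address. */
	page = dma_alloc_from_pool(dev, size, &vaddr, gfp, dma_coherent_ok);
	if (!page)
		return NULL;

	*dma_handle = phys_to_dma(dev, page_to_phys(page));
	return vaddr;
}
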
diff --git a/kernel/dma/remap.c b/kernel/dma/remap.c
index 78b23f089cf1..27596f3b4aef 100644
--- a/kernel/dma/remap.c
+++ b/kernel/dma/remap.c
@@ -2,7 +2,7 @@
/*
* Copyright (c) 2014 The Linux Foundation
*/
-#include <linux/dma-mapping.h>
+#include <linux/dma-map-ops.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
@@ -43,13 +43,13 @@ void *dma_common_contiguous_remap(struct page *page, size_t size,
void *vaddr;
int i;
- pages = kmalloc_array(count, sizeof(struct page *), GFP_KERNEL);
+ pages = kvmalloc_array(count, sizeof(struct page *), GFP_KERNEL);
if (!pages)
return NULL;
for (i = 0; i < count; i++)
pages[i] = nth_page(page, i);
vaddr = vmap(pages, count, VM_DMA_COHERENT, prot);
- kfree(pages);
+ kvfree(pages);
return vaddr;
}
@@ -66,6 +66,5 @@ void dma_common_free_remap(void *cpu_addr, size_t size)
return;
}
- unmap_kernel_range((unsigned long)cpu_addr, PAGE_ALIGN(size));
vunmap(cpu_addr);
}
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index c19379fabd20..0de66f0ff43a 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -21,38 +21,37 @@
#define pr_fmt(fmt) "software IO TLB: " fmt
#include <linux/cache.h>
+#include <linux/cc_platform.h>
+#include <linux/ctype.h>
+#include <linux/debugfs.h>
#include <linux/dma-direct.h>
-#include <linux/dma-noncoherent.h>
-#include <linux/mm.h>
+#include <linux/dma-map-ops.h>
#include <linux/export.h>
+#include <linux/gfp.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/iommu-helper.h>
+#include <linux/init.h>
+#include <linux/memblock.h>
+#include <linux/mm.h>
+#include <linux/pfn.h>
+#include <linux/rculist.h>
+#include <linux/scatterlist.h>
+#include <linux/set_memory.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/swiotlb.h>
-#include <linux/pfn.h>
#include <linux/types.h>
-#include <linux/ctype.h>
-#include <linux/highmem.h>
-#include <linux/gfp.h>
-#include <linux/scatterlist.h>
-#include <linux/mem_encrypt.h>
-#include <linux/set_memory.h>
-#ifdef CONFIG_DEBUG_FS
-#include <linux/debugfs.h>
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+#include <linux/of.h>
+#include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/slab.h>
#endif
-#include <asm/io.h>
-#include <asm/dma.h>
-
-#include <linux/init.h>
-#include <linux/memblock.h>
-#include <linux/iommu-helper.h>
-
#define CREATE_TRACE_POINTS
#include <trace/events/swiotlb.h>
-#define OFFSET(val,align) ((unsigned long) \
- ( (val) & ( (align) - 1)))
-
#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
/*
@@ -62,120 +61,192 @@
*/
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
-enum swiotlb_force swiotlb_force;
+#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
-/*
- * Used to do a quick range check in swiotlb_tbl_unmap_single and
- * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
- * API.
+/**
+ * struct io_tlb_slot - IO TLB slot descriptor
+ * @orig_addr: The original address corresponding to a mapped entry.
+ * @alloc_size: Size of the allocated buffer.
+ * @list: The free list describing the number of free entries available
+ * from each index.
+ * @pad_slots: Number of preceding padding slots. Valid only in the first
+ * allocated non-padding slot.
*/
-phys_addr_t io_tlb_start, io_tlb_end;
+struct io_tlb_slot {
+ phys_addr_t orig_addr;
+ size_t alloc_size;
+ unsigned short list;
+ unsigned short pad_slots;
+};
-/*
- * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
- * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
- */
-static unsigned long io_tlb_nslabs;
+static bool swiotlb_force_bounce;
+static bool swiotlb_force_disable;
-/*
- * The number of used IO TLB block
- */
-static unsigned long io_tlb_used;
+#ifdef CONFIG_SWIOTLB_DYNAMIC
-/*
- * This is a free list describing the number of free entries available from
- * each index
- */
-static unsigned int *io_tlb_list;
-static unsigned int io_tlb_index;
+static void swiotlb_dyn_alloc(struct work_struct *work);
-/*
- * Max segment that we can provide which (if pages are contingous) will
- * not be bounced (unless SWIOTLB_FORCE is set).
+static struct io_tlb_mem io_tlb_default_mem = {
+ .lock = __SPIN_LOCK_UNLOCKED(io_tlb_default_mem.lock),
+ .pools = LIST_HEAD_INIT(io_tlb_default_mem.pools),
+ .dyn_alloc = __WORK_INITIALIZER(io_tlb_default_mem.dyn_alloc,
+ swiotlb_dyn_alloc),
+};
+
+#else /* !CONFIG_SWIOTLB_DYNAMIC */
+
+static struct io_tlb_mem io_tlb_default_mem;
+
+#endif /* CONFIG_SWIOTLB_DYNAMIC */
+
+static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
+static unsigned long default_nareas;
+
+/**
+ * struct io_tlb_area - IO TLB memory area descriptor
+ *
+ * This is a single area with a single lock.
+ *
+ * @used: The number of used IO TLB blocks.
+ * @index: The slot index to start searching in this area for the next round.
+ * @lock: The lock to protect the above data structures in the map and
+ * unmap calls.
*/
-unsigned int max_segment;
+struct io_tlb_area {
+ unsigned long used;
+ unsigned int index;
+ spinlock_t lock;
+};
/*
- * We need to save away the original address corresponding to a mapped entry
- * for the sync operations.
+ * Round up the number of slabs to the next power of 2. The last area is
+ * going to be smaller than the rest if default_nslabs is not a power of two.
+ * The number of slots in an area should be a multiple of IO_TLB_SEGSIZE,
+ * otherwise a segment may span two or more areas. This conflicts with free
+ * contiguous slot tracking: free slots are treated as contiguous no matter
+ * whether they cross an area boundary.
+ *
+ * Return true if default_nslabs is rounded up.
*/
-#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
-static phys_addr_t *io_tlb_orig_addr;
+static bool round_up_default_nslabs(void)
+{
+ if (!default_nareas)
+ return false;
+
+ if (default_nslabs < IO_TLB_SEGSIZE * default_nareas)
+ default_nslabs = IO_TLB_SEGSIZE * default_nareas;
+ else if (is_power_of_2(default_nslabs))
+ return false;
+ default_nslabs = roundup_pow_of_two(default_nslabs);
+ return true;
+}
-/*
- * Protect the above data structures in the map and unmap calls
+/**
+ * swiotlb_adjust_nareas() - adjust the number of areas and slots
+ * @nareas: Desired number of areas. Zero is treated as 1.
+ *
+ * Adjust the default number of areas in a memory pool.
+ * The default size of the memory pool may also change to meet minimum area
+ * size requirements.
*/
-static DEFINE_SPINLOCK(io_tlb_lock);
+static void swiotlb_adjust_nareas(unsigned int nareas)
+{
+ if (!nareas)
+ nareas = 1;
+ else if (!is_power_of_2(nareas))
+ nareas = roundup_pow_of_two(nareas);
+
+ default_nareas = nareas;
+
+ pr_info("area num %d.\n", nareas);
+ if (round_up_default_nslabs())
+ pr_info("SWIOTLB bounce buffer size roundup to %luMB",
+ (default_nslabs << IO_TLB_SHIFT) >> 20);
+}
-static int late_alloc;
+/**
+ * limit_nareas() - get the maximum number of areas for a given memory pool size
+ * @nareas: Desired number of areas.
+ * @nslots: Total number of slots in the memory pool.
+ *
+ * Limit the number of areas to the maximum possible number of areas in
+ * a memory pool of the given size.
+ *
+ * Return: Maximum possible number of areas.
+ */
+static unsigned int limit_nareas(unsigned int nareas, unsigned long nslots)
+{
+ if (nslots < nareas * IO_TLB_SEGSIZE)
+ return nslots / IO_TLB_SEGSIZE;
+ return nareas;
+}
static int __init
setup_io_tlb_npages(char *str)
{
if (isdigit(*str)) {
- io_tlb_nslabs = simple_strtoul(str, &str, 0);
/* avoid tail segment of size < IO_TLB_SEGSIZE */
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+ default_nslabs =
+ ALIGN(simple_strtoul(str, &str, 0), IO_TLB_SEGSIZE);
}
if (*str == ',')
++str;
- if (!strcmp(str, "force")) {
- swiotlb_force = SWIOTLB_FORCE;
- } else if (!strcmp(str, "noforce")) {
- swiotlb_force = SWIOTLB_NO_FORCE;
- io_tlb_nslabs = 1;
- }
+ if (isdigit(*str))
+ swiotlb_adjust_nareas(simple_strtoul(str, &str, 0));
+ if (*str == ',')
+ ++str;
+ if (!strcmp(str, "force"))
+ swiotlb_force_bounce = true;
+ else if (!strcmp(str, "noforce"))
+ swiotlb_force_disable = true;
return 0;
}
early_param("swiotlb", setup_io_tlb_npages);
-static bool no_iotlb_memory;
-
-unsigned long swiotlb_nr_tbl(void)
-{
- return unlikely(no_iotlb_memory) ? 0 : io_tlb_nslabs;
-}
-EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
-
-unsigned int swiotlb_max_segment(void)
+unsigned long swiotlb_size_or_default(void)
{
- return unlikely(no_iotlb_memory) ? 0 : max_segment;
+ return default_nslabs << IO_TLB_SHIFT;
}
-EXPORT_SYMBOL_GPL(swiotlb_max_segment);
-void swiotlb_set_max_segment(unsigned int val)
+void __init swiotlb_adjust_size(unsigned long size)
{
- if (swiotlb_force == SWIOTLB_FORCE)
- max_segment = 1;
- else
- max_segment = rounddown(val, PAGE_SIZE);
-}
-
-/* default to 64MB */
-#define IO_TLB_DEFAULT_SIZE (64UL<<20)
-unsigned long swiotlb_size_or_default(void)
-{
- unsigned long size;
-
- size = io_tlb_nslabs << IO_TLB_SHIFT;
+ /*
+	 * If the swiotlb parameter has not been specified, give a chance to
+ * architectures such as those supporting memory encryption to
+ * adjust/expand SWIOTLB size for their use.
+ */
+ if (default_nslabs != IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT)
+ return;
- return size ? size : (IO_TLB_DEFAULT_SIZE);
+ size = ALIGN(size, IO_TLB_SIZE);
+ default_nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
+ if (round_up_default_nslabs())
+ size = default_nslabs << IO_TLB_SHIFT;
+ pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20);
}
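
The intended caller of swiotlb_adjust_size() is early architecture code, typically memory-encryption setup that already knows bouncing will be mandatory. A hedged sketch follows; the 6% heuristic and the clamp bounds are illustrative assumptions, not something this function requires.

#include <linux/minmax.h>
#include <linux/sizes.h>
#include <linux/swiotlb.h>

/* Hypothetical early-boot hook enlarging the default SWIOTLB. */
static void __init example_mem_encrypt_setup(unsigned long total_mem)
{
	unsigned long size = total_mem * 6 / 100;

	size = clamp_val(size, IO_TLB_DEFAULT_SIZE, SZ_1G);
	swiotlb_adjust_size(size);
}
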
void swiotlb_print_info(void)
{
- unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+ struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
- if (no_iotlb_memory) {
+ if (!mem->nslabs) {
pr_warn("No low mem\n");
return;
}
- pr_info("mapped [mem %#010llx-%#010llx] (%luMB)\n",
- (unsigned long long)io_tlb_start,
- (unsigned long long)io_tlb_end,
- bytes >> 20);
+ pr_info("mapped [mem %pa-%pa] (%luMB)\n", &mem->start, &mem->end,
+ (mem->nslabs << IO_TLB_SHIFT) >> 20);
+}
+
+static inline unsigned long io_tlb_offset(unsigned long val)
+{
+ return val & (IO_TLB_SEGSIZE - 1);
+}
+
+static inline unsigned long nr_slots(u64 val)
+{
+ return DIV_ROUND_UP(val, IO_TLB_SIZE);
}
/*
@@ -186,87 +257,169 @@ void swiotlb_print_info(void)
*/
void __init swiotlb_update_mem_attributes(void)
{
- void *vaddr;
+ struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
unsigned long bytes;
- if (no_iotlb_memory || late_alloc)
+ if (!mem->nslabs || mem->late_alloc)
return;
+ bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
+ set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT);
+}
+
+static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
+ unsigned long nslabs, bool late_alloc, unsigned int nareas)
+{
+ void *vaddr = phys_to_virt(start);
+ unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
+
+ mem->nslabs = nslabs;
+ mem->start = start;
+ mem->end = mem->start + bytes;
+ mem->late_alloc = late_alloc;
+ mem->nareas = nareas;
+ mem->area_nslabs = nslabs / mem->nareas;
+
+ for (i = 0; i < mem->nareas; i++) {
+ spin_lock_init(&mem->areas[i].lock);
+ mem->areas[i].index = 0;
+ mem->areas[i].used = 0;
+ }
+
+ for (i = 0; i < mem->nslabs; i++) {
+ mem->slots[i].list = min(IO_TLB_SEGSIZE - io_tlb_offset(i),
+ mem->nslabs - i);
+ mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
+ mem->slots[i].alloc_size = 0;
+ mem->slots[i].pad_slots = 0;
+ }
- vaddr = phys_to_virt(io_tlb_start);
- bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
- set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
memset(vaddr, 0, bytes);
+ mem->vaddr = vaddr;
+ return;
}
-int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
+/**
+ * add_mem_pool() - add a memory pool to the allocator
+ * @mem: Software IO TLB allocator.
+ * @pool: Memory pool to be added.
+ */
+static void add_mem_pool(struct io_tlb_mem *mem, struct io_tlb_pool *pool)
{
- unsigned long i, bytes;
- size_t alloc_size;
-
- bytes = nslabs << IO_TLB_SHIFT;
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ spin_lock(&mem->lock);
+ list_add_rcu(&pool->node, &mem->pools);
+ mem->nslabs += pool->nslabs;
+ spin_unlock(&mem->lock);
+#else
+ mem->nslabs = pool->nslabs;
+#endif
+}
- io_tlb_nslabs = nslabs;
- io_tlb_start = __pa(tlb);
- io_tlb_end = io_tlb_start + bytes;
+static void __init *swiotlb_memblock_alloc(unsigned long nslabs,
+ unsigned int flags,
+ int (*remap)(void *tlb, unsigned long nslabs))
+{
+ size_t bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT);
+ void *tlb;
/*
- * Allocate and initialize the free list array. This array is used
- * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
- * between io_tlb_start and io_tlb_end.
+ * By default allocate the bounce buffer memory from low memory, but
+	 * allow picking a location anywhere for hypervisors with guest
+ * memory encryption.
*/
- alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(int));
- io_tlb_list = memblock_alloc(alloc_size, PAGE_SIZE);
- if (!io_tlb_list)
- panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
- __func__, alloc_size, PAGE_SIZE);
-
- alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t));
- io_tlb_orig_addr = memblock_alloc(alloc_size, PAGE_SIZE);
- if (!io_tlb_orig_addr)
- panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
- __func__, alloc_size, PAGE_SIZE);
+ if (flags & SWIOTLB_ANY)
+ tlb = memblock_alloc(bytes, PAGE_SIZE);
+ else
+ tlb = memblock_alloc_low(bytes, PAGE_SIZE);
- for (i = 0; i < io_tlb_nslabs; i++) {
- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
- io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ if (!tlb) {
+ pr_warn("%s: Failed to allocate %zu bytes tlb structure\n",
+ __func__, bytes);
+ return NULL;
}
- io_tlb_index = 0;
- if (verbose)
- swiotlb_print_info();
+ if (remap && remap(tlb, nslabs) < 0) {
+ memblock_free(tlb, PAGE_ALIGN(bytes));
+ pr_warn("%s: Failed to remap %zu bytes\n", __func__, bytes);
+ return NULL;
+ }
- swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
- return 0;
+ return tlb;
}
/*
* Statically reserve bounce buffer space and initialize bounce buffer data
* structures for the software IO TLB used to implement the DMA API.
*/
-void __init
-swiotlb_init(int verbose)
+void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
+ int (*remap)(void *tlb, unsigned long nslabs))
{
- size_t default_size = IO_TLB_DEFAULT_SIZE;
- unsigned char *vstart;
- unsigned long bytes;
+ struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
+ unsigned long nslabs;
+ unsigned int nareas;
+ size_t alloc_size;
+ void *tlb;
+
+ if (!addressing_limit && !swiotlb_force_bounce)
+ return;
+ if (swiotlb_force_disable)
+ return;
+
+ io_tlb_default_mem.force_bounce =
+ swiotlb_force_bounce || (flags & SWIOTLB_FORCE);
+
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ if (!remap)
+ io_tlb_default_mem.can_grow = true;
+ if (flags & SWIOTLB_ANY)
+ io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1);
+ else
+ io_tlb_default_mem.phys_limit = ARCH_LOW_ADDRESS_LIMIT;
+#endif
+
+ if (!default_nareas)
+ swiotlb_adjust_nareas(num_possible_cpus());
+
+ nslabs = default_nslabs;
+ nareas = limit_nareas(default_nareas, nslabs);
+ while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) {
+ if (nslabs <= IO_TLB_MIN_SLABS)
+ return;
+ nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
+ nareas = limit_nareas(nareas, nslabs);
+ }
- if (!io_tlb_nslabs) {
- io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+ if (default_nslabs != nslabs) {
+ pr_info("SWIOTLB bounce buffer size adjusted %lu -> %lu slabs",
+ default_nslabs, nslabs);
+ default_nslabs = nslabs;
}
- bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+ alloc_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), nslabs));
+ mem->slots = memblock_alloc(alloc_size, PAGE_SIZE);
+ if (!mem->slots) {
+ pr_warn("%s: Failed to allocate %zu bytes align=0x%lx\n",
+ __func__, alloc_size, PAGE_SIZE);
+ return;
+ }
- /* Get IO TLB memory from the low pages */
- vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE);
- if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
+ mem->areas = memblock_alloc(array_size(sizeof(struct io_tlb_area),
+ nareas), SMP_CACHE_BYTES);
+ if (!mem->areas) {
+ pr_warn("%s: Failed to allocate mem->areas.\n", __func__);
return;
+ }
- if (io_tlb_start)
- memblock_free_early(io_tlb_start,
- PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
- pr_warn("Cannot allocate buffer");
- no_iotlb_memory = true;
+ swiotlb_init_io_tlb_pool(mem, __pa(tlb), nslabs, false, nareas);
+ add_mem_pool(&io_tlb_default_mem, mem);
+
+ if (flags & SWIOTLB_VERBOSE)
+ swiotlb_print_info();
+}
+
+void __init swiotlb_init(bool addressing_limit, unsigned int flags)
+{
+ swiotlb_init_remap(addressing_limit, flags, NULL);
}
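
swiotlb_init() is expected to be called once from architecture setup code, after memblock is usable but before DMA mappings are created. A rough sketch of such a caller is shown below; the addressing-limit test is a placeholder for whatever limit a real architecture actually computes.

/* Hypothetical arch hook; a real port derives its own DMA limit. */
void __init example_arch_swiotlb_setup(void)
{
	bool addressing_limited = max_pfn > PFN_DOWN(DMA_BIT_MASK(32));

	swiotlb_init(addressing_limited, SWIOTLB_VERBOSE);
}
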
/*
@@ -274,148 +427,473 @@ swiotlb_init(int verbose)
* initialize the swiotlb later using the slab allocator if needed.
* This should be just like above, but with some error catching.
*/
-int
-swiotlb_late_init_with_default_size(size_t default_size)
+int swiotlb_init_late(size_t size, gfp_t gfp_mask,
+ int (*remap)(void *tlb, unsigned long nslabs))
{
- unsigned long bytes, req_nslabs = io_tlb_nslabs;
+ struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
+ unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
+ unsigned int nareas;
unsigned char *vstart = NULL;
- unsigned int order;
+ unsigned int order, area_order;
+ bool retried = false;
int rc = 0;
- if (!io_tlb_nslabs) {
- io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
- }
+ if (io_tlb_default_mem.nslabs)
+ return 0;
- /*
- * Get IO TLB memory from the low pages
- */
- order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
- io_tlb_nslabs = SLABS_PER_PAGE << order;
- bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+ if (swiotlb_force_disable)
+ return 0;
+
+ io_tlb_default_mem.force_bounce = swiotlb_force_bounce;
+
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ if (!remap)
+ io_tlb_default_mem.can_grow = true;
+ if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp_mask & __GFP_DMA))
+ io_tlb_default_mem.phys_limit = DMA_BIT_MASK(zone_dma_bits);
+ else if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp_mask & __GFP_DMA32))
+ io_tlb_default_mem.phys_limit = DMA_BIT_MASK(32);
+ else
+ io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1);
+#endif
+
+ if (!default_nareas)
+ swiotlb_adjust_nareas(num_possible_cpus());
+
+retry:
+ order = get_order(nslabs << IO_TLB_SHIFT);
+ nslabs = SLABS_PER_PAGE << order;
while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
- vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
+ vstart = (void *)__get_free_pages(gfp_mask | __GFP_NOWARN,
order);
if (vstart)
break;
order--;
+ nslabs = SLABS_PER_PAGE << order;
+ retried = true;
}
- if (!vstart) {
- io_tlb_nslabs = req_nslabs;
+ if (!vstart)
return -ENOMEM;
+
+ if (remap)
+ rc = remap(vstart, nslabs);
+ if (rc) {
+ free_pages((unsigned long)vstart, order);
+
+ nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
+ if (nslabs < IO_TLB_MIN_SLABS)
+ return rc;
+ retried = true;
+ goto retry;
}
- if (order != get_order(bytes)) {
+
+ if (retried) {
pr_warn("only able to allocate %ld MB\n",
(PAGE_SIZE << order) >> 20);
- io_tlb_nslabs = SLABS_PER_PAGE << order;
}
- rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
- if (rc)
- free_pages((unsigned long)vstart, order);
- return rc;
+ nareas = limit_nareas(default_nareas, nslabs);
+ area_order = get_order(array_size(sizeof(*mem->areas), nareas));
+ mem->areas = (struct io_tlb_area *)
+ __get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order);
+ if (!mem->areas)
+ goto error_area;
+
+ mem->slots = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(array_size(sizeof(*mem->slots), nslabs)));
+ if (!mem->slots)
+ goto error_slots;
+
+ set_memory_decrypted((unsigned long)vstart,
+ (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
+ swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), nslabs, true,
+ nareas);
+ add_mem_pool(&io_tlb_default_mem, mem);
+
+ swiotlb_print_info();
+ return 0;
+
+error_slots:
+ free_pages((unsigned long)mem->areas, area_order);
+error_area:
+ free_pages((unsigned long)vstart, order);
+ return -ENOMEM;
}
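
Unlike the memblock-based path, swiotlb_init_late() can be used by code that only learns it needs bounce buffering after boot (for example a hypervisor front-end). A minimal hedged sketch, where the 64 MiB size and GFP_KERNEL are assumptions:

/* Illustrative late setup of the default SWIOTLB pool. */
static int example_late_swiotlb_setup(void)
{
	return swiotlb_init_late(64UL << 20, GFP_KERNEL, NULL);
}
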
-static void swiotlb_cleanup(void)
+void __init swiotlb_exit(void)
{
- io_tlb_end = 0;
- io_tlb_start = 0;
- io_tlb_nslabs = 0;
- max_segment = 0;
+ struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
+ unsigned long tbl_vaddr;
+ size_t tbl_size, slots_size;
+ unsigned int area_order;
+
+ if (swiotlb_force_bounce)
+ return;
+
+ if (!mem->nslabs)
+ return;
+
+ pr_info("tearing down default memory pool\n");
+ tbl_vaddr = (unsigned long)phys_to_virt(mem->start);
+ tbl_size = PAGE_ALIGN(mem->end - mem->start);
+ slots_size = PAGE_ALIGN(array_size(sizeof(*mem->slots), mem->nslabs));
+
+ set_memory_encrypted(tbl_vaddr, tbl_size >> PAGE_SHIFT);
+ if (mem->late_alloc) {
+ area_order = get_order(array_size(sizeof(*mem->areas),
+ mem->nareas));
+ free_pages((unsigned long)mem->areas, area_order);
+ free_pages(tbl_vaddr, get_order(tbl_size));
+ free_pages((unsigned long)mem->slots, get_order(slots_size));
+ } else {
+ memblock_free_late(__pa(mem->areas),
+ array_size(sizeof(*mem->areas), mem->nareas));
+ memblock_free_late(mem->start, tbl_size);
+ memblock_free_late(__pa(mem->slots), slots_size);
+ }
+
+ memset(mem, 0, sizeof(*mem));
}
-int
-swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+
+/**
+ * alloc_dma_pages() - allocate pages to be used for DMA
+ * @gfp: GFP flags for the allocation.
+ * @bytes: Size of the buffer.
+ * @phys_limit: Maximum allowed physical address of the buffer.
+ *
+ * Allocate pages from the buddy allocator. If successful, make the allocated
+ * pages decrypted so that they can be used for DMA.
+ *
+ * Return: Decrypted pages, %NULL on allocation failure, or ERR_PTR(-EAGAIN)
+ * if the allocated physical address was above @phys_limit.
+ */
+static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit)
{
- unsigned long i, bytes;
+ unsigned int order = get_order(bytes);
+ struct page *page;
+ phys_addr_t paddr;
+ void *vaddr;
+
+ page = alloc_pages(gfp, order);
+ if (!page)
+ return NULL;
- bytes = nslabs << IO_TLB_SHIFT;
+ paddr = page_to_phys(page);
+ if (paddr + bytes - 1 > phys_limit) {
+ __free_pages(page, order);
+ return ERR_PTR(-EAGAIN);
+ }
+
+ vaddr = phys_to_virt(paddr);
+ if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes)))
+ goto error;
+ return page;
- io_tlb_nslabs = nslabs;
- io_tlb_start = virt_to_phys(tlb);
- io_tlb_end = io_tlb_start + bytes;
+error:
+ /* Intentional leak if pages cannot be encrypted again. */
+ if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
+ __free_pages(page, order);
+ return NULL;
+}
- set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
- memset(tlb, 0, bytes);
+/**
+ * swiotlb_alloc_tlb() - allocate a dynamic IO TLB buffer
+ * @dev: Device for which a memory pool is allocated.
+ * @bytes: Size of the buffer.
+ * @phys_limit: Maximum allowed physical address of the buffer.
+ * @gfp: GFP flags for the allocation.
+ *
+ * Return: Allocated pages, or %NULL on allocation failure.
+ */
+static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
+ u64 phys_limit, gfp_t gfp)
+{
+ struct page *page;
/*
- * Allocate and initialize the free list array. This array is used
- * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
- * between io_tlb_start and io_tlb_end.
+ * Allocate from the atomic pools if memory is encrypted and
+ * the allocation is atomic, because decrypting may block.
*/
- io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
- get_order(io_tlb_nslabs * sizeof(int)));
- if (!io_tlb_list)
- goto cleanup3;
+ if (!gfpflags_allow_blocking(gfp) && dev && force_dma_unencrypted(dev)) {
+ void *vaddr;
- io_tlb_orig_addr = (phys_addr_t *)
- __get_free_pages(GFP_KERNEL,
- get_order(io_tlb_nslabs *
- sizeof(phys_addr_t)));
- if (!io_tlb_orig_addr)
- goto cleanup4;
+ if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL))
+ return NULL;
- for (i = 0; i < io_tlb_nslabs; i++) {
- io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
- io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+ return dma_alloc_from_pool(dev, bytes, &vaddr, gfp,
+ dma_coherent_ok);
}
- io_tlb_index = 0;
- swiotlb_print_info();
+ gfp &= ~GFP_ZONEMASK;
+ if (phys_limit <= DMA_BIT_MASK(zone_dma_bits))
+ gfp |= __GFP_DMA;
+ else if (phys_limit <= DMA_BIT_MASK(32))
+ gfp |= __GFP_DMA32;
+
+ while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit))) {
+ if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
+ phys_limit < DMA_BIT_MASK(64) &&
+ !(gfp & (__GFP_DMA32 | __GFP_DMA)))
+ gfp |= __GFP_DMA32;
+ else if (IS_ENABLED(CONFIG_ZONE_DMA) &&
+ !(gfp & __GFP_DMA))
+ gfp = (gfp & ~__GFP_DMA32) | __GFP_DMA;
+ else
+ return NULL;
+ }
- late_alloc = 1;
+ return page;
+}
- swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
+/**
+ * swiotlb_free_tlb() - free a dynamically allocated IO TLB buffer
+ * @vaddr: Virtual address of the buffer.
+ * @bytes: Size of the buffer.
+ */
+static void swiotlb_free_tlb(void *vaddr, size_t bytes)
+{
+ if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
+ dma_free_from_pool(NULL, vaddr, bytes))
+ return;
- return 0;
+ /* Intentional leak if pages cannot be encrypted again. */
+ if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
+ __free_pages(virt_to_page(vaddr), get_order(bytes));
+}
-cleanup4:
- free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
- sizeof(int)));
- io_tlb_list = NULL;
-cleanup3:
- swiotlb_cleanup();
- return -ENOMEM;
+/**
+ * swiotlb_alloc_pool() - allocate a new IO TLB memory pool
+ * @dev: Device for which a memory pool is allocated.
+ * @minslabs: Minimum number of slabs.
+ * @nslabs: Desired (maximum) number of slabs.
+ * @nareas: Number of areas.
+ * @phys_limit: Maximum DMA buffer physical address.
+ * @gfp: GFP flags for the allocations.
+ *
+ * Allocate and initialize a new IO TLB memory pool. The actual number of
+ * slabs may be reduced if allocation of @nslabs fails. If even
+ * @minslabs cannot be allocated, this function fails.
+ *
+ * Return: New memory pool, or %NULL on allocation failure.
+ */
+static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
+ unsigned long minslabs, unsigned long nslabs,
+ unsigned int nareas, u64 phys_limit, gfp_t gfp)
+{
+ struct io_tlb_pool *pool;
+ unsigned int slot_order;
+ struct page *tlb;
+ size_t pool_size;
+ size_t tlb_size;
+
+ if (nslabs > SLABS_PER_PAGE << MAX_PAGE_ORDER) {
+ nslabs = SLABS_PER_PAGE << MAX_PAGE_ORDER;
+ nareas = limit_nareas(nareas, nslabs);
+ }
+
+ pool_size = sizeof(*pool) + array_size(sizeof(*pool->areas), nareas);
+ pool = kzalloc(pool_size, gfp);
+ if (!pool)
+ goto error;
+ pool->areas = (void *)pool + sizeof(*pool);
+
+ tlb_size = nslabs << IO_TLB_SHIFT;
+ while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, gfp))) {
+ if (nslabs <= minslabs)
+ goto error_tlb;
+ nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
+ nareas = limit_nareas(nareas, nslabs);
+ tlb_size = nslabs << IO_TLB_SHIFT;
+ }
+
+ slot_order = get_order(array_size(sizeof(*pool->slots), nslabs));
+ pool->slots = (struct io_tlb_slot *)
+ __get_free_pages(gfp, slot_order);
+ if (!pool->slots)
+ goto error_slots;
+
+ swiotlb_init_io_tlb_pool(pool, page_to_phys(tlb), nslabs, true, nareas);
+ return pool;
+
+error_slots:
+ swiotlb_free_tlb(page_address(tlb), tlb_size);
+error_tlb:
+ kfree(pool);
+error:
+ return NULL;
}
-void __init swiotlb_exit(void)
+/**
+ * swiotlb_dyn_alloc() - dynamic memory pool allocation worker
+ * @work: Pointer to dyn_alloc in struct io_tlb_mem.
+ */
+static void swiotlb_dyn_alloc(struct work_struct *work)
{
- if (!io_tlb_orig_addr)
+ struct io_tlb_mem *mem =
+ container_of(work, struct io_tlb_mem, dyn_alloc);
+ struct io_tlb_pool *pool;
+
+ pool = swiotlb_alloc_pool(NULL, IO_TLB_MIN_SLABS, default_nslabs,
+ default_nareas, mem->phys_limit, GFP_KERNEL);
+ if (!pool) {
+ pr_warn_ratelimited("Failed to allocate new pool");
return;
+ }
- if (late_alloc) {
- free_pages((unsigned long)io_tlb_orig_addr,
- get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
- free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
- sizeof(int)));
- free_pages((unsigned long)phys_to_virt(io_tlb_start),
- get_order(io_tlb_nslabs << IO_TLB_SHIFT));
- } else {
- memblock_free_late(__pa(io_tlb_orig_addr),
- PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
- memblock_free_late(__pa(io_tlb_list),
- PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
- memblock_free_late(io_tlb_start,
- PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+ add_mem_pool(mem, pool);
+}
+
+/**
+ * swiotlb_dyn_free() - RCU callback to free a memory pool
+ * @rcu: RCU head in the corresponding struct io_tlb_pool.
+ */
+static void swiotlb_dyn_free(struct rcu_head *rcu)
+{
+ struct io_tlb_pool *pool = container_of(rcu, struct io_tlb_pool, rcu);
+ size_t slots_size = array_size(sizeof(*pool->slots), pool->nslabs);
+ size_t tlb_size = pool->end - pool->start;
+
+ free_pages((unsigned long)pool->slots, get_order(slots_size));
+ swiotlb_free_tlb(pool->vaddr, tlb_size);
+ kfree(pool);
+}
+
+/**
+ * swiotlb_find_pool() - find the IO TLB pool for a physical address
+ * @dev: Device which has mapped the DMA buffer.
+ * @paddr: Physical address within the DMA buffer.
+ *
+ * Find the IO TLB memory pool descriptor which contains the given physical
+ * address, if any.
+ *
+ * Return: Memory pool which contains @paddr, or %NULL if none.
+ */
+struct io_tlb_pool *swiotlb_find_pool(struct device *dev, phys_addr_t paddr)
+{
+ struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ struct io_tlb_pool *pool;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(pool, &mem->pools, node) {
+ if (paddr >= pool->start && paddr < pool->end)
+ goto out;
}
- swiotlb_cleanup();
+
+ list_for_each_entry_rcu(pool, &dev->dma_io_tlb_pools, node) {
+ if (paddr >= pool->start && paddr < pool->end)
+ goto out;
+ }
+ pool = NULL;
+out:
+ rcu_read_unlock();
+ return pool;
+}
+
+/**
+ * swiotlb_del_pool() - remove an IO TLB pool from a device
+ * @dev: Owning device.
+ * @pool: Memory pool to be removed.
+ */
+static void swiotlb_del_pool(struct device *dev, struct io_tlb_pool *pool)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->dma_io_tlb_lock, flags);
+ list_del_rcu(&pool->node);
+ spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags);
+
+ call_rcu(&pool->rcu, swiotlb_dyn_free);
+}
+
+#endif /* CONFIG_SWIOTLB_DYNAMIC */
+
+/**
+ * swiotlb_dev_init() - initialize swiotlb fields in &struct device
+ * @dev: Device to be initialized.
+ */
+void swiotlb_dev_init(struct device *dev)
+{
+ dev->dma_io_tlb_mem = &io_tlb_default_mem;
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ INIT_LIST_HEAD(&dev->dma_io_tlb_pools);
+ spin_lock_init(&dev->dma_io_tlb_lock);
+ dev->dma_uses_io_tlb = false;
+#endif
+}
+
+/**
+ * swiotlb_align_offset() - Get required offset into an IO TLB allocation.
+ * @dev: Owning device.
+ * @align_mask: Allocation alignment mask.
+ * @addr: DMA address.
+ *
+ * Return the minimum offset from the start of an IO TLB allocation which is
+ * required for a given buffer address and allocation alignment to keep the
+ * device happy.
+ *
+ * First, the address bits covered by min_align_mask must be identical in the
+ * original address and the bounce buffer address. High bits are preserved by
+ * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra
+ * padding bytes before the bounce buffer.
+ *
+ * Second, @align_mask specifies which bits of the first allocated slot must
+ * be zero. This may require allocating additional padding slots, and then the
+ * offset (in bytes) from the first such padding slot is returned.
+ */
+static unsigned int swiotlb_align_offset(struct device *dev,
+ unsigned int align_mask, u64 addr)
+{
+ return addr & dma_get_min_align_mask(dev) &
+ (align_mask | (IO_TLB_SIZE - 1));
}
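
As a worked example (numbers assumed for illustration only): with IO_TLB_SIZE of 2 KiB, a device min_align_mask of 0xfff and a streaming mapping (align_mask == 0), an original address of 0x12345abc yields 0xabc & 0x7ff = 0x2bc, i.e. the bounce buffer starts 0x2bc bytes into its first slot. If the caller instead passes align_mask == 0xfff, the result is 0xabc, which is larger than one 2 KiB slot and therefore implies one preceding padding slot (0xabc >> IO_TLB_SHIFT == 1).
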
/*
* Bounce: copy the swiotlb buffer from or back to the original dma location
*/
-static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
- size_t size, enum dma_data_direction dir)
+static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size,
+ enum dma_data_direction dir)
{
+ struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
+ int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
+ phys_addr_t orig_addr = mem->slots[index].orig_addr;
+ size_t alloc_size = mem->slots[index].alloc_size;
unsigned long pfn = PFN_DOWN(orig_addr);
- unsigned char *vaddr = phys_to_virt(tlb_addr);
+ unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
+ int tlb_offset;
+
+ if (orig_addr == INVALID_PHYS_ADDR)
+ return;
+
+ /*
+ * It's valid for tlb_offset to be negative. This can happen when the
+ * "offset" returned by swiotlb_align_offset() is non-zero, and the
+ * tlb_addr is pointing within the first "offset" bytes of the second
+ * or subsequent slots of the allocated swiotlb area. While it's not
+ * valid for tlb_addr to be pointing within the first "offset" bytes
+ * of the first slot, there's no way to check for such an error since
+ * this function can't distinguish the first slot from the second and
+ * subsequent slots.
+ */
+ tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) -
+ swiotlb_align_offset(dev, 0, orig_addr);
+
+ orig_addr += tlb_offset;
+ alloc_size -= tlb_offset;
+
+ if (size > alloc_size) {
+ dev_WARN_ONCE(dev, 1,
+ "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu.\n",
+ alloc_size, size);
+ size = alloc_size;
+ }
if (PageHighMem(pfn_to_page(pfn))) {
- /* The buffer does not have a mapping. Map it in and copy */
unsigned int offset = orig_addr & ~PAGE_MASK;
- char *buffer;
+ struct page *page;
unsigned int sz = 0;
unsigned long flags;
@@ -423,12 +901,11 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
sz = min_t(size_t, PAGE_SIZE - offset, size);
local_irq_save(flags);
- buffer = kmap_atomic(pfn_to_page(pfn));
+ page = pfn_to_page(pfn);
if (dir == DMA_TO_DEVICE)
- memcpy(vaddr, buffer + offset, sz);
+ memcpy_from_page(vaddr, page, offset, sz);
else
- memcpy(buffer + offset, vaddr, sz);
- kunmap_atomic(buffer);
+ memcpy_to_page(page, offset, vaddr, sz);
local_irq_restore(flags);
size -= sz;
@@ -443,157 +920,504 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
}
}
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
- dma_addr_t tbl_dma_addr,
- phys_addr_t orig_addr,
- size_t mapping_size,
- size_t alloc_size,
- enum dma_data_direction dir,
- unsigned long attrs)
+static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx)
{
- unsigned long flags;
- phys_addr_t tlb_addr;
- unsigned int nslots, stride, index, wrap;
- int i;
- unsigned long mask;
- unsigned long offset_slots;
- unsigned long max_slots;
- unsigned long tmp_io_tlb_used;
+ return start + (idx << IO_TLB_SHIFT);
+}
- if (no_iotlb_memory)
- panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
+/*
+ * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
+ */
+static inline unsigned long get_max_slots(unsigned long boundary_mask)
+{
+ return (boundary_mask >> IO_TLB_SHIFT) + 1;
+}
- if (mem_encrypt_active())
- pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+static unsigned int wrap_area_index(struct io_tlb_pool *mem, unsigned int index)
+{
+ if (index >= mem->area_nslabs)
+ return 0;
+ return index;
+}
- if (mapping_size > alloc_size) {
- dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
- mapping_size, alloc_size);
- return (phys_addr_t)DMA_MAPPING_ERROR;
- }
+/*
+ * Track the total used slots with a global atomic value in order to have
+ * correct information to determine the high water mark. The mem_used()
+ * function gives imprecise results because there's no locking across
+ * multiple areas.
+ */
+#ifdef CONFIG_DEBUG_FS
+static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots)
+{
+ unsigned long old_hiwater, new_used;
+
+ new_used = atomic_long_add_return(nslots, &mem->total_used);
+ old_hiwater = atomic_long_read(&mem->used_hiwater);
+ do {
+ if (new_used <= old_hiwater)
+ break;
+ } while (!atomic_long_try_cmpxchg(&mem->used_hiwater,
+ &old_hiwater, new_used));
+}
- mask = dma_get_seg_boundary(hwdev);
+static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+ atomic_long_sub(nslots, &mem->total_used);
+}
- tbl_dma_addr &= mask;
+#else /* !CONFIG_DEBUG_FS */
+static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots)
+{
+}
+static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
- offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+#ifdef CONFIG_DEBUG_FS
+static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+ atomic_long_add(nslots, &mem->transient_nslabs);
+}
+
+static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+ atomic_long_sub(nslots, &mem->transient_nslabs);
+}
+
+#else /* !CONFIG_DEBUG_FS */
+static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+}
+static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+#endif /* CONFIG_SWIOTLB_DYNAMIC */
+
+/**
+ * swiotlb_search_pool_area() - search one memory area in one pool
+ * @dev: Device which maps the buffer.
+ * @pool: Memory pool to be searched.
+ * @area_index: Index of the IO TLB memory area to be searched.
+ * @orig_addr: Original (non-bounced) IO buffer address.
+ * @alloc_size: Total requested size of the bounce buffer,
+ * including initial alignment padding.
+ * @alloc_align_mask: Required alignment of the allocated buffer.
+ *
+ * Find a suitable sequence of IO TLB entries for the request and allocate
+ * a buffer from the given IO TLB memory area.
+ * This function takes care of locking.
+ *
+ * Return: Index of the first allocated slot, or -1 on error.
+ */
+static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool,
+ int area_index, phys_addr_t orig_addr, size_t alloc_size,
+ unsigned int alloc_align_mask)
+{
+ struct io_tlb_area *area = pool->areas + area_index;
+ unsigned long boundary_mask = dma_get_seg_boundary(dev);
+ dma_addr_t tbl_dma_addr =
+ phys_to_dma_unencrypted(dev, pool->start) & boundary_mask;
+ unsigned long max_slots = get_max_slots(boundary_mask);
+ unsigned int iotlb_align_mask = dma_get_min_align_mask(dev);
+ unsigned int nslots = nr_slots(alloc_size), stride;
+ unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr);
+ unsigned int index, slots_checked, count = 0, i;
+ unsigned long flags;
+ unsigned int slot_base;
+ unsigned int slot_index;
+
+ BUG_ON(!nslots);
+ BUG_ON(area_index >= pool->nareas);
/*
- * Carefully handle integer overflow which can occur when mask == ~0UL.
+ * Historically, swiotlb allocations >= PAGE_SIZE were guaranteed to be
+ * page-aligned in the absence of any other alignment requirements.
+ * 'alloc_align_mask' was later introduced to specify the alignment
+ * explicitly, however this is passed as zero for streaming mappings
+ * and so we preserve the old behaviour there in case any drivers are
+ * relying on it.
*/
- max_slots = mask + 1
- ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
- : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+ if (!alloc_align_mask && !iotlb_align_mask && alloc_size >= PAGE_SIZE)
+ alloc_align_mask = PAGE_SIZE - 1;
/*
- * For mappings greater than or equal to a page, we limit the stride
- * (and hence alignment) to a page size.
+ * Ensure that the allocation is at least slot-aligned and update
+ * 'iotlb_align_mask' to ignore bits that will be preserved when
+ * offsetting into the allocation.
*/
- nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- if (alloc_size >= PAGE_SIZE)
- stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
- else
- stride = 1;
-
- BUG_ON(!nslots);
+ alloc_align_mask |= (IO_TLB_SIZE - 1);
+ iotlb_align_mask &= ~alloc_align_mask;
/*
- * Find suitable number of IO TLB entries size that will fit this
- * request and allocate a buffer from that IO TLB pool.
+ * For mappings with an alignment requirement don't bother looping to
+ * unaligned slots once we found an aligned one.
*/
- spin_lock_irqsave(&io_tlb_lock, flags);
+ stride = get_max_slots(max(alloc_align_mask, iotlb_align_mask));
- if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
+ spin_lock_irqsave(&area->lock, flags);
+ if (unlikely(nslots > pool->area_nslabs - area->used))
goto not_found;
- index = ALIGN(io_tlb_index, stride);
- if (index >= io_tlb_nslabs)
- index = 0;
- wrap = index;
+ slot_base = area_index * pool->area_nslabs;
+ index = area->index;
- do {
- while (iommu_is_span_boundary(index, nslots, offset_slots,
- max_slots)) {
- index += stride;
- if (index >= io_tlb_nslabs)
- index = 0;
- if (index == wrap)
- goto not_found;
- }
+ for (slots_checked = 0; slots_checked < pool->area_nslabs; ) {
+ phys_addr_t tlb_addr;
- /*
- * If we find a slot that indicates we have 'nslots' number of
- * contiguous buffers, we allocate the buffers from that slot
- * and mark the entries as '0' indicating unavailable.
- */
- if (io_tlb_list[index] >= nslots) {
- int count = 0;
-
- for (i = index; i < (int) (index + nslots); i++)
- io_tlb_list[i] = 0;
- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
- io_tlb_list[i] = ++count;
- tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
-
- /*
- * Update the indices to avoid searching in the next
- * round.
- */
- io_tlb_index = ((index + nslots) < io_tlb_nslabs
- ? (index + nslots) : 0);
+ slot_index = slot_base + index;
+ tlb_addr = slot_addr(tbl_dma_addr, slot_index);
- goto found;
+ if ((tlb_addr & alloc_align_mask) ||
+ (orig_addr && (tlb_addr & iotlb_align_mask) !=
+ (orig_addr & iotlb_align_mask))) {
+ index = wrap_area_index(pool, index + 1);
+ slots_checked++;
+ continue;
+ }
+
+ if (!iommu_is_span_boundary(slot_index, nslots,
+ nr_slots(tbl_dma_addr),
+ max_slots)) {
+ if (pool->slots[slot_index].list >= nslots)
+ goto found;
}
- index += stride;
- if (index >= io_tlb_nslabs)
- index = 0;
- } while (index != wrap);
+ index = wrap_area_index(pool, index + stride);
+ slots_checked += stride;
+ }
not_found:
- tmp_io_tlb_used = io_tlb_used;
+ spin_unlock_irqrestore(&area->lock, flags);
+ return -1;
- spin_unlock_irqrestore(&io_tlb_lock, flags);
- if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
- dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
- alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
- return (phys_addr_t)DMA_MAPPING_ERROR;
found:
- io_tlb_used += nslots;
- spin_unlock_irqrestore(&io_tlb_lock, flags);
+ /*
+ * If we find a slot that indicates we have 'nslots' number of
+ * contiguous buffers, we allocate the buffers from that slot onwards
+ * and set the list of free entries to '0' indicating unavailable.
+ */
+ for (i = slot_index; i < slot_index + nslots; i++) {
+ pool->slots[i].list = 0;
+ pool->slots[i].alloc_size = alloc_size - (offset +
+ ((i - slot_index) << IO_TLB_SHIFT));
+ }
+ for (i = slot_index - 1;
+ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
+ pool->slots[i].list; i--)
+ pool->slots[i].list = ++count;
/*
- * Save away the mapping from the original address to the DMA address.
- * This is needed when we sync the memory. Then we sync the buffer if
- * needed.
+ * Update the indices to avoid searching in the next round.
*/
- for (i = 0; i < nslots; i++)
- io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
- if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
- (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
+ area->index = wrap_area_index(pool, index + nslots);
+ area->used += nslots;
+ spin_unlock_irqrestore(&area->lock, flags);
- return tlb_addr;
+ inc_used_and_hiwater(dev->dma_io_tlb_mem, nslots);
+ return slot_index;
}
-/*
- * tlb_addr is the physical address of the bounce buffer to unmap.
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+
+/**
+ * swiotlb_search_area() - search one memory area in all pools
+ * @dev: Device which maps the buffer.
+ * @start_cpu: Start CPU number.
+ * @cpu_offset: Offset from @start_cpu.
+ * @orig_addr: Original (non-bounced) IO buffer address.
+ * @alloc_size: Total requested size of the bounce buffer,
+ * including initial alignment padding.
+ * @alloc_align_mask: Required alignment of the allocated buffer.
+ * @retpool: Used memory pool, updated on return.
+ *
+ * Search one memory area in all pools for a sequence of slots that match the
+ * allocation constraints.
+ *
+ * Return: Index of the first allocated slot, or -1 on error.
+ */
+static int swiotlb_search_area(struct device *dev, int start_cpu,
+ int cpu_offset, phys_addr_t orig_addr, size_t alloc_size,
+ unsigned int alloc_align_mask, struct io_tlb_pool **retpool)
+{
+ struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ struct io_tlb_pool *pool;
+ int area_index;
+ int index = -1;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(pool, &mem->pools, node) {
+ if (cpu_offset >= pool->nareas)
+ continue;
+ area_index = (start_cpu + cpu_offset) & (pool->nareas - 1);
+ index = swiotlb_search_pool_area(dev, pool, area_index,
+ orig_addr, alloc_size,
+ alloc_align_mask);
+ if (index >= 0) {
+ *retpool = pool;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return index;
+}
+
+/**
+ * swiotlb_find_slots() - search for slots in the whole swiotlb
+ * @dev: Device which maps the buffer.
+ * @orig_addr: Original (non-bounced) IO buffer address.
+ * @alloc_size: Total requested size of the bounce buffer,
+ * including initial alignment padding.
+ * @alloc_align_mask: Required alignment of the allocated buffer.
+ * @retpool: Used memory pool, updated on return.
+ *
+ * Search through the whole software IO TLB to find a sequence of slots that
+ * match the allocation constraints.
+ *
+ * Return: Index of the first allocated slot, or -1 on error.
*/
-void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
- size_t mapping_size, size_t alloc_size,
- enum dma_data_direction dir, unsigned long attrs)
+static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
+ size_t alloc_size, unsigned int alloc_align_mask,
+ struct io_tlb_pool **retpool)
{
+ struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ struct io_tlb_pool *pool;
+ unsigned long nslabs;
unsigned long flags;
- int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
- int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
- phys_addr_t orig_addr = io_tlb_orig_addr[index];
+ u64 phys_limit;
+ int cpu, i;
+ int index;
+
+ if (alloc_size > IO_TLB_SEGSIZE * IO_TLB_SIZE)
+ return -1;
+
+ cpu = raw_smp_processor_id();
+ for (i = 0; i < default_nareas; ++i) {
+ index = swiotlb_search_area(dev, cpu, i, orig_addr, alloc_size,
+ alloc_align_mask, &pool);
+ if (index >= 0)
+ goto found;
+ }
+
+ if (!mem->can_grow)
+ return -1;
+
+ schedule_work(&mem->dyn_alloc);
+
+ nslabs = nr_slots(alloc_size);
+ phys_limit = min_not_zero(*dev->dma_mask, dev->bus_dma_limit);
+ pool = swiotlb_alloc_pool(dev, nslabs, nslabs, 1, phys_limit,
+ GFP_NOWAIT | __GFP_NOWARN);
+ if (!pool)
+ return -1;
+
+ index = swiotlb_search_pool_area(dev, pool, 0, orig_addr,
+ alloc_size, alloc_align_mask);
+ if (index < 0) {
+ swiotlb_dyn_free(&pool->rcu);
+ return -1;
+ }
+
+ pool->transient = true;
+ spin_lock_irqsave(&dev->dma_io_tlb_lock, flags);
+ list_add_rcu(&pool->node, &dev->dma_io_tlb_pools);
+ spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags);
+ inc_transient_used(mem, pool->nslabs);
+
+found:
+ WRITE_ONCE(dev->dma_uses_io_tlb, true);
/*
- * First, sync the memory before unmapping the entry
+ * The general barrier orders reads and writes against a presumed store
+ * of the SWIOTLB buffer address by a device driver (to a driver private
+ * data structure). It serves two purposes.
+ *
+ * First, the store to dev->dma_uses_io_tlb must be ordered before the
+ * presumed store. This guarantees that the returned buffer address
+ * cannot be passed to another CPU before updating dev->dma_uses_io_tlb.
+ *
+ * Second, the load from mem->pools must be ordered before the same
+ * presumed store. This guarantees that the returned buffer address
+ * cannot be observed by another CPU before an update of the RCU list
+ * that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy
+ * atomicity).
+ *
+ * See also the comment in is_swiotlb_buffer().
+ */
+ smp_mb();
+
+ *retpool = pool;
+ return index;
+}
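
For reference, a sketch of the reader side that the smp_mb() above pairs with; the real is_swiotlb_buffer() lives outside this hunk, so the shape below is an assumption used only to illustrate the ordering:

static bool is_swiotlb_buffer_sketch(struct device *dev, phys_addr_t paddr)
{
	struct io_tlb_mem *mem = dev->dma_io_tlb_mem;

	if (!mem)
		return false;
	/*
	 * Order the driver-private load that produced @paddr before the
	 * reads of dev->dma_uses_io_tlb and mem->pools; pairs with the
	 * smp_mb() in swiotlb_find_slots().
	 */
	smp_rmb();
	if (!READ_ONCE(dev->dma_uses_io_tlb))
		return false;
	return swiotlb_find_pool(dev, paddr) != NULL;
}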
+
+#else /* !CONFIG_SWIOTLB_DYNAMIC */
+
+static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
+ size_t alloc_size, unsigned int alloc_align_mask,
+ struct io_tlb_pool **retpool)
+{
+ struct io_tlb_pool *pool;
+ int start, i;
+ int index;
+
+ *retpool = pool = &dev->dma_io_tlb_mem->defpool;
+ i = start = raw_smp_processor_id() & (pool->nareas - 1);
+ do {
+ index = swiotlb_search_pool_area(dev, pool, i, orig_addr,
+ alloc_size, alloc_align_mask);
+ if (index >= 0)
+ return index;
+ if (++i >= pool->nareas)
+ i = 0;
+ } while (i != start);
+ return -1;
+}
+
+#endif /* CONFIG_SWIOTLB_DYNAMIC */
+
+#ifdef CONFIG_DEBUG_FS
+
+/**
+ * mem_used() - get number of used slots in an allocator
+ * @mem: Software IO TLB allocator.
+ *
+ * The result is accurate in this version of the function, because an atomic
+ * counter is available if CONFIG_DEBUG_FS is set.
+ *
+ * Return: Number of used slots.
+ */
+static unsigned long mem_used(struct io_tlb_mem *mem)
+{
+ return atomic_long_read(&mem->total_used);
+}
+
+#else /* !CONFIG_DEBUG_FS */
+
+/**
+ * mem_pool_used() - get number of used slots in a memory pool
+ * @pool: Software IO TLB memory pool.
+ *
+ * The result is not accurate, see mem_used().
+ *
+ * Return: Approximate number of used slots.
+ */
+static unsigned long mem_pool_used(struct io_tlb_pool *pool)
+{
+ int i;
+ unsigned long used = 0;
+
+ for (i = 0; i < pool->nareas; i++)
+ used += pool->areas[i].used;
+ return used;
+}
+
+/**
+ * mem_used() - get number of used slots in an allocator
+ * @mem: Software IO TLB allocator.
+ *
+ * The result is not accurate, because there is no locking of individual
+ * areas.
+ *
+ * Return: Approximate number of used slots.
+ */
+static unsigned long mem_used(struct io_tlb_mem *mem)
+{
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ struct io_tlb_pool *pool;
+ unsigned long used = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(pool, &mem->pools, node)
+ used += mem_pool_used(pool);
+ rcu_read_unlock();
+
+ return used;
+#else
+ return mem_pool_used(&mem->defpool);
+#endif
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
+phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
+ size_t mapping_size, size_t alloc_size,
+ unsigned int alloc_align_mask, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ unsigned int offset;
+ struct io_tlb_pool *pool;
+ unsigned int i;
+ int index;
+ phys_addr_t tlb_addr;
+ unsigned short pad_slots;
+
+ if (!mem || !mem->nslabs) {
+ dev_warn_ratelimited(dev,
+ "Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
+ return (phys_addr_t)DMA_MAPPING_ERROR;
+ }
+
+ if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
+ pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+
+ if (mapping_size > alloc_size) {
+ dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
+ mapping_size, alloc_size);
+ return (phys_addr_t)DMA_MAPPING_ERROR;
+ }
+
+ offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr);
+ index = swiotlb_find_slots(dev, orig_addr,
+ alloc_size + offset, alloc_align_mask, &pool);
+ if (index == -1) {
+ if (!(attrs & DMA_ATTR_NO_WARN))
+ dev_warn_ratelimited(dev,
+ "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
+ alloc_size, mem->nslabs, mem_used(mem));
+ return (phys_addr_t)DMA_MAPPING_ERROR;
+ }
+
+ /*
+ * Save away the mapping from the original address to the DMA address.
+ * This is needed when we sync the memory. Then we sync the buffer if
+ * needed.
+ */
+ pad_slots = offset >> IO_TLB_SHIFT;
+ offset &= (IO_TLB_SIZE - 1);
+ index += pad_slots;
+ pool->slots[index].pad_slots = pad_slots;
+ for (i = 0; i < nr_slots(alloc_size + offset); i++)
+ pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
+ tlb_addr = slot_addr(pool->start, index) + offset;
+ /*
+ * When the device is writing memory, i.e. dir == DMA_FROM_DEVICE, copy
+ * the original buffer to the TLB buffer before initiating DMA in order
+ * to preserve the original's data if the device does a partial write,
+ * i.e. if the device doesn't overwrite the entire buffer. Preserving
+ * the original data, even if it's garbage, is necessary to match
+ * hardware behavior. Use of swiotlb is supposed to be transparent,
+ * i.e. swiotlb must not corrupt memory by clobbering unwritten bytes.
*/
- if (orig_addr != INVALID_PHYS_ADDR &&
- !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
- ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
- swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_FROM_DEVICE);
+ swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
+ return tlb_addr;
+}
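
A worked example of the padding arithmetic above, assuming the usual 2 KiB slots (IO_TLB_SHIFT == 11) and that swiotlb_align_offset() folds the device's min_align_mask together with alloc_align_mask; the numbers are hypothetical:

/*
 * min_align_mask == 0xfff, alloc_align_mask == 0xfff, orig_addr ends in 0x9b4:
 *
 *   offset     = 0x9b4                     (2484 bytes of requested padding)
 *   pad_slots  = 0x9b4 >> IO_TLB_SHIFT     = 1 whole padding slot
 *   offset    &= IO_TLB_SIZE - 1           = 0x1b4 within the next slot
 *
 * swiotlb_find_slots() already picked an index whose slot address satisfies
 * the alignment masks, so tlb_addr preserves the low address bits that the
 * device requires.
 */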
+
+static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
+{
+ struct io_tlb_pool *mem = swiotlb_find_pool(dev, tlb_addr);
+ unsigned long flags;
+ unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr);
+ int index, nslots, aindex;
+ struct io_tlb_area *area;
+ int count, i;
+
+ index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
+ index -= mem->slots[index].pad_slots;
+ nslots = nr_slots(mem->slots[index].alloc_size + offset);
+ aindex = index / mem->area_nslabs;
+ area = &mem->areas[aindex];
/*
* Return the buffer to the free list by setting the corresponding
@@ -601,59 +1425,110 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
* While returning the entries to the free list, we merge the entries
* with slots below and above the pool being returned.
*/
- spin_lock_irqsave(&io_tlb_lock, flags);
- {
- count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
- io_tlb_list[index + nslots] : 0);
- /*
- * Step 1: return the slots to the free list, merging the
- * slots with superceeding slots
- */
- for (i = index + nslots - 1; i >= index; i--) {
- io_tlb_list[i] = ++count;
- io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
- }
- /*
- * Step 2: merge the returned slots with the preceding slots,
- * if available (non zero)
- */
- for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
- io_tlb_list[i] = ++count;
+ BUG_ON(aindex >= mem->nareas);
+
+ spin_lock_irqsave(&area->lock, flags);
+ if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
+ count = mem->slots[index + nslots].list;
+ else
+ count = 0;
- io_tlb_used -= nslots;
+ /*
+ * Step 1: return the slots to the free list, merging them with the
+ * succeeding free slots
+ */
+ for (i = index + nslots - 1; i >= index; i--) {
+ mem->slots[i].list = ++count;
+ mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
+ mem->slots[i].alloc_size = 0;
+ mem->slots[i].pad_slots = 0;
}
- spin_unlock_irqrestore(&io_tlb_lock, flags);
+
+ /*
+ * Step 2: merge the returned slots with the preceding slots, if
+ * available (non zero)
+ */
+ for (i = index - 1;
+ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list;
+ i--)
+ mem->slots[i].list = ++count;
+ area->used -= nslots;
+ spin_unlock_irqrestore(&area->lock, flags);
+
+ dec_used(dev->dma_io_tlb_mem, nslots);
}
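
To make the free-list bookkeeping above concrete, a small illustrative scenario (hypothetical slot numbers):

/*
 * Freeing slots 10..11 while slots 12..13 are already free with list
 * values {2, 1}: count starts at slots[12].list == 2, step 1 stores 3
 * and 4 into slots 11 and 10, and step 2 bumps an already-free slot 9
 * to 5. Each free slot thus records how many contiguous free slots
 * follow it within its IO_TLB_SEGSIZE segment.
 */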
-void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
- size_t size, enum dma_data_direction dir,
- enum dma_sync_target target)
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+
+/**
+ * swiotlb_del_transient() - delete a transient memory pool
+ * @dev: Device which mapped the buffer.
+ * @tlb_addr: Physical address within a bounce buffer.
+ *
+ * Check whether the address belongs to a transient SWIOTLB memory pool.
+ * If yes, then delete the pool.
+ *
+ * Return: %true if @tlb_addr belonged to a transient pool that was released.
+ */
+static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr)
{
- int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
- phys_addr_t orig_addr = io_tlb_orig_addr[index];
+ struct io_tlb_pool *pool;
- if (orig_addr == INVALID_PHYS_ADDR)
+ pool = swiotlb_find_pool(dev, tlb_addr);
+ if (!pool->transient)
+ return false;
+
+ dec_used(dev->dma_io_tlb_mem, pool->nslabs);
+ swiotlb_del_pool(dev, pool);
+ dec_transient_used(dev->dma_io_tlb_mem, pool->nslabs);
+ return true;
+}
+
+#else /* !CONFIG_SWIOTLB_DYNAMIC */
+
+static inline bool swiotlb_del_transient(struct device *dev,
+ phys_addr_t tlb_addr)
+{
+ return false;
+}
+
+#endif /* CONFIG_SWIOTLB_DYNAMIC */
+
+/*
+ * tlb_addr is the physical address of the bounce buffer to unmap.
+ */
+void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
+ size_t mapping_size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ /*
+ * First, sync the memory before unmapping the entry
+ */
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+ (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
+ swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE);
+
+ if (swiotlb_del_transient(dev, tlb_addr))
return;
- orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
+ swiotlb_release_slots(dev, tlb_addr);
+}
- switch (target) {
- case SYNC_FOR_CPU:
- if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(orig_addr, tlb_addr,
- size, DMA_FROM_DEVICE);
- else
- BUG_ON(dir != DMA_TO_DEVICE);
- break;
- case SYNC_FOR_DEVICE:
- if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(orig_addr, tlb_addr,
- size, DMA_TO_DEVICE);
- else
- BUG_ON(dir != DMA_FROM_DEVICE);
- break;
- default:
- BUG();
- }
+void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir)
+{
+ if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
+ swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE);
+ else
+ BUG_ON(dir != DMA_FROM_DEVICE);
+}
+
+void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir)
+{
+ if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
+ swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE);
+ else
+ BUG_ON(dir != DMA_TO_DEVICE);
}
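
As a usage sketch, a direct-mapping sync path might dispatch to the helper above roughly as follows; is_swiotlb_buffer(), dev_is_dma_coherent() and arch_sync_dma_for_cpu() are assumed helpers from elsewhere in the tree, and the wiring is illustrative rather than part of this patch:

static void sync_single_for_cpu_sketch(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir)
{
	/* Flush/invalidate CPU caches first on non-coherent platforms. */
	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_cpu(paddr, size, dir);

	/* Then copy bounced data back to the original buffer if needed. */
	if (is_swiotlb_buffer(dev, paddr))
		swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
}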
/*
@@ -666,19 +1541,17 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
phys_addr_t swiotlb_addr;
dma_addr_t dma_addr;
- trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size,
- swiotlb_force);
+ trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size);
- swiotlb_addr = swiotlb_tbl_map_single(dev,
- __phys_to_dma(dev, io_tlb_start),
- paddr, size, size, dir, attrs);
+ swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir,
+ attrs);
if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
/* Ensure that the address returned is DMA'ble */
- dma_addr = __phys_to_dma(dev, swiotlb_addr);
+ dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr);
if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
- swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, size, dir,
+ swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
dev_WARN_ONCE(dev, 1,
"swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
@@ -693,30 +1566,278 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
size_t swiotlb_max_mapping_size(struct device *dev)
{
- return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
-}
+ int min_align_mask = dma_get_min_align_mask(dev);
+ int min_align = 0;
-bool is_swiotlb_active(void)
-{
/*
- * When SWIOTLB is initialized, even if io_tlb_start points to physical
- * address zero, io_tlb_end surely doesn't.
+ * swiotlb_find_slots() skips slots according to the min align mask,
+ * which reduces the maximum mapping size. Take that into account
+ * here.
*/
- return io_tlb_end != 0;
+ if (min_align_mask)
+ min_align = roundup(min_align_mask, IO_TLB_SIZE);
+
+ return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align;
+}
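
A quick numeric example, assuming the usual IO_TLB_SIZE of 2 KiB and IO_TLB_SEGSIZE of 128:

/*
 * The unconstrained ceiling is 128 * 2048 = 256 KiB. A device with
 * min_align_mask == 0xfff gets min_align = roundup(0xfff, 2048) = 4096,
 * so its maximum mapping size drops to 252 KiB.
 */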
+
+/**
+ * is_swiotlb_allocated() - check if the default software IO TLB is initialized
+ *
+ * Return: %true if the default software IO TLB has been initialized.
+ */
+bool is_swiotlb_allocated(void)
+{
+ return io_tlb_default_mem.nslabs;
+}
+
+bool is_swiotlb_active(struct device *dev)
+{
+ struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+
+ return mem && mem->nslabs;
+}
+
+/**
+ * default_swiotlb_base() - get the base address of the default SWIOTLB
+ *
+ * Get the lowest physical address used by the default software IO TLB pool.
+ */
+phys_addr_t default_swiotlb_base(void)
+{
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ io_tlb_default_mem.can_grow = false;
+#endif
+ return io_tlb_default_mem.defpool.start;
+}
+
+/**
+ * default_swiotlb_limit() - get the address limit of the default SWIOTLB
+ *
+ * Get the highest physical address used by the default software IO TLB pool.
+ */
+phys_addr_t default_swiotlb_limit(void)
+{
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ return io_tlb_default_mem.phys_limit;
+#else
+ return io_tlb_default_mem.defpool.end - 1;
+#endif
}
#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+static unsigned long mem_transient_used(struct io_tlb_mem *mem)
+{
+ return atomic_long_read(&mem->transient_nslabs);
+}
+
+static int io_tlb_transient_used_get(void *data, u64 *val)
+{
+ struct io_tlb_mem *mem = data;
+
+ *val = mem_transient_used(mem);
+ return 0;
+}
-static int __init swiotlb_create_debugfs(void)
+DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_transient_used, io_tlb_transient_used_get,
+ NULL, "%llu\n");
+#endif /* CONFIG_SWIOTLB_DYNAMIC */
+
+static int io_tlb_used_get(void *data, u64 *val)
{
- struct dentry *root;
+ struct io_tlb_mem *mem = data;
- root = debugfs_create_dir("swiotlb", NULL);
- debugfs_create_ulong("io_tlb_nslabs", 0400, root, &io_tlb_nslabs);
- debugfs_create_ulong("io_tlb_used", 0400, root, &io_tlb_used);
+ *val = mem_used(mem);
return 0;
}
-late_initcall(swiotlb_create_debugfs);
+static int io_tlb_hiwater_get(void *data, u64 *val)
+{
+ struct io_tlb_mem *mem = data;
+
+ *val = atomic_long_read(&mem->used_hiwater);
+ return 0;
+}
+static int io_tlb_hiwater_set(void *data, u64 val)
+{
+ struct io_tlb_mem *mem = data;
+
+ /* Only allow setting to zero */
+ if (val != 0)
+ return -EINVAL;
+
+ atomic_long_set(&mem->used_hiwater, val);
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get,
+ io_tlb_hiwater_set, "%llu\n");
+
+static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
+ const char *dirname)
+{
+ mem->debugfs = debugfs_create_dir(dirname, io_tlb_default_mem.debugfs);
+ if (!mem->nslabs)
+ return;
+
+ debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
+ debugfs_create_file("io_tlb_used", 0400, mem->debugfs, mem,
+ &fops_io_tlb_used);
+ debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem,
+ &fops_io_tlb_hiwater);
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs,
+ mem, &fops_io_tlb_transient_used);
+#endif
+}
+
+static int __init swiotlb_create_default_debugfs(void)
+{
+ swiotlb_create_debugfs_files(&io_tlb_default_mem, "swiotlb");
+ return 0;
+}
+
+late_initcall(swiotlb_create_default_debugfs);
+
+#else /* !CONFIG_DEBUG_FS */
+
+static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
+ const char *dirname)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_DMA_RESTRICTED_POOL
+
+struct page *swiotlb_alloc(struct device *dev, size_t size)
+{
+ struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ struct io_tlb_pool *pool;
+ phys_addr_t tlb_addr;
+ unsigned int align;
+ int index;
+
+ if (!mem)
+ return NULL;
+
+ align = (1 << (get_order(size) + PAGE_SHIFT)) - 1;
+ index = swiotlb_find_slots(dev, 0, size, align, &pool);
+ if (index == -1)
+ return NULL;
+
+ tlb_addr = slot_addr(pool->start, index);
+ if (unlikely(!PAGE_ALIGNED(tlb_addr))) {
+ dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n",
+ &tlb_addr);
+ swiotlb_release_slots(dev, tlb_addr);
+ return NULL;
+ }
+
+ return pfn_to_page(PFN_DOWN(tlb_addr));
+}
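
For illustration, the alignment mask computed above for a 16 KiB allocation on a 4 KiB-page system:

/*
 * get_order(SZ_16K) == 2, so align = (1 << (2 + PAGE_SHIFT)) - 1 == 0x3fff,
 * i.e. swiotlb_find_slots() is asked for a bounce address that is 16 KiB
 * aligned, matching the page-order alignment expected from an allocator.
 */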
+
+bool swiotlb_free(struct device *dev, struct page *page, size_t size)
+{
+ phys_addr_t tlb_addr = page_to_phys(page);
+
+ if (!is_swiotlb_buffer(dev, tlb_addr))
+ return false;
+
+ swiotlb_release_slots(dev, tlb_addr);
+
+ return true;
+}
+
+static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
+ struct device *dev)
+{
+ struct io_tlb_mem *mem = rmem->priv;
+ unsigned long nslabs = rmem->size >> IO_TLB_SHIFT;
+
+ /* Restricted DMA pools use a single per-device IO TLB area */
+ unsigned int nareas = 1;
+
+ if (PageHighMem(pfn_to_page(PHYS_PFN(rmem->base)))) {
+ dev_err(dev, "Restricted DMA pool must be accessible within the linear mapping.");
+ return -EINVAL;
+ }
+
+ /*
+ * Since multiple devices can share the same pool, the private data,
+ * io_tlb_mem struct, will be initialized by the first device attached
+ * to it.
+ */
+ if (!mem) {
+ struct io_tlb_pool *pool;
+
+ mem = kzalloc(sizeof(*mem), GFP_KERNEL);
+ if (!mem)
+ return -ENOMEM;
+ pool = &mem->defpool;
+
+ pool->slots = kcalloc(nslabs, sizeof(*pool->slots), GFP_KERNEL);
+ if (!pool->slots) {
+ kfree(mem);
+ return -ENOMEM;
+ }
+
+ pool->areas = kcalloc(nareas, sizeof(*pool->areas),
+ GFP_KERNEL);
+ if (!pool->areas) {
+ kfree(pool->slots);
+ kfree(mem);
+ return -ENOMEM;
+ }
+
+ set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
+ rmem->size >> PAGE_SHIFT);
+ swiotlb_init_io_tlb_pool(pool, rmem->base, nslabs,
+ false, nareas);
+ mem->force_bounce = true;
+ mem->for_alloc = true;
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ spin_lock_init(&mem->lock);
+ INIT_LIST_HEAD_RCU(&mem->pools);
#endif
+ add_mem_pool(mem, pool);
+
+ rmem->priv = mem;
+
+ swiotlb_create_debugfs_files(mem, rmem->name);
+ }
+
+ dev->dma_io_tlb_mem = mem;
+
+ return 0;
+}
+
+static void rmem_swiotlb_device_release(struct reserved_mem *rmem,
+ struct device *dev)
+{
+ dev->dma_io_tlb_mem = &io_tlb_default_mem;
+}
+
+static const struct reserved_mem_ops rmem_swiotlb_ops = {
+ .device_init = rmem_swiotlb_device_init,
+ .device_release = rmem_swiotlb_device_release,
+};
+
+static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
+{
+ unsigned long node = rmem->fdt_node;
+
+ if (of_get_flat_dt_prop(node, "reusable", NULL) ||
+ of_get_flat_dt_prop(node, "linux,cma-default", NULL) ||
+ of_get_flat_dt_prop(node, "linux,dma-default", NULL) ||
+ of_get_flat_dt_prop(node, "no-map", NULL))
+ return -EINVAL;
+
+ rmem->ops = &rmem_swiotlb_ops;
+ pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n",
+ &rmem->base, (unsigned long)rmem->size / SZ_1M);
+ return 0;
+}
+
+RESERVEDMEM_OF_DECLARE(dma, "restricted-dma-pool", rmem_swiotlb_setup);
+#endif /* CONFIG_DMA_RESTRICTED_POOL */
diff --git a/kernel/dma/virt.c b/kernel/dma/virt.c
deleted file mode 100644
index ebe128833af7..000000000000
--- a/kernel/dma/virt.c
+++ /dev/null
@@ -1,59 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * DMA operations that map to virtual addresses without flushing memory.
- */
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/dma-mapping.h>
-#include <linux/scatterlist.h>
-
-static void *dma_virt_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp,
- unsigned long attrs)
-{
- void *ret;
-
- ret = (void *)__get_free_pages(gfp | __GFP_ZERO, get_order(size));
- if (ret)
- *dma_handle = (uintptr_t)ret;
- return ret;
-}
-
-static void dma_virt_free(struct device *dev, size_t size,
- void *cpu_addr, dma_addr_t dma_addr,
- unsigned long attrs)
-{
- free_pages((unsigned long)cpu_addr, get_order(size));
-}
-
-static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- return (uintptr_t)(page_address(page) + offset);
-}
-
-static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl,
- int nents, enum dma_data_direction dir,
- unsigned long attrs)
-{
- int i;
- struct scatterlist *sg;
-
- for_each_sg(sgl, sg, nents, i) {
- BUG_ON(!sg_page(sg));
- sg_dma_address(sg) = (uintptr_t)sg_virt(sg);
- sg_dma_len(sg) = sg->length;
- }
-
- return nents;
-}
-
-const struct dma_map_ops dma_virt_ops = {
- .alloc = dma_virt_alloc,
- .free = dma_virt_free,
- .map_page = dma_virt_map_page,
- .map_sg = dma_virt_map_sg,
-};
-EXPORT_SYMBOL(dma_virt_ops);