summaryrefslogtreecommitdiff
path: root/kernel/dma/swiotlb.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/dma/swiotlb.c')
-rw-r--r--kernel/dma/swiotlb.c1253
1 files changed, 1028 insertions, 225 deletions
diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c
index a34c38bbe28f..a547c7693135 100644
--- a/kernel/dma/swiotlb.c
+++ b/kernel/dma/swiotlb.c
@@ -35,6 +35,7 @@
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/pfn.h>
+#include <linux/rculist.h>
#include <linux/scatterlist.h>
#include <linux/set_memory.h>
#include <linux/spinlock.h>
@@ -60,20 +61,41 @@
*/
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
-#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
-
+/**
+ * struct io_tlb_slot - IO TLB slot descriptor
+ * @orig_addr: The original address corresponding to a mapped entry.
+ * @alloc_size: Size of the allocated buffer.
+ * @list: The free list describing the number of free entries available
+ * from each index.
+ * @pad_slots: Number of preceding padding slots. Valid only in the first
+ * allocated non-padding slot.
+ */
struct io_tlb_slot {
phys_addr_t orig_addr;
size_t alloc_size;
- unsigned int list;
+ unsigned short list;
+ unsigned short pad_slots;
};
static bool swiotlb_force_bounce;
static bool swiotlb_force_disable;
-struct io_tlb_mem io_tlb_default_mem;
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+
+static void swiotlb_dyn_alloc(struct work_struct *work);
+
+static struct io_tlb_mem io_tlb_default_mem = {
+ .lock = __SPIN_LOCK_UNLOCKED(io_tlb_default_mem.lock),
+ .pools = LIST_HEAD_INIT(io_tlb_default_mem.pools),
+ .dyn_alloc = __WORK_INITIALIZER(io_tlb_default_mem.dyn_alloc,
+ swiotlb_dyn_alloc),
+};
+
+#else /* !CONFIG_SWIOTLB_DYNAMIC */
-phys_addr_t swiotlb_unencrypted_base;
+static struct io_tlb_mem io_tlb_default_mem;
+
+#endif /* CONFIG_SWIOTLB_DYNAMIC */
static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
static unsigned long default_nareas;
@@ -117,9 +139,16 @@ static bool round_up_default_nslabs(void)
return true;
}
+/**
+ * swiotlb_adjust_nareas() - adjust the number of areas and slots
+ * @nareas: Desired number of areas. Zero is treated as 1.
+ *
+ * Adjust the default number of areas in a memory pool.
+ * The default size of the memory pool may also change to meet minimum area
+ * size requirements.
+ */
static void swiotlb_adjust_nareas(unsigned int nareas)
{
- /* use a single area when non is specified */
if (!nareas)
nareas = 1;
else if (!is_power_of_2(nareas))
@@ -133,6 +162,23 @@ static void swiotlb_adjust_nareas(unsigned int nareas)
(default_nslabs << IO_TLB_SHIFT) >> 20);
}
+/**
+ * limit_nareas() - get the maximum number of areas for a given memory pool size
+ * @nareas: Desired number of areas.
+ * @nslots: Total number of slots in the memory pool.
+ *
+ * Limit the number of areas to the maximum possible number of areas in
+ * a memory pool of the given size.
+ *
+ * Return: Maximum possible number of areas.
+ */
+static unsigned int limit_nareas(unsigned int nareas, unsigned long nslots)
+{
+ if (nslots < nareas * IO_TLB_SEGSIZE)
+ return nslots / IO_TLB_SEGSIZE;
+ return nareas;
+}
+
static int __init
setup_io_tlb_npages(char *str)
{
@@ -156,14 +202,6 @@ setup_io_tlb_npages(char *str)
}
early_param("swiotlb", setup_io_tlb_npages);
-unsigned int swiotlb_max_segment(void)
-{
- if (!io_tlb_default_mem.nslabs)
- return 0;
- return rounddown(io_tlb_default_mem.nslabs << IO_TLB_SHIFT, PAGE_SIZE);
-}
-EXPORT_SYMBOL_GPL(swiotlb_max_segment);
-
unsigned long swiotlb_size_or_default(void)
{
return default_nslabs << IO_TLB_SHIFT;
@@ -188,7 +226,7 @@ void __init swiotlb_adjust_size(unsigned long size)
void swiotlb_print_info(void)
{
- struct io_tlb_mem *mem = &io_tlb_default_mem;
+ struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
if (!mem->nslabs) {
pr_warn("No low mem\n");
@@ -210,34 +248,6 @@ static inline unsigned long nr_slots(u64 val)
}
/*
- * Remap swioltb memory in the unencrypted physical address space
- * when swiotlb_unencrypted_base is set. (e.g. for Hyper-V AMD SEV-SNP
- * Isolation VMs).
- */
-#ifdef CONFIG_HAS_IOMEM
-static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
-{
- void *vaddr = NULL;
-
- if (swiotlb_unencrypted_base) {
- phys_addr_t paddr = mem->start + swiotlb_unencrypted_base;
-
- vaddr = memremap(paddr, bytes, MEMREMAP_WB);
- if (!vaddr)
- pr_err("Failed to map the unencrypted memory %pa size %lx.\n",
- &paddr, bytes);
- }
-
- return vaddr;
-}
-#else
-static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
-{
- return NULL;
-}
-#endif
-
-/*
* Early SWIOTLB allocation may be too early to allow an architecture to
* perform the desired operations. This function allows the architecture to
* call SWIOTLB when the operations are possible. It needs to be called
@@ -245,24 +255,17 @@ static void *swiotlb_mem_remap(struct io_tlb_mem *mem, unsigned long bytes)
*/
void __init swiotlb_update_mem_attributes(void)
{
- struct io_tlb_mem *mem = &io_tlb_default_mem;
- void *vaddr;
+ struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
unsigned long bytes;
if (!mem->nslabs || mem->late_alloc)
return;
- vaddr = phys_to_virt(mem->start);
bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
- set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
-
- mem->vaddr = swiotlb_mem_remap(mem, bytes);
- if (!mem->vaddr)
- mem->vaddr = vaddr;
+ set_memory_decrypted((unsigned long)mem->vaddr, bytes >> PAGE_SHIFT);
}
-static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
- unsigned long nslabs, unsigned int flags,
- bool late_alloc, unsigned int nareas)
+static void swiotlb_init_io_tlb_pool(struct io_tlb_pool *mem, phys_addr_t start,
+ unsigned long nslabs, bool late_alloc, unsigned int nareas)
{
void *vaddr = phys_to_virt(start);
unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
@@ -274,8 +277,6 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
mem->nareas = nareas;
mem->area_nslabs = nslabs / mem->nareas;
- mem->force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE);
-
for (i = 0; i < mem->nareas; i++) {
spin_lock_init(&mem->areas[i].lock);
mem->areas[i].index = 0;
@@ -283,24 +284,37 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
}
for (i = 0; i < mem->nslabs; i++) {
- mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
+ mem->slots[i].list = min(IO_TLB_SEGSIZE - io_tlb_offset(i),
+ mem->nslabs - i);
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
+ mem->slots[i].pad_slots = 0;
}
- /*
- * If swiotlb_unencrypted_base is set, the bounce buffer memory will
- * be remapped and cleared in swiotlb_update_mem_attributes.
- */
- if (swiotlb_unencrypted_base)
- return;
-
memset(vaddr, 0, bytes);
mem->vaddr = vaddr;
return;
}
-static void *swiotlb_memblock_alloc(unsigned long nslabs, unsigned int flags,
+/**
+ * add_mem_pool() - add a memory pool to the allocator
+ * @mem: Software IO TLB allocator.
+ * @pool: Memory pool to be added.
+ */
+static void add_mem_pool(struct io_tlb_mem *mem, struct io_tlb_pool *pool)
+{
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ spin_lock(&mem->lock);
+ list_add_rcu(&pool->node, &mem->pools);
+ mem->nslabs += pool->nslabs;
+ spin_unlock(&mem->lock);
+#else
+ mem->nslabs = pool->nslabs;
+#endif
+}
+
+static void __init *swiotlb_memblock_alloc(unsigned long nslabs,
+ unsigned int flags,
int (*remap)(void *tlb, unsigned long nslabs))
{
size_t bytes = PAGE_ALIGN(nslabs << IO_TLB_SHIFT);
@@ -338,8 +352,9 @@ static void *swiotlb_memblock_alloc(unsigned long nslabs, unsigned int flags,
void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
int (*remap)(void *tlb, unsigned long nslabs))
{
- struct io_tlb_mem *mem = &io_tlb_default_mem;
+ struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
unsigned long nslabs;
+ unsigned int nareas;
size_t alloc_size;
void *tlb;
@@ -348,18 +363,28 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
if (swiotlb_force_disable)
return;
- /*
- * default_nslabs maybe changed when adjust area number.
- * So allocate bounce buffer after adjusting area number.
- */
+ io_tlb_default_mem.force_bounce =
+ swiotlb_force_bounce || (flags & SWIOTLB_FORCE);
+
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ if (!remap)
+ io_tlb_default_mem.can_grow = true;
+ if (flags & SWIOTLB_ANY)
+ io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1);
+ else
+ io_tlb_default_mem.phys_limit = ARCH_LOW_ADDRESS_LIMIT;
+#endif
+
if (!default_nareas)
swiotlb_adjust_nareas(num_possible_cpus());
nslabs = default_nslabs;
+ nareas = limit_nareas(default_nareas, nslabs);
while ((tlb = swiotlb_memblock_alloc(nslabs, flags, remap)) == NULL) {
if (nslabs <= IO_TLB_MIN_SLABS)
return;
nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
+ nareas = limit_nareas(nareas, nslabs);
}
if (default_nslabs != nslabs) {
@@ -377,14 +402,14 @@ void __init swiotlb_init_remap(bool addressing_limit, unsigned int flags,
}
mem->areas = memblock_alloc(array_size(sizeof(struct io_tlb_area),
- default_nareas), SMP_CACHE_BYTES);
+ nareas), SMP_CACHE_BYTES);
if (!mem->areas) {
pr_warn("%s: Failed to allocate mem->areas.\n", __func__);
return;
}
- swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, flags, false,
- default_nareas);
+ swiotlb_init_io_tlb_pool(mem, __pa(tlb), nslabs, false, nareas);
+ add_mem_pool(&io_tlb_default_mem, mem);
if (flags & SWIOTLB_VERBOSE)
swiotlb_print_info();
@@ -403,16 +428,36 @@ void __init swiotlb_init(bool addressing_limit, unsigned int flags)
int swiotlb_init_late(size_t size, gfp_t gfp_mask,
int (*remap)(void *tlb, unsigned long nslabs))
{
- struct io_tlb_mem *mem = &io_tlb_default_mem;
+ struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
unsigned long nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
+ unsigned int nareas;
unsigned char *vstart = NULL;
unsigned int order, area_order;
bool retried = false;
int rc = 0;
+ if (io_tlb_default_mem.nslabs)
+ return 0;
+
if (swiotlb_force_disable)
return 0;
+ io_tlb_default_mem.force_bounce = swiotlb_force_bounce;
+
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ if (!remap)
+ io_tlb_default_mem.can_grow = true;
+ if (IS_ENABLED(CONFIG_ZONE_DMA) && (gfp_mask & __GFP_DMA))
+ io_tlb_default_mem.phys_limit = zone_dma_limit;
+ else if (IS_ENABLED(CONFIG_ZONE_DMA32) && (gfp_mask & __GFP_DMA32))
+ io_tlb_default_mem.phys_limit = max(DMA_BIT_MASK(32), zone_dma_limit);
+ else
+ io_tlb_default_mem.phys_limit = virt_to_phys(high_memory - 1);
+#endif
+
+ if (!default_nareas)
+ swiotlb_adjust_nareas(num_possible_cpus());
+
retry:
order = get_order(nslabs << IO_TLB_SHIFT);
nslabs = SLABS_PER_PAGE << order;
@@ -447,11 +492,8 @@ retry:
(PAGE_SIZE << order) >> 20);
}
- if (!default_nareas)
- swiotlb_adjust_nareas(num_possible_cpus());
-
- area_order = get_order(array_size(sizeof(*mem->areas),
- default_nareas));
+ nareas = limit_nareas(default_nareas, nslabs);
+ area_order = get_order(array_size(sizeof(*mem->areas), nareas));
mem->areas = (struct io_tlb_area *)
__get_free_pages(GFP_KERNEL | __GFP_ZERO, area_order);
if (!mem->areas)
@@ -464,8 +506,9 @@ retry:
set_memory_decrypted((unsigned long)vstart,
(nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
- swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true,
- default_nareas);
+ swiotlb_init_io_tlb_pool(mem, virt_to_phys(vstart), nslabs, true,
+ nareas);
+ add_mem_pool(&io_tlb_default_mem, mem);
swiotlb_print_info();
return 0;
@@ -479,7 +522,7 @@ error_area:
void __init swiotlb_exit(void)
{
- struct io_tlb_mem *mem = &io_tlb_default_mem;
+ struct io_tlb_pool *mem = &io_tlb_default_mem.defpool;
unsigned long tbl_vaddr;
size_t tbl_size, slots_size;
unsigned int area_order;
@@ -512,47 +555,330 @@ void __init swiotlb_exit(void)
memset(mem, 0, sizeof(*mem));
}
-/*
- * Return the offset into a iotlb slot required to keep the device happy.
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+
+/**
+ * alloc_dma_pages() - allocate pages to be used for DMA
+ * @gfp: GFP flags for the allocation.
+ * @bytes: Size of the buffer.
+ * @phys_limit: Maximum allowed physical address of the buffer.
+ *
+ * Allocate pages from the buddy allocator. If successful, make the allocated
+ * pages decrypted that they can be used for DMA.
+ *
+ * Return: Decrypted pages, %NULL on allocation failure, or ERR_PTR(-EAGAIN)
+ * if the allocated physical address was above @phys_limit.
+ */
+static struct page *alloc_dma_pages(gfp_t gfp, size_t bytes, u64 phys_limit)
+{
+ unsigned int order = get_order(bytes);
+ struct page *page;
+ phys_addr_t paddr;
+ void *vaddr;
+
+ page = alloc_pages(gfp, order);
+ if (!page)
+ return NULL;
+
+ paddr = page_to_phys(page);
+ if (paddr + bytes - 1 > phys_limit) {
+ __free_pages(page, order);
+ return ERR_PTR(-EAGAIN);
+ }
+
+ vaddr = phys_to_virt(paddr);
+ if (set_memory_decrypted((unsigned long)vaddr, PFN_UP(bytes)))
+ goto error;
+ return page;
+
+error:
+ /* Intentional leak if pages cannot be encrypted again. */
+ if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
+ __free_pages(page, order);
+ return NULL;
+}
+
+/**
+ * swiotlb_alloc_tlb() - allocate a dynamic IO TLB buffer
+ * @dev: Device for which a memory pool is allocated.
+ * @bytes: Size of the buffer.
+ * @phys_limit: Maximum allowed physical address of the buffer.
+ * @gfp: GFP flags for the allocation.
+ *
+ * Return: Allocated pages, or %NULL on allocation failure.
+ */
+static struct page *swiotlb_alloc_tlb(struct device *dev, size_t bytes,
+ u64 phys_limit, gfp_t gfp)
+{
+ struct page *page;
+
+ /*
+ * Allocate from the atomic pools if memory is encrypted and
+ * the allocation is atomic, because decrypting may block.
+ */
+ if (!gfpflags_allow_blocking(gfp) && dev && force_dma_unencrypted(dev)) {
+ void *vaddr;
+
+ if (!IS_ENABLED(CONFIG_DMA_COHERENT_POOL))
+ return NULL;
+
+ return dma_alloc_from_pool(dev, bytes, &vaddr, gfp,
+ dma_coherent_ok);
+ }
+
+ gfp &= ~GFP_ZONEMASK;
+ if (phys_limit <= zone_dma_limit)
+ gfp |= __GFP_DMA;
+ else if (phys_limit <= DMA_BIT_MASK(32))
+ gfp |= __GFP_DMA32;
+
+ while (IS_ERR(page = alloc_dma_pages(gfp, bytes, phys_limit))) {
+ if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
+ phys_limit < DMA_BIT_MASK(64) &&
+ !(gfp & (__GFP_DMA32 | __GFP_DMA)))
+ gfp |= __GFP_DMA32;
+ else if (IS_ENABLED(CONFIG_ZONE_DMA) &&
+ !(gfp & __GFP_DMA))
+ gfp = (gfp & ~__GFP_DMA32) | __GFP_DMA;
+ else
+ return NULL;
+ }
+
+ return page;
+}
+
+/**
+ * swiotlb_free_tlb() - free a dynamically allocated IO TLB buffer
+ * @vaddr: Virtual address of the buffer.
+ * @bytes: Size of the buffer.
+ */
+static void swiotlb_free_tlb(void *vaddr, size_t bytes)
+{
+ if (IS_ENABLED(CONFIG_DMA_COHERENT_POOL) &&
+ dma_free_from_pool(NULL, vaddr, bytes))
+ return;
+
+ /* Intentional leak if pages cannot be encrypted again. */
+ if (!set_memory_encrypted((unsigned long)vaddr, PFN_UP(bytes)))
+ __free_pages(virt_to_page(vaddr), get_order(bytes));
+}
+
+/**
+ * swiotlb_alloc_pool() - allocate a new IO TLB memory pool
+ * @dev: Device for which a memory pool is allocated.
+ * @minslabs: Minimum number of slabs.
+ * @nslabs: Desired (maximum) number of slabs.
+ * @nareas: Number of areas.
+ * @phys_limit: Maximum DMA buffer physical address.
+ * @gfp: GFP flags for the allocations.
+ *
+ * Allocate and initialize a new IO TLB memory pool. The actual number of
+ * slabs may be reduced if allocation of @nslabs fails. If even
+ * @minslabs cannot be allocated, this function fails.
+ *
+ * Return: New memory pool, or %NULL on allocation failure.
+ */
+static struct io_tlb_pool *swiotlb_alloc_pool(struct device *dev,
+ unsigned long minslabs, unsigned long nslabs,
+ unsigned int nareas, u64 phys_limit, gfp_t gfp)
+{
+ struct io_tlb_pool *pool;
+ unsigned int slot_order;
+ struct page *tlb;
+ size_t pool_size;
+ size_t tlb_size;
+
+ if (nslabs > SLABS_PER_PAGE << MAX_PAGE_ORDER) {
+ nslabs = SLABS_PER_PAGE << MAX_PAGE_ORDER;
+ nareas = limit_nareas(nareas, nslabs);
+ }
+
+ pool_size = sizeof(*pool) + array_size(sizeof(*pool->areas), nareas);
+ pool = kzalloc(pool_size, gfp);
+ if (!pool)
+ goto error;
+ pool->areas = (void *)pool + sizeof(*pool);
+
+ tlb_size = nslabs << IO_TLB_SHIFT;
+ while (!(tlb = swiotlb_alloc_tlb(dev, tlb_size, phys_limit, gfp))) {
+ if (nslabs <= minslabs)
+ goto error_tlb;
+ nslabs = ALIGN(nslabs >> 1, IO_TLB_SEGSIZE);
+ nareas = limit_nareas(nareas, nslabs);
+ tlb_size = nslabs << IO_TLB_SHIFT;
+ }
+
+ slot_order = get_order(array_size(sizeof(*pool->slots), nslabs));
+ pool->slots = (struct io_tlb_slot *)
+ __get_free_pages(gfp, slot_order);
+ if (!pool->slots)
+ goto error_slots;
+
+ swiotlb_init_io_tlb_pool(pool, page_to_phys(tlb), nslabs, true, nareas);
+ return pool;
+
+error_slots:
+ swiotlb_free_tlb(page_address(tlb), tlb_size);
+error_tlb:
+ kfree(pool);
+error:
+ return NULL;
+}
+
+/**
+ * swiotlb_dyn_alloc() - dynamic memory pool allocation worker
+ * @work: Pointer to dyn_alloc in struct io_tlb_mem.
+ */
+static void swiotlb_dyn_alloc(struct work_struct *work)
+{
+ struct io_tlb_mem *mem =
+ container_of(work, struct io_tlb_mem, dyn_alloc);
+ struct io_tlb_pool *pool;
+
+ pool = swiotlb_alloc_pool(NULL, IO_TLB_MIN_SLABS, default_nslabs,
+ default_nareas, mem->phys_limit, GFP_KERNEL);
+ if (!pool) {
+ pr_warn_ratelimited("Failed to allocate new pool");
+ return;
+ }
+
+ add_mem_pool(mem, pool);
+}
+
+/**
+ * swiotlb_dyn_free() - RCU callback to free a memory pool
+ * @rcu: RCU head in the corresponding struct io_tlb_pool.
+ */
+static void swiotlb_dyn_free(struct rcu_head *rcu)
+{
+ struct io_tlb_pool *pool = container_of(rcu, struct io_tlb_pool, rcu);
+ size_t slots_size = array_size(sizeof(*pool->slots), pool->nslabs);
+ size_t tlb_size = pool->end - pool->start;
+
+ free_pages((unsigned long)pool->slots, get_order(slots_size));
+ swiotlb_free_tlb(pool->vaddr, tlb_size);
+ kfree(pool);
+}
+
+/**
+ * __swiotlb_find_pool() - find the IO TLB pool for a physical address
+ * @dev: Device which has mapped the DMA buffer.
+ * @paddr: Physical address within the DMA buffer.
+ *
+ * Find the IO TLB memory pool descriptor which contains the given physical
+ * address, if any. This function is for use only when the dev is known to
+ * be using swiotlb. Use swiotlb_find_pool() for the more general case
+ * when this condition is not met.
+ *
+ * Return: Memory pool which contains @paddr, or %NULL if none.
+ */
+struct io_tlb_pool *__swiotlb_find_pool(struct device *dev, phys_addr_t paddr)
+{
+ struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ struct io_tlb_pool *pool;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(pool, &mem->pools, node) {
+ if (paddr >= pool->start && paddr < pool->end)
+ goto out;
+ }
+
+ list_for_each_entry_rcu(pool, &dev->dma_io_tlb_pools, node) {
+ if (paddr >= pool->start && paddr < pool->end)
+ goto out;
+ }
+ pool = NULL;
+out:
+ rcu_read_unlock();
+ return pool;
+}
+
+/**
+ * swiotlb_del_pool() - remove an IO TLB pool from a device
+ * @dev: Owning device.
+ * @pool: Memory pool to be removed.
*/
-static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+static void swiotlb_del_pool(struct device *dev, struct io_tlb_pool *pool)
{
- return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+ unsigned long flags;
+
+ spin_lock_irqsave(&dev->dma_io_tlb_lock, flags);
+ list_del_rcu(&pool->node);
+ spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags);
+
+ call_rcu(&pool->rcu, swiotlb_dyn_free);
+}
+
+#endif /* CONFIG_SWIOTLB_DYNAMIC */
+
+/**
+ * swiotlb_dev_init() - initialize swiotlb fields in &struct device
+ * @dev: Device to be initialized.
+ */
+void swiotlb_dev_init(struct device *dev)
+{
+ dev->dma_io_tlb_mem = &io_tlb_default_mem;
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ INIT_LIST_HEAD(&dev->dma_io_tlb_pools);
+ spin_lock_init(&dev->dma_io_tlb_lock);
+ dev->dma_uses_io_tlb = false;
+#endif
+}
+
+/**
+ * swiotlb_align_offset() - Get required offset into an IO TLB allocation.
+ * @dev: Owning device.
+ * @align_mask: Allocation alignment mask.
+ * @addr: DMA address.
+ *
+ * Return the minimum offset from the start of an IO TLB allocation which is
+ * required for a given buffer address and allocation alignment to keep the
+ * device happy.
+ *
+ * First, the address bits covered by min_align_mask must be identical in the
+ * original address and the bounce buffer address. High bits are preserved by
+ * choosing a suitable IO TLB slot, but bits below IO_TLB_SHIFT require extra
+ * padding bytes before the bounce buffer.
+ *
+ * Second, @align_mask specifies which bits of the first allocated slot must
+ * be zero. This may require allocating additional padding slots, and then the
+ * offset (in bytes) from the first such padding slot is returned.
+ */
+static unsigned int swiotlb_align_offset(struct device *dev,
+ unsigned int align_mask, u64 addr)
+{
+ return addr & dma_get_min_align_mask(dev) &
+ (align_mask | (IO_TLB_SIZE - 1));
}
/*
* Bounce: copy the swiotlb buffer from or back to the original dma location
*/
static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size,
- enum dma_data_direction dir)
+ enum dma_data_direction dir, struct io_tlb_pool *mem)
{
- struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
phys_addr_t orig_addr = mem->slots[index].orig_addr;
size_t alloc_size = mem->slots[index].alloc_size;
unsigned long pfn = PFN_DOWN(orig_addr);
unsigned char *vaddr = mem->vaddr + tlb_addr - mem->start;
- unsigned int tlb_offset, orig_addr_offset;
+ int tlb_offset;
if (orig_addr == INVALID_PHYS_ADDR)
return;
- tlb_offset = tlb_addr & (IO_TLB_SIZE - 1);
- orig_addr_offset = swiotlb_align_offset(dev, orig_addr);
- if (tlb_offset < orig_addr_offset) {
- dev_WARN_ONCE(dev, 1,
- "Access before mapping start detected. orig offset %u, requested offset %u.\n",
- orig_addr_offset, tlb_offset);
- return;
- }
-
- tlb_offset -= orig_addr_offset;
- if (tlb_offset > alloc_size) {
- dev_WARN_ONCE(dev, 1,
- "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu+%u.\n",
- alloc_size, size, tlb_offset);
- return;
- }
+ /*
+ * It's valid for tlb_offset to be negative. This can happen when the
+ * "offset" returned by swiotlb_align_offset() is non-zero, and the
+ * tlb_addr is pointing within the first "offset" bytes of the second
+ * or subsequent slots of the allocated swiotlb area. While it's not
+ * valid for tlb_addr to be pointing within the first "offset" bytes
+ * of the first slot, there's no way to check for such an error since
+ * this function can't distinguish the first slot from the second and
+ * subsequent slots.
+ */
+ tlb_offset = (tlb_addr & (IO_TLB_SIZE - 1)) -
+ swiotlb_align_offset(dev, 0, orig_addr);
orig_addr += tlb_offset;
alloc_size -= tlb_offset;
@@ -603,12 +929,10 @@ static inline phys_addr_t slot_addr(phys_addr_t start, phys_addr_t idx)
*/
static inline unsigned long get_max_slots(unsigned long boundary_mask)
{
- if (boundary_mask == ~0UL)
- return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
- return nr_slots(boundary_mask + 1);
+ return (boundary_mask >> IO_TLB_SHIFT) + 1;
}
-static unsigned int wrap_area_index(struct io_tlb_mem *mem, unsigned int index)
+static unsigned int wrap_area_index(struct io_tlb_pool *mem, unsigned int index)
{
if (index >= mem->area_nslabs)
return 0;
@@ -616,138 +940,442 @@ static unsigned int wrap_area_index(struct io_tlb_mem *mem, unsigned int index)
}
/*
- * Find a suitable number of IO TLB entries size that will fit this request and
- * allocate a buffer from that IO TLB pool.
+ * Track the total used slots with a global atomic value in order to have
+ * correct information to determine the high water mark. The mem_used()
+ * function gives imprecise results because there's no locking across
+ * multiple areas.
+ */
+#ifdef CONFIG_DEBUG_FS
+static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots)
+{
+ unsigned long old_hiwater, new_used;
+
+ new_used = atomic_long_add_return(nslots, &mem->total_used);
+ old_hiwater = atomic_long_read(&mem->used_hiwater);
+ do {
+ if (new_used <= old_hiwater)
+ break;
+ } while (!atomic_long_try_cmpxchg(&mem->used_hiwater,
+ &old_hiwater, new_used));
+}
+
+static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+ atomic_long_sub(nslots, &mem->total_used);
+}
+
+#else /* !CONFIG_DEBUG_FS */
+static void inc_used_and_hiwater(struct io_tlb_mem *mem, unsigned int nslots)
+{
+}
+static void dec_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+#ifdef CONFIG_DEBUG_FS
+static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+ atomic_long_add(nslots, &mem->transient_nslabs);
+}
+
+static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+ atomic_long_sub(nslots, &mem->transient_nslabs);
+}
+
+#else /* !CONFIG_DEBUG_FS */
+static void inc_transient_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+}
+static void dec_transient_used(struct io_tlb_mem *mem, unsigned int nslots)
+{
+}
+#endif /* CONFIG_DEBUG_FS */
+#endif /* CONFIG_SWIOTLB_DYNAMIC */
+
+/**
+ * swiotlb_search_pool_area() - search one memory area in one pool
+ * @dev: Device which maps the buffer.
+ * @pool: Memory pool to be searched.
+ * @area_index: Index of the IO TLB memory area to be searched.
+ * @orig_addr: Original (non-bounced) IO buffer address.
+ * @alloc_size: Total requested size of the bounce buffer,
+ * including initial alignment padding.
+ * @alloc_align_mask: Required alignment of the allocated buffer.
+ *
+ * Find a suitable sequence of IO TLB entries for the request and allocate
+ * a buffer from the given IO TLB memory area.
+ * This function takes care of locking.
+ *
+ * Return: Index of the first allocated slot, or -1 on error.
*/
-static int swiotlb_do_find_slots(struct device *dev, int area_index,
- phys_addr_t orig_addr, size_t alloc_size,
+static int swiotlb_search_pool_area(struct device *dev, struct io_tlb_pool *pool,
+ int area_index, phys_addr_t orig_addr, size_t alloc_size,
unsigned int alloc_align_mask)
{
- struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
- struct io_tlb_area *area = mem->areas + area_index;
+ struct io_tlb_area *area = pool->areas + area_index;
unsigned long boundary_mask = dma_get_seg_boundary(dev);
dma_addr_t tbl_dma_addr =
- phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
+ phys_to_dma_unencrypted(dev, pool->start) & boundary_mask;
unsigned long max_slots = get_max_slots(boundary_mask);
- unsigned int iotlb_align_mask =
- dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
+ unsigned int iotlb_align_mask = dma_get_min_align_mask(dev);
unsigned int nslots = nr_slots(alloc_size), stride;
- unsigned int index, wrap, count = 0, i;
- unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ unsigned int offset = swiotlb_align_offset(dev, 0, orig_addr);
+ unsigned int index, slots_checked, count = 0, i;
unsigned long flags;
unsigned int slot_base;
unsigned int slot_index;
BUG_ON(!nslots);
- BUG_ON(area_index >= mem->nareas);
+ BUG_ON(area_index >= pool->nareas);
+
+ /*
+ * Historically, swiotlb allocations >= PAGE_SIZE were guaranteed to be
+ * page-aligned in the absence of any other alignment requirements.
+ * 'alloc_align_mask' was later introduced to specify the alignment
+ * explicitly, however this is passed as zero for streaming mappings
+ * and so we preserve the old behaviour there in case any drivers are
+ * relying on it.
+ */
+ if (!alloc_align_mask && !iotlb_align_mask && alloc_size >= PAGE_SIZE)
+ alloc_align_mask = PAGE_SIZE - 1;
+
+ /*
+ * Ensure that the allocation is at least slot-aligned and update
+ * 'iotlb_align_mask' to ignore bits that will be preserved when
+ * offsetting into the allocation.
+ */
+ alloc_align_mask |= (IO_TLB_SIZE - 1);
+ iotlb_align_mask &= ~alloc_align_mask;
/*
* For mappings with an alignment requirement don't bother looping to
- * unaligned slots once we found an aligned one. For allocations of
- * PAGE_SIZE or larger only look for page aligned allocations.
+ * unaligned slots once we found an aligned one.
*/
- stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
- if (alloc_size >= PAGE_SIZE)
- stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
- stride = max(stride, (alloc_align_mask >> IO_TLB_SHIFT) + 1);
+ stride = get_max_slots(max(alloc_align_mask, iotlb_align_mask));
spin_lock_irqsave(&area->lock, flags);
- if (unlikely(nslots > mem->area_nslabs - area->used))
+ if (unlikely(nslots > pool->area_nslabs - area->used))
goto not_found;
- slot_base = area_index * mem->area_nslabs;
- index = wrap = wrap_area_index(mem, ALIGN(area->index, stride));
+ slot_base = area_index * pool->area_nslabs;
+ index = area->index;
+
+ for (slots_checked = 0; slots_checked < pool->area_nslabs; ) {
+ phys_addr_t tlb_addr;
- do {
slot_index = slot_base + index;
+ tlb_addr = slot_addr(tbl_dma_addr, slot_index);
- if (orig_addr &&
- (slot_addr(tbl_dma_addr, slot_index) &
- iotlb_align_mask) != (orig_addr & iotlb_align_mask)) {
- index = wrap_area_index(mem, index + 1);
+ if ((tlb_addr & alloc_align_mask) ||
+ (orig_addr && (tlb_addr & iotlb_align_mask) !=
+ (orig_addr & iotlb_align_mask))) {
+ index = wrap_area_index(pool, index + 1);
+ slots_checked++;
continue;
}
- /*
- * If we find a slot that indicates we have 'nslots' number of
- * contiguous buffers, we allocate the buffers from that slot
- * and mark the entries as '0' indicating unavailable.
- */
if (!iommu_is_span_boundary(slot_index, nslots,
nr_slots(tbl_dma_addr),
max_slots)) {
- if (mem->slots[slot_index].list >= nslots)
+ if (pool->slots[slot_index].list >= nslots)
goto found;
}
- index = wrap_area_index(mem, index + stride);
- } while (index != wrap);
+ index = wrap_area_index(pool, index + stride);
+ slots_checked += stride;
+ }
not_found:
spin_unlock_irqrestore(&area->lock, flags);
return -1;
found:
+ /*
+ * If we find a slot that indicates we have 'nslots' number of
+ * contiguous buffers, we allocate the buffers from that slot onwards
+ * and set the list of free entries to '0' indicating unavailable.
+ */
for (i = slot_index; i < slot_index + nslots; i++) {
- mem->slots[i].list = 0;
- mem->slots[i].alloc_size = alloc_size - (offset +
+ pool->slots[i].list = 0;
+ pool->slots[i].alloc_size = alloc_size - (offset +
((i - slot_index) << IO_TLB_SHIFT));
}
for (i = slot_index - 1;
io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
- mem->slots[i].list; i--)
- mem->slots[i].list = ++count;
+ pool->slots[i].list; i--)
+ pool->slots[i].list = ++count;
/*
* Update the indices to avoid searching in the next round.
*/
- if (index + nslots < mem->area_nslabs)
- area->index = index + nslots;
- else
- area->index = 0;
+ area->index = wrap_area_index(pool, index + nslots);
area->used += nslots;
spin_unlock_irqrestore(&area->lock, flags);
+
+ inc_used_and_hiwater(dev->dma_io_tlb_mem, nslots);
return slot_index;
}
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+
+/**
+ * swiotlb_search_area() - search one memory area in all pools
+ * @dev: Device which maps the buffer.
+ * @start_cpu: Start CPU number.
+ * @cpu_offset: Offset from @start_cpu.
+ * @orig_addr: Original (non-bounced) IO buffer address.
+ * @alloc_size: Total requested size of the bounce buffer,
+ * including initial alignment padding.
+ * @alloc_align_mask: Required alignment of the allocated buffer.
+ * @retpool: Used memory pool, updated on return.
+ *
+ * Search one memory area in all pools for a sequence of slots that match the
+ * allocation constraints.
+ *
+ * Return: Index of the first allocated slot, or -1 on error.
+ */
+static int swiotlb_search_area(struct device *dev, int start_cpu,
+ int cpu_offset, phys_addr_t orig_addr, size_t alloc_size,
+ unsigned int alloc_align_mask, struct io_tlb_pool **retpool)
+{
+ struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ struct io_tlb_pool *pool;
+ int area_index;
+ int index = -1;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(pool, &mem->pools, node) {
+ if (cpu_offset >= pool->nareas)
+ continue;
+ area_index = (start_cpu + cpu_offset) & (pool->nareas - 1);
+ index = swiotlb_search_pool_area(dev, pool, area_index,
+ orig_addr, alloc_size,
+ alloc_align_mask);
+ if (index >= 0) {
+ *retpool = pool;
+ break;
+ }
+ }
+ rcu_read_unlock();
+ return index;
+}
+
+/**
+ * swiotlb_find_slots() - search for slots in the whole swiotlb
+ * @dev: Device which maps the buffer.
+ * @orig_addr: Original (non-bounced) IO buffer address.
+ * @alloc_size: Total requested size of the bounce buffer,
+ * including initial alignment padding.
+ * @alloc_align_mask: Required alignment of the allocated buffer.
+ * @retpool: Used memory pool, updated on return.
+ *
+ * Search through the whole software IO TLB to find a sequence of slots that
+ * match the allocation constraints.
+ *
+ * Return: Index of the first allocated slot, or -1 on error.
+ */
static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
- size_t alloc_size, unsigned int alloc_align_mask)
+ size_t alloc_size, unsigned int alloc_align_mask,
+ struct io_tlb_pool **retpool)
{
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
- int start = raw_smp_processor_id() & (mem->nareas - 1);
- int i = start, index;
+ struct io_tlb_pool *pool;
+ unsigned long nslabs;
+ unsigned long flags;
+ u64 phys_limit;
+ int cpu, i;
+ int index;
+ if (alloc_size > IO_TLB_SEGSIZE * IO_TLB_SIZE)
+ return -1;
+
+ cpu = raw_smp_processor_id();
+ for (i = 0; i < default_nareas; ++i) {
+ index = swiotlb_search_area(dev, cpu, i, orig_addr, alloc_size,
+ alloc_align_mask, &pool);
+ if (index >= 0)
+ goto found;
+ }
+
+ if (!mem->can_grow)
+ return -1;
+
+ schedule_work(&mem->dyn_alloc);
+
+ nslabs = nr_slots(alloc_size);
+ phys_limit = min_not_zero(*dev->dma_mask, dev->bus_dma_limit);
+ pool = swiotlb_alloc_pool(dev, nslabs, nslabs, 1, phys_limit,
+ GFP_NOWAIT);
+ if (!pool)
+ return -1;
+
+ index = swiotlb_search_pool_area(dev, pool, 0, orig_addr,
+ alloc_size, alloc_align_mask);
+ if (index < 0) {
+ swiotlb_dyn_free(&pool->rcu);
+ return -1;
+ }
+
+ pool->transient = true;
+ spin_lock_irqsave(&dev->dma_io_tlb_lock, flags);
+ list_add_rcu(&pool->node, &dev->dma_io_tlb_pools);
+ spin_unlock_irqrestore(&dev->dma_io_tlb_lock, flags);
+ inc_transient_used(mem, pool->nslabs);
+
+found:
+ WRITE_ONCE(dev->dma_uses_io_tlb, true);
+
+ /*
+ * The general barrier orders reads and writes against a presumed store
+ * of the SWIOTLB buffer address by a device driver (to a driver private
+ * data structure). It serves two purposes.
+ *
+ * First, the store to dev->dma_uses_io_tlb must be ordered before the
+ * presumed store. This guarantees that the returned buffer address
+ * cannot be passed to another CPU before updating dev->dma_uses_io_tlb.
+ *
+ * Second, the load from mem->pools must be ordered before the same
+ * presumed store. This guarantees that the returned buffer address
+ * cannot be observed by another CPU before an update of the RCU list
+ * that was made by swiotlb_dyn_alloc() on a third CPU (cf. multicopy
+ * atomicity).
+ *
+ * See also the comment in swiotlb_find_pool().
+ */
+ smp_mb();
+
+ *retpool = pool;
+ return index;
+}
+
+#else /* !CONFIG_SWIOTLB_DYNAMIC */
+
+static int swiotlb_find_slots(struct device *dev, phys_addr_t orig_addr,
+ size_t alloc_size, unsigned int alloc_align_mask,
+ struct io_tlb_pool **retpool)
+{
+ struct io_tlb_pool *pool;
+ int start, i;
+ int index;
+
+ *retpool = pool = &dev->dma_io_tlb_mem->defpool;
+ i = start = raw_smp_processor_id() & (pool->nareas - 1);
do {
- index = swiotlb_do_find_slots(dev, i, orig_addr, alloc_size,
- alloc_align_mask);
+ index = swiotlb_search_pool_area(dev, pool, i, orig_addr,
+ alloc_size, alloc_align_mask);
if (index >= 0)
return index;
- if (++i >= mem->nareas)
+ if (++i >= pool->nareas)
i = 0;
} while (i != start);
-
return -1;
}
+#endif /* CONFIG_SWIOTLB_DYNAMIC */
+
+#ifdef CONFIG_DEBUG_FS
+
+/**
+ * mem_used() - get number of used slots in an allocator
+ * @mem: Software IO TLB allocator.
+ *
+ * The result is accurate in this version of the function, because an atomic
+ * counter is available if CONFIG_DEBUG_FS is set.
+ *
+ * Return: Number of used slots.
+ */
static unsigned long mem_used(struct io_tlb_mem *mem)
{
+ return atomic_long_read(&mem->total_used);
+}
+
+#else /* !CONFIG_DEBUG_FS */
+
+/**
+ * mem_pool_used() - get number of used slots in a memory pool
+ * @pool: Software IO TLB memory pool.
+ *
+ * The result is not accurate, see mem_used().
+ *
+ * Return: Approximate number of used slots.
+ */
+static unsigned long mem_pool_used(struct io_tlb_pool *pool)
+{
int i;
unsigned long used = 0;
- for (i = 0; i < mem->nareas; i++)
- used += mem->areas[i].used;
+ for (i = 0; i < pool->nareas; i++)
+ used += pool->areas[i].used;
+ return used;
+}
+
+/**
+ * mem_used() - get number of used slots in an allocator
+ * @mem: Software IO TLB allocator.
+ *
+ * The result is not accurate, because there is no locking of individual
+ * areas.
+ *
+ * Return: Approximate number of used slots.
+ */
+static unsigned long mem_used(struct io_tlb_mem *mem)
+{
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ struct io_tlb_pool *pool;
+ unsigned long used = 0;
+
+ rcu_read_lock();
+ list_for_each_entry_rcu(pool, &mem->pools, node)
+ used += mem_pool_used(pool);
+ rcu_read_unlock();
+
return used;
+#else
+ return mem_pool_used(&mem->defpool);
+#endif
}
+#endif /* CONFIG_DEBUG_FS */
+
+/**
+ * swiotlb_tbl_map_single() - bounce buffer map a single contiguous physical area
+ * @dev: Device which maps the buffer.
+ * @orig_addr: Original (non-bounced) physical IO buffer address
+ * @mapping_size: Requested size of the actual bounce buffer, excluding
+ * any pre- or post-padding for alignment
+ * @alloc_align_mask: Required start and end alignment of the allocated buffer
+ * @dir: DMA direction
+ * @attrs: Optional DMA attributes for the map operation
+ *
+ * Find and allocate a suitable sequence of IO TLB slots for the request.
+ * The allocated space starts at an alignment specified by alloc_align_mask,
+ * and the size of the allocated space is rounded up so that the total amount
+ * of allocated space is a multiple of (alloc_align_mask + 1). If
+ * alloc_align_mask is zero, the allocated space may be at any alignment and
+ * the size is not rounded up.
+ *
+ * The returned address is within the allocated space and matches the bits
+ * of orig_addr that are specified in the DMA min_align_mask for the device. As
+ * such, this returned address may be offset from the beginning of the allocated
+ * space. The bounce buffer space starting at the returned address for
+ * mapping_size bytes is initialized to the contents of the original IO buffer
+ * area. Any pre-padding (due to an offset) and any post-padding (due to
+ * rounding-up the size) is not initialized.
+ */
phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
- size_t mapping_size, size_t alloc_size,
- unsigned int alloc_align_mask, enum dma_data_direction dir,
- unsigned long attrs)
+ size_t mapping_size, unsigned int alloc_align_mask,
+ enum dma_data_direction dir, unsigned long attrs)
{
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
- unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+ unsigned int offset;
+ struct io_tlb_pool *pool;
unsigned int i;
+ size_t size;
int index;
phys_addr_t tlb_addr;
+ unsigned short pad_slots;
if (!mem || !mem->nslabs) {
dev_warn_ratelimited(dev,
@@ -758,52 +1386,73 @@ phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
- if (mapping_size > alloc_size) {
- dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
- mapping_size, alloc_size);
- return (phys_addr_t)DMA_MAPPING_ERROR;
- }
+ /*
+ * The default swiotlb memory pool is allocated with PAGE_SIZE
+ * alignment. If a mapping is requested with larger alignment,
+ * the mapping may be unable to use the initial slot(s) in all
+ * sets of IO_TLB_SEGSIZE slots. In such case, a mapping request
+ * of or near the maximum mapping size would always fail.
+ */
+ dev_WARN_ONCE(dev, alloc_align_mask > ~PAGE_MASK,
+ "Alloc alignment may prevent fulfilling requests with max mapping_size\n");
- index = swiotlb_find_slots(dev, orig_addr,
- alloc_size + offset, alloc_align_mask);
+ offset = swiotlb_align_offset(dev, alloc_align_mask, orig_addr);
+ size = ALIGN(mapping_size + offset, alloc_align_mask + 1);
+ index = swiotlb_find_slots(dev, orig_addr, size, alloc_align_mask, &pool);
if (index == -1) {
if (!(attrs & DMA_ATTR_NO_WARN))
dev_warn_ratelimited(dev,
"swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
- alloc_size, mem->nslabs, mem_used(mem));
+ size, mem->nslabs, mem_used(mem));
return (phys_addr_t)DMA_MAPPING_ERROR;
}
/*
+ * If dma_skip_sync was set, reset it on first SWIOTLB buffer
+ * mapping to always sync SWIOTLB buffers.
+ */
+ dma_reset_need_sync(dev);
+
+ /*
* Save away the mapping from the original address to the DMA address.
* This is needed when we sync the memory. Then we sync the buffer if
* needed.
*/
- for (i = 0; i < nr_slots(alloc_size + offset); i++)
- mem->slots[index + i].orig_addr = slot_addr(orig_addr, i);
- tlb_addr = slot_addr(mem->start, index) + offset;
+ pad_slots = offset >> IO_TLB_SHIFT;
+ offset &= (IO_TLB_SIZE - 1);
+ index += pad_slots;
+ pool->slots[index].pad_slots = pad_slots;
+ for (i = 0; i < (nr_slots(size) - pad_slots); i++)
+ pool->slots[index + i].orig_addr = slot_addr(orig_addr, i);
+ tlb_addr = slot_addr(pool->start, index) + offset;
/*
- * When dir == DMA_FROM_DEVICE we could omit the copy from the orig
- * to the tlb buffer, if we knew for sure the device will
- * overwrite the entire current content. But we don't. Thus
- * unconditional bounce may prevent leaking swiotlb content (i.e.
- * kernel memory) to user-space.
+ * When the device is writing memory, i.e. dir == DMA_FROM_DEVICE, copy
+ * the original buffer to the TLB buffer before initiating DMA in order
+ * to preserve the original's data if the device does a partial write,
+ * i.e. if the device doesn't overwrite the entire buffer. Preserving
+ * the original data, even if it's garbage, is necessary to match
+ * hardware behavior. Use of swiotlb is supposed to be transparent,
+ * i.e. swiotlb must not corrupt memory by clobbering unwritten bytes.
*/
- swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
+ swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE, pool);
return tlb_addr;
}
-static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
+static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr,
+ struct io_tlb_pool *mem)
{
- struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
unsigned long flags;
- unsigned int offset = swiotlb_align_offset(dev, tlb_addr);
- int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
- int nslots = nr_slots(mem->slots[index].alloc_size + offset);
- int aindex = index / mem->area_nslabs;
- struct io_tlb_area *area = &mem->areas[aindex];
+ unsigned int offset = swiotlb_align_offset(dev, 0, tlb_addr);
+ int index, nslots, aindex;
+ struct io_tlb_area *area;
int count, i;
+ index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
+ index -= mem->slots[index].pad_slots;
+ nslots = nr_slots(mem->slots[index].alloc_size + offset);
+ aindex = index / mem->area_nslabs;
+ area = &mem->areas[aindex];
+
/*
* Return the buffer to the free list by setting the corresponding
* entries to indicate the number of contiguous entries available.
@@ -826,6 +1475,7 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
mem->slots[i].list = ++count;
mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
mem->slots[i].alloc_size = 0;
+ mem->slots[i].pad_slots = 0;
}
/*
@@ -838,39 +1488,81 @@ static void swiotlb_release_slots(struct device *dev, phys_addr_t tlb_addr)
mem->slots[i].list = ++count;
area->used -= nslots;
spin_unlock_irqrestore(&area->lock, flags);
+
+ dec_used(dev->dma_io_tlb_mem, nslots);
+}
+
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+
+/**
+ * swiotlb_del_transient() - delete a transient memory pool
+ * @dev: Device which mapped the buffer.
+ * @tlb_addr: Physical address within a bounce buffer.
+ * @pool: Pointer to the transient memory pool to be checked and deleted.
+ *
+ * Check whether the address belongs to a transient SWIOTLB memory pool.
+ * If yes, then delete the pool.
+ *
+ * Return: %true if @tlb_addr belonged to a transient pool that was released.
+ */
+static bool swiotlb_del_transient(struct device *dev, phys_addr_t tlb_addr,
+ struct io_tlb_pool *pool)
+{
+ if (!pool->transient)
+ return false;
+
+ dec_used(dev->dma_io_tlb_mem, pool->nslabs);
+ swiotlb_del_pool(dev, pool);
+ dec_transient_used(dev->dma_io_tlb_mem, pool->nslabs);
+ return true;
}
+#else /* !CONFIG_SWIOTLB_DYNAMIC */
+
+static inline bool swiotlb_del_transient(struct device *dev,
+ phys_addr_t tlb_addr, struct io_tlb_pool *pool)
+{
+ return false;
+}
+
+#endif /* CONFIG_SWIOTLB_DYNAMIC */
+
/*
* tlb_addr is the physical address of the bounce buffer to unmap.
*/
-void swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
- size_t mapping_size, enum dma_data_direction dir,
- unsigned long attrs)
+void __swiotlb_tbl_unmap_single(struct device *dev, phys_addr_t tlb_addr,
+ size_t mapping_size, enum dma_data_direction dir,
+ unsigned long attrs, struct io_tlb_pool *pool)
{
/*
* First, sync the memory before unmapping the entry
*/
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_FROM_DEVICE);
+ swiotlb_bounce(dev, tlb_addr, mapping_size,
+ DMA_FROM_DEVICE, pool);
- swiotlb_release_slots(dev, tlb_addr);
+ if (swiotlb_del_transient(dev, tlb_addr, pool))
+ return;
+ swiotlb_release_slots(dev, tlb_addr, pool);
}
-void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
- size_t size, enum dma_data_direction dir)
+void __swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir,
+ struct io_tlb_pool *pool)
{
if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
- swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE);
+ swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE, pool);
else
BUG_ON(dir != DMA_FROM_DEVICE);
}
-void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
- size_t size, enum dma_data_direction dir)
+void __swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir,
+ struct io_tlb_pool *pool)
{
if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
- swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE);
+ swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE, pool);
else
BUG_ON(dir != DMA_TO_DEVICE);
}
@@ -887,16 +1579,16 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
trace_swiotlb_bounced(dev, phys_to_dma(dev, paddr), size);
- swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, size, 0, dir,
- attrs);
+ swiotlb_addr = swiotlb_tbl_map_single(dev, paddr, size, 0, dir, attrs);
if (swiotlb_addr == (phys_addr_t)DMA_MAPPING_ERROR)
return DMA_MAPPING_ERROR;
/* Ensure that the address returned is DMA'ble */
dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr);
if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
- swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir,
- attrs | DMA_ATTR_SKIP_CPU_SYNC);
+ __swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir,
+ attrs | DMA_ATTR_SKIP_CPU_SYNC,
+ swiotlb_find_pool(dev, swiotlb_addr));
dev_WARN_ONCE(dev, 1,
"swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
&dma_addr, size, *dev->dma_mask, dev->bus_dma_limit);
@@ -924,20 +1616,98 @@ size_t swiotlb_max_mapping_size(struct device *dev)
return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE - min_align;
}
+/**
+ * is_swiotlb_allocated() - check if the default software IO TLB is initialized
+ */
+bool is_swiotlb_allocated(void)
+{
+ return io_tlb_default_mem.nslabs;
+}
+
bool is_swiotlb_active(struct device *dev)
{
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
return mem && mem->nslabs;
}
-EXPORT_SYMBOL_GPL(is_swiotlb_active);
+
+/**
+ * default_swiotlb_base() - get the base address of the default SWIOTLB
+ *
+ * Get the lowest physical address used by the default software IO TLB pool.
+ */
+phys_addr_t default_swiotlb_base(void)
+{
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ io_tlb_default_mem.can_grow = false;
+#endif
+ return io_tlb_default_mem.defpool.start;
+}
+
+/**
+ * default_swiotlb_limit() - get the address limit of the default SWIOTLB
+ *
+ * Get the highest physical address used by the default software IO TLB pool.
+ */
+phys_addr_t default_swiotlb_limit(void)
+{
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ return io_tlb_default_mem.phys_limit;
+#else
+ return io_tlb_default_mem.defpool.end - 1;
+#endif
+}
+
+#ifdef CONFIG_DEBUG_FS
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+static unsigned long mem_transient_used(struct io_tlb_mem *mem)
+{
+ return atomic_long_read(&mem->transient_nslabs);
+}
+
+static int io_tlb_transient_used_get(void *data, u64 *val)
+{
+ struct io_tlb_mem *mem = data;
+
+ *val = mem_transient_used(mem);
+ return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_transient_used, io_tlb_transient_used_get,
+ NULL, "%llu\n");
+#endif /* CONFIG_SWIOTLB_DYNAMIC */
static int io_tlb_used_get(void *data, u64 *val)
{
- *val = mem_used(&io_tlb_default_mem);
+ struct io_tlb_mem *mem = data;
+
+ *val = mem_used(mem);
+ return 0;
+}
+
+static int io_tlb_hiwater_get(void *data, u64 *val)
+{
+ struct io_tlb_mem *mem = data;
+
+ *val = atomic_long_read(&mem->used_hiwater);
+ return 0;
+}
+
+static int io_tlb_hiwater_set(void *data, u64 val)
+{
+ struct io_tlb_mem *mem = data;
+
+ /* Only allow setting to zero */
+ if (val != 0)
+ return -EINVAL;
+
+ atomic_long_set(&mem->used_hiwater, val);
return 0;
}
+
DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_used, io_tlb_used_get, NULL, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_io_tlb_hiwater, io_tlb_hiwater_get,
+ io_tlb_hiwater_set, "%llu\n");
static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
const char *dirname)
@@ -947,36 +1717,58 @@ static void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
return;
debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
- debugfs_create_file("io_tlb_used", 0400, mem->debugfs, NULL,
+ debugfs_create_file("io_tlb_used", 0400, mem->debugfs, mem,
&fops_io_tlb_used);
+ debugfs_create_file("io_tlb_used_hiwater", 0600, mem->debugfs, mem,
+ &fops_io_tlb_hiwater);
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ debugfs_create_file("io_tlb_transient_nslabs", 0400, mem->debugfs,
+ mem, &fops_io_tlb_transient_used);
+#endif
}
-static int __init __maybe_unused swiotlb_create_default_debugfs(void)
+static int __init swiotlb_create_default_debugfs(void)
{
swiotlb_create_debugfs_files(&io_tlb_default_mem, "swiotlb");
return 0;
}
-#ifdef CONFIG_DEBUG_FS
late_initcall(swiotlb_create_default_debugfs);
-#endif
+
+#else /* !CONFIG_DEBUG_FS */
+
+static inline void swiotlb_create_debugfs_files(struct io_tlb_mem *mem,
+ const char *dirname)
+{
+}
+
+#endif /* CONFIG_DEBUG_FS */
#ifdef CONFIG_DMA_RESTRICTED_POOL
struct page *swiotlb_alloc(struct device *dev, size_t size)
{
struct io_tlb_mem *mem = dev->dma_io_tlb_mem;
+ struct io_tlb_pool *pool;
phys_addr_t tlb_addr;
+ unsigned int align;
int index;
if (!mem)
return NULL;
- index = swiotlb_find_slots(dev, 0, size, 0);
+ align = (1 << (get_order(size) + PAGE_SHIFT)) - 1;
+ index = swiotlb_find_slots(dev, 0, size, align, &pool);
if (index == -1)
return NULL;
- tlb_addr = slot_addr(mem->start, index);
+ tlb_addr = slot_addr(pool->start, index);
+ if (unlikely(!PAGE_ALIGNED(tlb_addr))) {
+ dev_WARN_ONCE(dev, 1, "Cannot allocate pages from non page-aligned swiotlb addr 0x%pa.\n",
+ &tlb_addr);
+ swiotlb_release_slots(dev, tlb_addr, pool);
+ return NULL;
+ }
return pfn_to_page(PFN_DOWN(tlb_addr));
}
@@ -984,11 +1776,13 @@ struct page *swiotlb_alloc(struct device *dev, size_t size)
bool swiotlb_free(struct device *dev, struct page *page, size_t size)
{
phys_addr_t tlb_addr = page_to_phys(page);
+ struct io_tlb_pool *pool;
- if (!is_swiotlb_buffer(dev, tlb_addr))
+ pool = swiotlb_find_pool(dev, tlb_addr);
+ if (!pool)
return false;
- swiotlb_release_slots(dev, tlb_addr);
+ swiotlb_release_slots(dev, tlb_addr, pool);
return true;
}
@@ -1002,35 +1796,49 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,
/* Set Per-device io tlb area to one */
unsigned int nareas = 1;
+ if (PageHighMem(pfn_to_page(PHYS_PFN(rmem->base)))) {
+ dev_err(dev, "Restricted DMA pool must be accessible within the linear mapping.");
+ return -EINVAL;
+ }
+
/*
* Since multiple devices can share the same pool, the private data,
* io_tlb_mem struct, will be initialized by the first device attached
* to it.
*/
if (!mem) {
+ struct io_tlb_pool *pool;
+
mem = kzalloc(sizeof(*mem), GFP_KERNEL);
if (!mem)
return -ENOMEM;
+ pool = &mem->defpool;
- mem->slots = kcalloc(nslabs, sizeof(*mem->slots), GFP_KERNEL);
- if (!mem->slots) {
+ pool->slots = kcalloc(nslabs, sizeof(*pool->slots), GFP_KERNEL);
+ if (!pool->slots) {
kfree(mem);
return -ENOMEM;
}
- mem->areas = kcalloc(nareas, sizeof(*mem->areas),
+ pool->areas = kcalloc(nareas, sizeof(*pool->areas),
GFP_KERNEL);
- if (!mem->areas) {
- kfree(mem->slots);
+ if (!pool->areas) {
+ kfree(pool->slots);
kfree(mem);
return -ENOMEM;
}
set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
rmem->size >> PAGE_SHIFT);
- swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE,
- false, nareas);
+ swiotlb_init_io_tlb_pool(pool, rmem->base, nslabs,
+ false, nareas);
+ mem->force_bounce = true;
mem->for_alloc = true;
+#ifdef CONFIG_SWIOTLB_DYNAMIC
+ spin_lock_init(&mem->lock);
+ INIT_LIST_HEAD_RCU(&mem->pools);
+#endif
+ add_mem_pool(mem, pool);
rmem->priv = mem;
@@ -1063,11 +1871,6 @@ static int __init rmem_swiotlb_setup(struct reserved_mem *rmem)
of_get_flat_dt_prop(node, "no-map", NULL))
return -EINVAL;
- if (PageHighMem(pfn_to_page(PHYS_PFN(rmem->base)))) {
- pr_err("Restricted DMA pool must be accessible within the linear mapping.");
- return -EINVAL;
- }
-
rmem->ops = &rmem_swiotlb_ops;
pr_info("Reserved memory: created restricted DMA pool at %pa, size %ld MiB\n",
&rmem->base, (unsigned long)rmem->size / SZ_1M);