Diffstat (limited to 'drivers/iommu/iova.c')
| -rw-r--r-- | drivers/iommu/iova.c | 725 |
1 file changed, 418 insertions, 307 deletions
diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 246f14c83944..18f839721813 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -1,28 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0-only /* * Copyright © 2006-2009, Intel Corporation. * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> */ #include <linux/iova.h> +#include <linux/kmemleak.h> #include <linux/module.h> #include <linux/slab.h> #include <linux/smp.h> #include <linux/bitops.h> #include <linux/cpu.h> +#include <linux/workqueue.h> + +/* The anchor node sits above the top of the usable address space */ +#define IOVA_ANCHOR ~0UL + +#define IOVA_RANGE_CACHE_MAX_SIZE 6 /* log of max cached IOVA range size (in pages) */ static bool iova_rcache_insert(struct iova_domain *iovad, unsigned long pfn, @@ -30,12 +25,18 @@ static bool iova_rcache_insert(struct iova_domain *iovad, static unsigned long iova_rcache_get(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn); -static void init_iova_rcaches(struct iova_domain *iovad); static void free_iova_rcaches(struct iova_domain *iovad); +static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); +static void free_global_cached_iovas(struct iova_domain *iovad); + +static struct iova *to_iova(struct rb_node *node) +{ + return rb_entry(node, struct iova, node); +} void init_iova_domain(struct iova_domain *iovad, unsigned long granule, - unsigned long start_pfn, unsigned long pfn_32bit) + unsigned long start_pfn) { /* * IOVA granularity will normally be equal to the smallest @@ -46,59 +47,90 @@ init_iova_domain(struct iova_domain *iovad, unsigned long granule, spin_lock_init(&iovad->iova_rbtree_lock); iovad->rbroot = RB_ROOT; - iovad->cached32_node = NULL; + iovad->cached_node = &iovad->anchor.node; + iovad->cached32_node = &iovad->anchor.node; iovad->granule = granule; iovad->start_pfn = start_pfn; - iovad->dma_32bit_pfn = pfn_32bit + 1; - init_iova_rcaches(iovad); + iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad)); + iovad->max32_alloc_size = iovad->dma_32bit_pfn; + iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR; + rb_link_node(&iovad->anchor.node, NULL, &iovad->rbroot.rb_node); + rb_insert_color(&iovad->anchor.node, &iovad->rbroot); } EXPORT_SYMBOL_GPL(init_iova_domain); static struct rb_node * -__get_cached_rbnode(struct iova_domain *iovad, unsigned long *limit_pfn) -{ - if ((*limit_pfn > iovad->dma_32bit_pfn) || - (iovad->cached32_node == NULL)) - return rb_last(&iovad->rbroot); - else { - struct rb_node *prev_node = rb_prev(iovad->cached32_node); - struct iova *curr_iova = - rb_entry(iovad->cached32_node, struct iova, node); - *limit_pfn = curr_iova->pfn_lo; - return prev_node; - } +__get_cached_rbnode(struct iova_domain *iovad, unsigned long limit_pfn) +{ + if (limit_pfn <= iovad->dma_32bit_pfn) + return iovad->cached32_node; + + return 
iovad->cached_node; } static void -__cached_rbnode_insert_update(struct iova_domain *iovad, - unsigned long limit_pfn, struct iova *new) +__cached_rbnode_insert_update(struct iova_domain *iovad, struct iova *new) { - if (limit_pfn != iovad->dma_32bit_pfn) - return; - iovad->cached32_node = &new->node; + if (new->pfn_hi < iovad->dma_32bit_pfn) + iovad->cached32_node = &new->node; + else + iovad->cached_node = &new->node; } static void __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) { struct iova *cached_iova; - struct rb_node *curr; - if (!iovad->cached32_node) - return; - curr = iovad->cached32_node; - cached_iova = rb_entry(curr, struct iova, node); + cached_iova = to_iova(iovad->cached32_node); + if (free == cached_iova || + (free->pfn_hi < iovad->dma_32bit_pfn && + free->pfn_lo >= cached_iova->pfn_lo)) + iovad->cached32_node = rb_next(&free->node); - if (free->pfn_lo >= cached_iova->pfn_lo) { - struct rb_node *node = rb_next(&free->node); - struct iova *iova = rb_entry(node, struct iova, node); + if (free->pfn_lo < iovad->dma_32bit_pfn) + iovad->max32_alloc_size = iovad->dma_32bit_pfn; - /* only cache if it's below 32bit pfn */ - if (node && iova->pfn_lo < iovad->dma_32bit_pfn) - iovad->cached32_node = node; - else - iovad->cached32_node = NULL; + cached_iova = to_iova(iovad->cached_node); + if (free->pfn_lo >= cached_iova->pfn_lo) + iovad->cached_node = rb_next(&free->node); +} + +static struct rb_node *iova_find_limit(struct iova_domain *iovad, unsigned long limit_pfn) +{ + struct rb_node *node, *next; + /* + * Ideally what we'd like to judge here is whether limit_pfn is close + * enough to the highest-allocated IOVA that starting the allocation + * walk from the anchor node will be quicker than this initial work to + * find an exact starting point (especially if that ends up being the + * anchor node anyway). This is an incredibly crude approximation which + * only really helps the most likely case, but is at least trivially easy. + */ + if (limit_pfn > iovad->dma_32bit_pfn) + return &iovad->anchor.node; + + node = iovad->rbroot.rb_node; + while (to_iova(node)->pfn_hi < limit_pfn) + node = node->rb_right; + +search_left: + while (node->rb_left && to_iova(node->rb_left)->pfn_lo >= limit_pfn) + node = node->rb_left; + + if (!node->rb_left) + return node; + + next = node->rb_left; + while (next->rb_right) { + next = next->rb_right; + if (to_iova(next)->pfn_lo >= limit_pfn) { + node = next; + goto search_left; + } } + + return node; } /* Insert the iova into domain rbtree by holding writer lock */ @@ -111,7 +143,7 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova, new = (start) ? 
&start : &(root->rb_node); /* Figure out where to put new node */ while (*new) { - struct iova *this = rb_entry(*new, struct iova, node); + struct iova *this = to_iova(*new); parent = *new; @@ -129,120 +161,81 @@ iova_insert_rbtree(struct rb_root *root, struct iova *iova, rb_insert_color(&iova->node, root); } -/* - * Computes the padding size required, to make the start address - * naturally aligned on the power-of-two order of its size - */ -static unsigned int -iova_get_pad_size(unsigned int size, unsigned int limit_pfn) -{ - return (limit_pfn - size) & (__roundup_pow_of_two(size) - 1); -} - static int __alloc_and_insert_iova_range(struct iova_domain *iovad, unsigned long size, unsigned long limit_pfn, struct iova *new, bool size_aligned) { - struct rb_node *prev, *curr = NULL; + struct rb_node *curr, *prev; + struct iova *curr_iova; unsigned long flags; - unsigned long saved_pfn; - unsigned int pad_size = 0; + unsigned long new_pfn, retry_pfn; + unsigned long align_mask = ~0UL; + unsigned long high_pfn = limit_pfn, low_pfn = iovad->start_pfn; + + if (size_aligned) + align_mask <<= fls_long(size - 1); /* Walk the tree backwards */ spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - saved_pfn = limit_pfn; - curr = __get_cached_rbnode(iovad, &limit_pfn); - prev = curr; - while (curr) { - struct iova *curr_iova = rb_entry(curr, struct iova, node); - - if (limit_pfn <= curr_iova->pfn_lo) { - goto move_left; - } else if (limit_pfn > curr_iova->pfn_hi) { - if (size_aligned) - pad_size = iova_get_pad_size(size, limit_pfn); - if ((curr_iova->pfn_hi + size + pad_size) < limit_pfn) - break; /* found a free slot */ - } - limit_pfn = curr_iova->pfn_lo; -move_left: + if (limit_pfn <= iovad->dma_32bit_pfn && + size >= iovad->max32_alloc_size) + goto iova32_full; + + curr = __get_cached_rbnode(iovad, limit_pfn); + curr_iova = to_iova(curr); + retry_pfn = curr_iova->pfn_hi; + +retry: + do { + high_pfn = min(high_pfn, curr_iova->pfn_lo); + new_pfn = (high_pfn - size) & align_mask; prev = curr; curr = rb_prev(curr); - } - - if (!curr) { - if (size_aligned) - pad_size = iova_get_pad_size(size, limit_pfn); - if ((iovad->start_pfn + size + pad_size) > limit_pfn) { - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - return -ENOMEM; + curr_iova = to_iova(curr); + } while (curr && new_pfn <= curr_iova->pfn_hi && new_pfn >= low_pfn); + + if (high_pfn < size || new_pfn < low_pfn) { + if (low_pfn == iovad->start_pfn && retry_pfn < limit_pfn) { + high_pfn = limit_pfn; + low_pfn = retry_pfn + 1; + curr = iova_find_limit(iovad, limit_pfn); + curr_iova = to_iova(curr); + goto retry; } + iovad->max32_alloc_size = size; + goto iova32_full; } /* pfn_lo will point to size aligned address if size_aligned is set */ - new->pfn_lo = limit_pfn - (size + pad_size); + new->pfn_lo = new_pfn; new->pfn_hi = new->pfn_lo + size - 1; /* If we have 'prev', it's a valid place to start the insertion. 
*/ iova_insert_rbtree(&iovad->rbroot, new, prev); - __cached_rbnode_insert_update(iovad, saved_pfn, new); + __cached_rbnode_insert_update(iovad, new); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - - return 0; + +iova32_full: + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return -ENOMEM; } static struct kmem_cache *iova_cache; static unsigned int iova_cache_users; static DEFINE_MUTEX(iova_cache_mutex); -struct iova *alloc_iova_mem(void) -{ - return kmem_cache_alloc(iova_cache, GFP_ATOMIC); -} -EXPORT_SYMBOL(alloc_iova_mem); - -void free_iova_mem(struct iova *iova) -{ - kmem_cache_free(iova_cache, iova); -} -EXPORT_SYMBOL(free_iova_mem); - -int iova_cache_get(void) +static struct iova *alloc_iova_mem(void) { - mutex_lock(&iova_cache_mutex); - if (!iova_cache_users) { - iova_cache = kmem_cache_create( - "iommu_iova", sizeof(struct iova), 0, - SLAB_HWCACHE_ALIGN, NULL); - if (!iova_cache) { - mutex_unlock(&iova_cache_mutex); - printk(KERN_ERR "Couldn't create iova cache\n"); - return -ENOMEM; - } - } - - iova_cache_users++; - mutex_unlock(&iova_cache_mutex); - - return 0; + return kmem_cache_zalloc(iova_cache, GFP_ATOMIC | __GFP_NOWARN); } -EXPORT_SYMBOL_GPL(iova_cache_get); -void iova_cache_put(void) +static void free_iova_mem(struct iova *iova) { - mutex_lock(&iova_cache_mutex); - if (WARN_ON(!iova_cache_users)) { - mutex_unlock(&iova_cache_mutex); - return; - } - iova_cache_users--; - if (!iova_cache_users) - kmem_cache_destroy(iova_cache); - mutex_unlock(&iova_cache_mutex); + if (iova->pfn_lo != IOVA_ANCHOR) + kmem_cache_free(iova_cache, iova); } -EXPORT_SYMBOL_GPL(iova_cache_put); /** * alloc_iova - allocates an iova @@ -287,28 +280,24 @@ private_find_iova(struct iova_domain *iovad, unsigned long pfn) assert_spin_locked(&iovad->iova_rbtree_lock); while (node) { - struct iova *iova = rb_entry(node, struct iova, node); - - /* If pfn falls within iova's range, return iova */ - if ((pfn >= iova->pfn_lo) && (pfn <= iova->pfn_hi)) { - return iova; - } + struct iova *iova = to_iova(node); if (pfn < iova->pfn_lo) node = node->rb_left; - else if (pfn > iova->pfn_lo) + else if (pfn > iova->pfn_hi) node = node->rb_right; + else + return iova; /* pfn falls within iova's range */ } return NULL; } -static void private_free_iova(struct iova_domain *iovad, struct iova *iova) +static void remove_iova(struct iova_domain *iovad, struct iova *iova) { assert_spin_locked(&iovad->iova_rbtree_lock); __cached_rbnode_delete_update(iovad, iova); rb_erase(&iova->node, &iovad->rbroot); - free_iova_mem(iova); } /** @@ -316,7 +305,7 @@ static void private_free_iova(struct iova_domain *iovad, struct iova *iova) * @iovad: - iova domain in question. * @pfn: - page frame number * This function finds and returns an iova belonging to the - * given doamin which matches the given pfn. + * given domain which matches the given pfn. 
*/ struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn) { @@ -343,8 +332,9 @@ __free_iova(struct iova_domain *iovad, struct iova *iova) unsigned long flags; spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - private_free_iova(iovad, iova); + remove_iova(iovad, iova); spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + free_iova_mem(iova); } EXPORT_SYMBOL_GPL(__free_iova); @@ -358,11 +348,18 @@ EXPORT_SYMBOL_GPL(__free_iova); void free_iova(struct iova_domain *iovad, unsigned long pfn) { - struct iova *iova = find_iova(iovad, pfn); - - if (iova) - __free_iova(iovad, iova); + unsigned long flags; + struct iova *iova; + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + iova = private_find_iova(iovad, pfn); + if (!iova) { + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + return; + } + remove_iova(iovad, iova); + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + free_iova_mem(iova); } EXPORT_SYMBOL_GPL(free_iova); @@ -371,18 +368,28 @@ EXPORT_SYMBOL_GPL(free_iova); * @iovad: - iova domain in question * @size: - size of page frames to allocate * @limit_pfn: - max limit address + * @flush_rcache: - set to flush rcache on regular allocation failure * This function tries to satisfy an iova allocation from the rcache, - * and falls back to regular allocation on failure. + * and falls back to regular allocation on failure. If regular allocation + * fails too and the flush_rcache flag is set then the rcache will be flushed. */ unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn) + unsigned long limit_pfn, bool flush_rcache) { - bool flushed_rcache = false; unsigned long iova_pfn; struct iova *new_iova; - iova_pfn = iova_rcache_get(iovad, size, limit_pfn); + /* + * Freeing non-power-of-two-sized allocations back into the IOVA caches + * will come back to bite us badly, so we have to waste a bit of space + * rounding up anything cacheable to make sure that can't happen. The + * order of the unadjusted size will still match upon freeing. + */ + if (size < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) + size = roundup_pow_of_two(size); + + iova_pfn = iova_rcache_get(iovad, size, limit_pfn + 1); if (iova_pfn) return iova_pfn; @@ -391,13 +398,14 @@ retry: if (!new_iova) { unsigned int cpu; - if (flushed_rcache) + if (!flush_rcache) return 0; /* Try replenishing IOVAs by flushing rcache. */ - flushed_rcache = true; + flush_rcache = false; for_each_online_cpu(cpu) free_cpu_cached_iovas(cpu, iovad); + free_global_cached_iovas(iovad); goto retry; } @@ -423,27 +431,27 @@ free_iova_fast(struct iova_domain *iovad, unsigned long pfn, unsigned long size) } EXPORT_SYMBOL_GPL(free_iova_fast); +static void iova_domain_free_rcaches(struct iova_domain *iovad) +{ + cpuhp_state_remove_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, + &iovad->cpuhp_dead); + free_iova_rcaches(iovad); +} + /** - * put_iova_domain - destroys the iova doamin + * put_iova_domain - destroys the iova domain * @iovad: - iova domain in question. * All the iova's in that domain are destroyed. 
*/ void put_iova_domain(struct iova_domain *iovad) { - struct rb_node *node; - unsigned long flags; + struct iova *iova, *tmp; - free_iova_rcaches(iovad); - spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - node = rb_first(&iovad->rbroot); - while (node) { - struct iova *iova = rb_entry(node, struct iova, node); + if (iovad->rcaches) + iova_domain_free_rcaches(iovad); - rb_erase(node, &iovad->rbroot); + rbtree_postorder_for_each_entry_safe(iova, tmp, &iovad->rbroot, node) free_iova_mem(iova); - node = rb_first(&iovad->rbroot); - } - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); } EXPORT_SYMBOL_GPL(put_iova_domain); @@ -451,7 +459,7 @@ static int __is_range_overlap(struct rb_node *node, unsigned long pfn_lo, unsigned long pfn_hi) { - struct iova *iova = rb_entry(node, struct iova, node); + struct iova *iova = to_iova(node); if ((pfn_lo <= iova->pfn_hi) && (pfn_hi >= iova->pfn_lo)) return 1; @@ -499,7 +507,7 @@ __adjust_overlap_range(struct iova *iova, * reserve_iova - reserves an iova in the given range * @iovad: - iova domain pointer * @pfn_lo: - lower page frame address - * @pfn_hi:- higher pfn adderss + * @pfn_hi:- higher pfn address * This function allocates reserves the address range from pfn_lo to pfn_hi so * that this address is not dished out as part of alloc_iova. */ @@ -512,10 +520,14 @@ reserve_iova(struct iova_domain *iovad, struct iova *iova; unsigned int overlap = 0; + /* Don't allow nonsensical pfns */ + if (WARN_ON((pfn_hi | pfn_lo) > (ULLONG_MAX >> iova_shift(iovad)))) + return NULL; + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); for (node = rb_first(&iovad->rbroot); node; node = rb_next(node)) { if (__is_range_overlap(node, pfn_lo, pfn_hi)) { - iova = rb_entry(node, struct iova, node); + iova = to_iova(node); __adjust_overlap_range(iova, &pfn_lo, &pfn_hi); if ((pfn_lo >= iova->pfn_lo) && (pfn_hi <= iova->pfn_hi)) @@ -537,74 +549,6 @@ finish: } EXPORT_SYMBOL_GPL(reserve_iova); -/** - * copy_reserved_iova - copies the reserved between domains - * @from: - source doamin from where to copy - * @to: - destination domin where to copy - * This function copies reserved iova's from one doamin to - * other. 
- */ -void -copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) -{ - unsigned long flags; - struct rb_node *node; - - spin_lock_irqsave(&from->iova_rbtree_lock, flags); - for (node = rb_first(&from->rbroot); node; node = rb_next(node)) { - struct iova *iova = rb_entry(node, struct iova, node); - struct iova *new_iova; - - new_iova = reserve_iova(to, iova->pfn_lo, iova->pfn_hi); - if (!new_iova) - printk(KERN_ERR "Reserve iova range %lx@%lx failed\n", - iova->pfn_lo, iova->pfn_lo); - } - spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); -} -EXPORT_SYMBOL_GPL(copy_reserved_iova); - -struct iova * -split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, - unsigned long pfn_lo, unsigned long pfn_hi) -{ - unsigned long flags; - struct iova *prev = NULL, *next = NULL; - - spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); - if (iova->pfn_lo < pfn_lo) { - prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1); - if (prev == NULL) - goto error; - } - if (iova->pfn_hi > pfn_hi) { - next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi); - if (next == NULL) - goto error; - } - - __cached_rbnode_delete_update(iovad, iova); - rb_erase(&iova->node, &iovad->rbroot); - - if (prev) { - iova_insert_rbtree(&iovad->rbroot, prev, NULL); - iova->pfn_lo = pfn_lo; - } - if (next) { - iova_insert_rbtree(&iovad->rbroot, next, NULL); - iova->pfn_hi = pfn_hi; - } - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - - return iova; - -error: - spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); - if (prev) - free_iova_mem(prev); - return NULL; -} - /* * Magazine caches for IOVA ranges. For an introduction to magazines, * see the USENIX 2001 paper "Magazines and Vmem: Extending the Slab @@ -613,12 +557,24 @@ error: * dynamic size tuning described in the paper. */ -#define IOVA_MAG_SIZE 128 +/* + * As kmalloc's buffer size is fixed to power of 2, 127 is chosen to + * assure size of 'iova_magazine' to be 1024 bytes, so that no memory + * will be wasted. Since only full magazines are inserted into the depot, + * we don't need to waste PFN capacity on a separate list head either. 
+ */ +#define IOVA_MAG_SIZE 127 + +#define IOVA_DEPOT_DELAY msecs_to_jiffies(100) struct iova_magazine { - unsigned long size; + union { + unsigned long size; + struct iova_magazine *next; + }; unsigned long pfns[IOVA_MAG_SIZE]; }; +static_assert(!(sizeof(struct iova_magazine) & (sizeof(struct iova_magazine) - 1))); struct iova_cpu_rcache { spinlock_t lock; @@ -626,14 +582,36 @@ struct iova_cpu_rcache { struct iova_magazine *prev; }; +struct iova_rcache { + spinlock_t lock; + unsigned int depot_size; + struct iova_magazine *depot; + struct iova_cpu_rcache __percpu *cpu_rcaches; + struct iova_domain *iovad; + struct delayed_work work; +}; + +static struct kmem_cache *iova_magazine_cache; + +unsigned long iova_rcache_range(void) +{ + return PAGE_SIZE << (IOVA_RANGE_CACHE_MAX_SIZE - 1); +} + static struct iova_magazine *iova_magazine_alloc(gfp_t flags) { - return kzalloc(sizeof(struct iova_magazine), flags); + struct iova_magazine *mag; + + mag = kmem_cache_alloc(iova_magazine_cache, flags); + if (mag) + mag->size = 0; + + return mag; } static void iova_magazine_free(struct iova_magazine *mag) { - kfree(mag); + kmem_cache_free(iova_magazine_cache, mag); } static void @@ -642,16 +620,16 @@ iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad) unsigned long flags; int i; - if (!mag) - return; - spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); for (i = 0 ; i < mag->size; ++i) { struct iova *iova = private_find_iova(iovad, mag->pfns[i]); - BUG_ON(!iova); - private_free_iova(iovad, iova); + if (WARN_ON(!iova)) + continue; + + remove_iova(iovad, iova); + free_iova_mem(iova); } spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); @@ -661,54 +639,126 @@ iova_magazine_free_pfns(struct iova_magazine *mag, struct iova_domain *iovad) static bool iova_magazine_full(struct iova_magazine *mag) { - return (mag && mag->size == IOVA_MAG_SIZE); + return mag->size == IOVA_MAG_SIZE; } static bool iova_magazine_empty(struct iova_magazine *mag) { - return (!mag || mag->size == 0); + return mag->size == 0; } static unsigned long iova_magazine_pop(struct iova_magazine *mag, unsigned long limit_pfn) { - BUG_ON(iova_magazine_empty(mag)); + int i; + unsigned long pfn; - if (mag->pfns[mag->size - 1] >= limit_pfn) - return 0; + /* Only fall back to the rbtree if we have no suitable pfns at all */ + for (i = mag->size - 1; mag->pfns[i] > limit_pfn; i--) + if (i == 0) + return 0; + + /* Swap it to pop it */ + pfn = mag->pfns[i]; + mag->pfns[i] = mag->pfns[--mag->size]; - return mag->pfns[--mag->size]; + return pfn; } static void iova_magazine_push(struct iova_magazine *mag, unsigned long pfn) { - BUG_ON(iova_magazine_full(mag)); - mag->pfns[mag->size++] = pfn; } -static void init_iova_rcaches(struct iova_domain *iovad) +static struct iova_magazine *iova_depot_pop(struct iova_rcache *rcache) +{ + struct iova_magazine *mag = rcache->depot; + + /* + * As the mag->next pointer is moved to rcache->depot and reset via + * the mag->size assignment, mark it as a transient false positive. 
+ */ + kmemleak_transient_leak(mag->next); + rcache->depot = mag->next; + mag->size = IOVA_MAG_SIZE; + rcache->depot_size--; + return mag; +} + +static void iova_depot_push(struct iova_rcache *rcache, struct iova_magazine *mag) +{ + mag->next = rcache->depot; + rcache->depot = mag; + rcache->depot_size++; +} + +static void iova_depot_work_func(struct work_struct *work) +{ + struct iova_rcache *rcache = container_of(work, typeof(*rcache), work.work); + struct iova_magazine *mag = NULL; + unsigned long flags; + + spin_lock_irqsave(&rcache->lock, flags); + if (rcache->depot_size > num_online_cpus()) + mag = iova_depot_pop(rcache); + spin_unlock_irqrestore(&rcache->lock, flags); + + if (mag) { + iova_magazine_free_pfns(mag, rcache->iovad); + iova_magazine_free(mag); + schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY); + } +} + +int iova_domain_init_rcaches(struct iova_domain *iovad) { - struct iova_cpu_rcache *cpu_rcache; - struct iova_rcache *rcache; unsigned int cpu; - int i; + int i, ret; + + iovad->rcaches = kcalloc(IOVA_RANGE_CACHE_MAX_SIZE, + sizeof(struct iova_rcache), + GFP_KERNEL); + if (!iovad->rcaches) + return -ENOMEM; for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + struct iova_cpu_rcache *cpu_rcache; + struct iova_rcache *rcache; + rcache = &iovad->rcaches[i]; spin_lock_init(&rcache->lock); - rcache->depot_size = 0; - rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), cache_line_size()); - if (WARN_ON(!rcache->cpu_rcaches)) - continue; + rcache->iovad = iovad; + INIT_DELAYED_WORK(&rcache->work, iova_depot_work_func); + rcache->cpu_rcaches = __alloc_percpu(sizeof(*cpu_rcache), + cache_line_size()); + if (!rcache->cpu_rcaches) { + ret = -ENOMEM; + goto out_err; + } for_each_possible_cpu(cpu) { cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); + spin_lock_init(&cpu_rcache->lock); cpu_rcache->loaded = iova_magazine_alloc(GFP_KERNEL); cpu_rcache->prev = iova_magazine_alloc(GFP_KERNEL); + if (!cpu_rcache->loaded || !cpu_rcache->prev) { + ret = -ENOMEM; + goto out_err; + } } } + + ret = cpuhp_state_add_instance_nocalls(CPUHP_IOMMU_IOVA_DEAD, + &iovad->cpuhp_dead); + if (ret) + goto out_err; + return 0; + +out_err: + free_iova_rcaches(iovad); + return ret; } +EXPORT_SYMBOL_GPL(iova_domain_init_rcaches); /* * Try inserting IOVA range starting with 'iova_pfn' into 'rcache', and @@ -720,7 +770,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, struct iova_rcache *rcache, unsigned long iova_pfn) { - struct iova_magazine *mag_to_free = NULL; struct iova_cpu_rcache *cpu_rcache; bool can_insert = false; unsigned long flags; @@ -738,13 +787,9 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, if (new_mag) { spin_lock(&rcache->lock); - if (rcache->depot_size < MAX_GLOBAL_MAGS) { - rcache->depot[rcache->depot_size++] = - cpu_rcache->loaded; - } else { - mag_to_free = cpu_rcache->loaded; - } + iova_depot_push(rcache, cpu_rcache->loaded); spin_unlock(&rcache->lock); + schedule_delayed_work(&rcache->work, IOVA_DEPOT_DELAY); cpu_rcache->loaded = new_mag; can_insert = true; @@ -756,11 +801,6 @@ static bool __iova_rcache_insert(struct iova_domain *iovad, spin_unlock_irqrestore(&cpu_rcache->lock, flags); - if (mag_to_free) { - iova_magazine_free_pfns(mag_to_free, iovad); - iova_magazine_free(mag_to_free); - } - return can_insert; } @@ -798,9 +838,9 @@ static unsigned long __iova_rcache_get(struct iova_rcache *rcache, has_pfn = true; } else { spin_lock(&rcache->lock); - if (rcache->depot_size > 0) { + if (rcache->depot) { iova_magazine_free(cpu_rcache->loaded); - 
cpu_rcache->loaded = rcache->depot[--rcache->depot_size]; + cpu_rcache->loaded = iova_depot_pop(rcache); has_pfn = true; } spin_unlock(&rcache->lock); @@ -828,27 +868,7 @@ static unsigned long iova_rcache_get(struct iova_domain *iovad, if (log_size >= IOVA_RANGE_CACHE_MAX_SIZE) return 0; - return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn); -} - -/* - * Free a cpu's rcache. - */ -static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad, - struct iova_rcache *rcache) -{ - struct iova_cpu_rcache *cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); - unsigned long flags; - - spin_lock_irqsave(&cpu_rcache->lock, flags); - - iova_magazine_free_pfns(cpu_rcache->loaded, iovad); - iova_magazine_free(cpu_rcache->loaded); - - iova_magazine_free_pfns(cpu_rcache->prev, iovad); - iova_magazine_free(cpu_rcache->prev); - - spin_unlock_irqrestore(&cpu_rcache->lock, flags); + return __iova_rcache_get(&iovad->rcaches[log_size], limit_pfn - size); } /* @@ -857,28 +877,32 @@ static void free_cpu_iova_rcache(unsigned int cpu, struct iova_domain *iovad, static void free_iova_rcaches(struct iova_domain *iovad) { struct iova_rcache *rcache; - unsigned long flags; + struct iova_cpu_rcache *cpu_rcache; unsigned int cpu; - int i, j; - for (i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { rcache = &iovad->rcaches[i]; - for_each_possible_cpu(cpu) - free_cpu_iova_rcache(cpu, iovad, rcache); - spin_lock_irqsave(&rcache->lock, flags); - free_percpu(rcache->cpu_rcaches); - for (j = 0; j < rcache->depot_size; ++j) { - iova_magazine_free_pfns(rcache->depot[j], iovad); - iova_magazine_free(rcache->depot[j]); + if (!rcache->cpu_rcaches) + break; + for_each_possible_cpu(cpu) { + cpu_rcache = per_cpu_ptr(rcache->cpu_rcaches, cpu); + iova_magazine_free(cpu_rcache->loaded); + iova_magazine_free(cpu_rcache->prev); } - spin_unlock_irqrestore(&rcache->lock, flags); + free_percpu(rcache->cpu_rcaches); + cancel_delayed_work_sync(&rcache->work); + while (rcache->depot) + iova_magazine_free(iova_depot_pop(rcache)); } + + kfree(iovad->rcaches); + iovad->rcaches = NULL; } /* * free all the IOVA ranges cached by a cpu (used when cpu is unplugged) */ -void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad) +static void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad) { struct iova_cpu_rcache *cpu_rcache; struct iova_rcache *rcache; @@ -895,5 +919,92 @@ void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad) } } +/* + * free all the IOVA ranges of global cache + */ +static void free_global_cached_iovas(struct iova_domain *iovad) +{ + struct iova_rcache *rcache; + unsigned long flags; + + for (int i = 0; i < IOVA_RANGE_CACHE_MAX_SIZE; ++i) { + rcache = &iovad->rcaches[i]; + spin_lock_irqsave(&rcache->lock, flags); + while (rcache->depot) { + struct iova_magazine *mag = iova_depot_pop(rcache); + + iova_magazine_free_pfns(mag, iovad); + iova_magazine_free(mag); + } + spin_unlock_irqrestore(&rcache->lock, flags); + } +} + +static int iova_cpuhp_dead(unsigned int cpu, struct hlist_node *node) +{ + struct iova_domain *iovad; + + iovad = hlist_entry_safe(node, struct iova_domain, cpuhp_dead); + + free_cpu_cached_iovas(cpu, iovad); + return 0; +} + +int iova_cache_get(void) +{ + int err = -ENOMEM; + + mutex_lock(&iova_cache_mutex); + if (!iova_cache_users) { + iova_cache = kmem_cache_create("iommu_iova", sizeof(struct iova), 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!iova_cache) + goto out_err; + + iova_magazine_cache = 
kmem_cache_create("iommu_iova_magazine", + sizeof(struct iova_magazine), + 0, SLAB_HWCACHE_ALIGN, NULL); + if (!iova_magazine_cache) + goto out_err; + + err = cpuhp_setup_state_multi(CPUHP_IOMMU_IOVA_DEAD, "iommu/iova:dead", + NULL, iova_cpuhp_dead); + if (err) { + pr_err("IOVA: Couldn't register cpuhp handler: %pe\n", ERR_PTR(err)); + goto out_err; + } + } + + iova_cache_users++; + mutex_unlock(&iova_cache_mutex); + + return 0; + +out_err: + kmem_cache_destroy(iova_cache); + kmem_cache_destroy(iova_magazine_cache); + mutex_unlock(&iova_cache_mutex); + return err; +} +EXPORT_SYMBOL_GPL(iova_cache_get); + +void iova_cache_put(void) +{ + mutex_lock(&iova_cache_mutex); + if (WARN_ON(!iova_cache_users)) { + mutex_unlock(&iova_cache_mutex); + return; + } + iova_cache_users--; + if (!iova_cache_users) { + cpuhp_remove_multi_state(CPUHP_IOMMU_IOVA_DEAD); + kmem_cache_destroy(iova_cache); + kmem_cache_destroy(iova_magazine_cache); + } + mutex_unlock(&iova_cache_mutex); +} +EXPORT_SYMBOL_GPL(iova_cache_put); + MODULE_AUTHOR("Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>"); +MODULE_DESCRIPTION("IOMMU I/O Virtual Address management"); MODULE_LICENSE("GPL"); |
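
The reworked __alloc_and_insert_iova_range() in the hunks above drops the old iova_get_pad_size() padding computation in favor of a mask-based, top-down search: candidates are taken from just below the current limit and masked down to the allocation's power-of-two alignment. The following stand-alone sketch shows only that PFN arithmetic; fls_long() here is a portable stand-in for the kernel helper, and the sample values are illustrative.

```c
/*
 * Sketch of the size-aligned, top-down PFN arithmetic used by the new
 * __alloc_and_insert_iova_range(): align_mask clears the low bits so
 * new_pfn lands on the allocation's power-of-two alignment boundary.
 */
#include <stdio.h>

/* Stand-in for the kernel's fls_long(): 1-based index of the highest set bit. */
static int fls_long(unsigned long x)
{
	int r = 0;

	while (x) {
		x >>= 1;
		r++;
	}
	return r;
}

int main(void)
{
	unsigned long size = 6;            /* pages; rounds to 8-page alignment */
	unsigned long high_pfn = 0x100000; /* current search limit */
	unsigned long align_mask = ~0UL << fls_long(size - 1);
	unsigned long new_pfn = (high_pfn - size) & align_mask;

	/* Prints: allocated [0xffff8, 0xffffd] -- 8-page aligned start. */
	printf("allocated [%#lx, %#lx]\n", new_pfn, new_pfn + size - 1);
	return 0;
}
```

Because the mask is applied after subtracting the size, the resulting range always ends below the limit while its start stays naturally aligned, which is what lets the allocator walk the rbtree backwards without ever computing an explicit pad.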
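alloc_iova_fast() above now rounds any cacheable size up to a power of two before touching the rcache, for the reason its new comment gives: the caches are indexed by allocation order, so freeing a non-power-of-two range into an order bucket would later hand out ranges shorter than the bucket's nominal size. A minimal sketch of that invariant, with hypothetical helper names (round_up_pow2, cache_order are not the kernel's):

```c
/*
 * Why cacheable IOVA sizes are rounded up: the rcache bucket is chosen
 * by log2(size), so a 5-page allocation must really occupy 8 pages,
 * otherwise the order-3 bucket would return 5-page ranges as if they
 * were 8 pages long.
 */
#include <stdio.h>

#define RANGE_CACHE_MAX 6 /* log2 of the largest cached size, in pages */

static unsigned long round_up_pow2(unsigned long n)
{
	unsigned long p = 1;

	while (p < n)
		p <<= 1;
	return p;
}

static int cache_order(unsigned long size)
{
	int order = 0;

	while ((1UL << order) < size)
		order++;
	return order; /* only meaningful once size is a power of two */
}

int main(void)
{
	unsigned long size = 5; /* pages requested */

	if (size < (1UL << (RANGE_CACHE_MAX - 1)))
		size = round_up_pow2(size);
	/* Prints: 5-page request cached as 8 pages in bucket 3 */
	printf("5-page request cached as %lu pages in bucket %d\n",
	       size, cache_order(size));
	return 0;
}
```

As the diff's comment notes, the order of the unadjusted size still matches on free, so the same bucket is consulted on both paths.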
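The magazine rework above sizes struct iova_magazine at 127 entries so the whole structure is a power of two (8 + 127 * 8 = 1024 bytes on LP64, matching the static_assert in the diff), and chains full magazines into the depot through the same word that otherwise holds the fill count. Below is a minimal user-space sketch of that union trick under those assumptions; all names are illustrative, not the kernel's, and the real code additionally takes the rcache lock around depot operations.

```c
/*
 * Sketch of the depot's intrusive list: while a magazine is in use,
 * the header word is its fill count; while it is parked in the depot
 * (always full), the same word is the next-pointer, so no separate
 * list head steals PFN capacity.
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

#define MAG_SIZE 127 /* 127 longs + 1 header word = a power-of-two size */

struct magazine {
	union {
		size_t size;           /* valid while the magazine is in use */
		struct magazine *next; /* valid while parked in the depot    */
	};
	unsigned long pfns[MAG_SIZE];
};

/* Mirrors the diff's static_assert: holds on both LP64 and ILP32. */
_Static_assert((sizeof(struct magazine) & (sizeof(struct magazine) - 1)) == 0,
	       "magazine size must be a power of two");

static struct magazine *depot; /* singly linked list of full magazines */

static void depot_push(struct magazine *mag)
{
	assert(mag->size == MAG_SIZE); /* only full magazines are parked */
	mag->next = depot;             /* reuses the size field's storage */
	depot = mag;
}

static struct magazine *depot_pop(void)
{
	struct magazine *mag = depot;

	if (!mag)
		return NULL;
	depot = mag->next;
	mag->size = MAG_SIZE; /* restore the count the union clobbered */
	return mag;
}

int main(void)
{
	struct magazine *mag = calloc(1, sizeof(*mag));

	for (size_t i = 0; i < MAG_SIZE; i++)
		mag->pfns[mag->size++] = 0x1000 + i;
	depot_push(mag);
	mag = depot_pop();
	printf("popped magazine holds %zu pfns\n", mag->size);
	free(mag);
	return 0;
}
```

Restoring mag->size on pop is what makes the overwrite safe, and it is also why iova_depot_pop() in the diff marks the clobbered next-pointer as a transient false positive for kmemleak.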
