From 6d1bcb957be2850e0776f24c289e1f87c256baeb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:43:07 +0100 Subject: iommu: Remove empty iommu_tlb_range_add() callback from iommu_ops Commit add02cfdc9bc ("iommu: Introduce Interface for IOMMU TLB Flushing") added three new TLB flushing operations to the IOMMU API so that the underlying driver operations can be batched when unmapping large regions of IO virtual address space. However, the ->iotlb_range_add() callback has not been implemented by any IOMMU drivers (amd_iommu.c implements it as an empty function, which incurs the overhead of an indirect branch). Instead, drivers either flush the entire IOTLB in the ->iotlb_sync() callback or perform the necessary invalidation during ->unmap(). Attempting to implement ->iotlb_range_add() for arm-smmu-v3.c revealed two major issues: 1. The page size used to map the region in the page-table is not known, and so it is not generally possible to issue TLB flushes in the most efficient manner. 2. The only mutable state passed to the callback is a pointer to the iommu_domain, which can be accessed concurrently and therefore requires expensive synchronisation to keep track of the outstanding flushes. Remove the callback entirely in preparation for extending ->unmap() and ->iotlb_sync() to update a token on the caller's stack. Signed-off-by: Will Deacon --- drivers/vfio/vfio_iommu_type1.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 054391f30fa8..fad7fd8c167c 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -696,7 +696,6 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain, if (!unmapped) { kfree(entry); } else { - iommu_tlb_range_add(domain->domain, *iova, unmapped); entry->iova = *iova; entry->phys = phys; entry->len = unmapped; -- cgit From a7d20dc19d9ea7012227be5144353012ffa3ddc4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:43:48 +0100 Subject: iommu: Introduce struct iommu_iotlb_gather for batching TLB flushes To permit batching of TLB flushes across multiple calls to the IOMMU driver's ->unmap() implementation, introduce a new structure for tracking the address range to be flushed and the granularity at which the flushing is required. This is hooked into the IOMMU API and its caller are updated to make use of the new structure. Subsequent patches will plumb this into the IOMMU drivers as well, but for now the gathering information is ignored. Signed-off-by: Will Deacon --- drivers/vfio/vfio_iommu_type1.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'drivers/vfio') diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index fad7fd8c167c..ad830abe1021 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -650,12 +650,13 @@ unpin_exit: } static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain, - struct list_head *regions) + struct list_head *regions, + struct iommu_iotlb_gather *iotlb_gather) { long unlocked = 0; struct vfio_regions *entry, *next; - iommu_tlb_sync(domain->domain); + iommu_tlb_sync(domain->domain, iotlb_gather); list_for_each_entry_safe(entry, next, regions, list) { unlocked += vfio_unpin_pages_remote(dma, @@ -685,13 +686,15 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain, struct vfio_dma *dma, dma_addr_t *iova, size_t len, phys_addr_t phys, long *unlocked, struct list_head *unmapped_list, - int *unmapped_cnt) + int *unmapped_cnt, + struct iommu_iotlb_gather *iotlb_gather) { size_t unmapped = 0; struct vfio_regions *entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry) { - unmapped = iommu_unmap_fast(domain->domain, *iova, len); + unmapped = iommu_unmap_fast(domain->domain, *iova, len, + iotlb_gather); if (!unmapped) { kfree(entry); @@ -711,8 +714,8 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain, * or in case of errors. */ if (*unmapped_cnt >= VFIO_IOMMU_TLB_SYNC_MAX || !unmapped) { - *unlocked += vfio_sync_unpin(dma, domain, - unmapped_list); + *unlocked += vfio_sync_unpin(dma, domain, unmapped_list, + iotlb_gather); *unmapped_cnt = 0; } @@ -743,6 +746,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, dma_addr_t iova = dma->iova, end = dma->iova + dma->size; struct vfio_domain *domain, *d; LIST_HEAD(unmapped_region_list); + struct iommu_iotlb_gather iotlb_gather; int unmapped_region_cnt = 0; long unlocked = 0; @@ -767,6 +771,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, cond_resched(); } + iommu_iotlb_gather_init(&iotlb_gather); while (iova < end) { size_t unmapped, len; phys_addr_t phys, next; @@ -795,7 +800,8 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, */ unmapped = unmap_unpin_fast(domain, dma, &iova, len, phys, &unlocked, &unmapped_region_list, - &unmapped_region_cnt); + &unmapped_region_cnt, + &iotlb_gather); if (!unmapped) { unmapped = unmap_unpin_slow(domain, dma, &iova, len, phys, &unlocked); @@ -806,8 +812,10 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, dma->iommu_mapped = false; - if (unmapped_region_cnt) - unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list); + if (unmapped_region_cnt) { + unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list, + &iotlb_gather); + } if (do_accounting) { vfio_lock_acct(dma, -unlocked, true); -- cgit