diff options
40 files changed, 1351 insertions, 2847 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 0a1731a0f0ef..14f56c448edc 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -2220,7 +2220,7 @@ forcing Dual Address Cycle for PCI cards supporting greater than 32-bit addressing. - iommu.strict= [ARM64, X86] Configure TLB invalidation behaviour + iommu.strict= [ARM64, X86, S390] Configure TLB invalidation behaviour Format: { "0" | "1" } 0 - Lazy mode. Request that DMA unmap operations use deferred @@ -5611,9 +5611,10 @@ s390_iommu= [HW,S390] Set s390 IOTLB flushing mode strict - With strict flushing every unmap operation will result in - an IOTLB flush. Default is lazy flushing before reuse, - which is faster. + With strict flushing every unmap operation will result + in an IOTLB flush. Default is lazy flushing before + reuse, which is faster. Deprecated, equivalent to + iommu.strict=1. s390_iommu_aperture= [KNL,S390] Specifies the size of the per device DMA address space diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml index b1b2cf81b42f..aa9e1c0895a5 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml @@ -110,6 +110,7 @@ properties: - qcom,sdm630-smmu-v2 - qcom,sdm845-smmu-v2 - qcom,sm6350-smmu-v2 + - qcom,sm7150-smmu-v2 - const: qcom,adreno-smmu - const: qcom,smmu-v2 - description: Qcom Adreno GPUs on Google Cheza platform @@ -409,6 +410,7 @@ allOf: contains: enum: - qcom,sm6350-smmu-v2 + - qcom,sm7150-smmu-v2 - qcom,sm8150-smmu-500 - qcom,sm8250-smmu-500 then: diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index b248694e0024..e91cd6bbc330 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -159,13 +159,6 @@ struct zpci_dev { unsigned long *dma_table; int tlb_refresh; - spinlock_t iommu_bitmap_lock; - unsigned long *iommu_bitmap; - unsigned long *lazy_bitmap; - unsigned long iommu_size; - unsigned long iommu_pages; - unsigned int next_bit; - struct iommu_device iommu_dev; /* IOMMU core handle */ char res_name[16]; @@ -180,10 +173,6 @@ struct zpci_dev { struct zpci_fmb *fmb; u16 fmb_update; /* update interval */ u16 fmb_length; - /* software counters */ - atomic64_t allocated_pages; - atomic64_t mapped_pages; - atomic64_t unmapped_pages; u8 version; enum pci_bus_speed max_bus_speed; diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h index d6189ed14f84..f0c677ddd270 100644 --- a/arch/s390/include/asm/pci_clp.h +++ b/arch/s390/include/asm/pci_clp.h @@ -50,6 +50,9 @@ struct clp_fh_list_entry { #define CLP_UTIL_STR_LEN 64 #define CLP_PFIP_NR_SEGMENTS 4 +/* PCI function type numbers */ +#define PCI_FUNC_TYPE_ISM 0x5 /* ISM device */ + extern bool zpci_unique_uid; struct clp_rsp_slpc_pci { diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h index 7119c04c51c5..42d7cc4262ca 100644 --- a/arch/s390/include/asm/pci_dma.h +++ b/arch/s390/include/asm/pci_dma.h @@ -82,117 +82,16 @@ enum zpci_ioat_dtype { #define ZPCI_TABLE_VALID_MASK 0x20 #define ZPCI_TABLE_PROT_MASK 0x200 -static inline unsigned int calc_rtx(dma_addr_t ptr) -{ - return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK; -} - -static inline unsigned int calc_sx(dma_addr_t ptr) -{ - return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK; -} - -static inline unsigned int calc_px(dma_addr_t ptr) -{ - return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK; -} - -static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa) -{ - *entry &= ZPCI_PTE_FLAG_MASK; - *entry |= (pfaa & ZPCI_PTE_ADDR_MASK); -} - -static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto) -{ - *entry &= ZPCI_RTE_FLAG_MASK; - *entry |= (sto & ZPCI_RTE_ADDR_MASK); - *entry |= ZPCI_TABLE_TYPE_RTX; -} - -static inline void set_st_pto(unsigned long *entry, phys_addr_t pto) -{ - *entry &= ZPCI_STE_FLAG_MASK; - *entry |= (pto & ZPCI_STE_ADDR_MASK); - *entry |= ZPCI_TABLE_TYPE_SX; -} - -static inline void validate_rt_entry(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_VALID_MASK; - *entry &= ~ZPCI_TABLE_OFFSET_MASK; - *entry |= ZPCI_TABLE_VALID; - *entry |= ZPCI_TABLE_LEN_RTX; -} - -static inline void validate_st_entry(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_VALID_MASK; - *entry |= ZPCI_TABLE_VALID; -} - -static inline void invalidate_pt_entry(unsigned long *entry) -{ - WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID); - *entry &= ~ZPCI_PTE_VALID_MASK; - *entry |= ZPCI_PTE_INVALID; -} - -static inline void validate_pt_entry(unsigned long *entry) -{ - WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID); - *entry &= ~ZPCI_PTE_VALID_MASK; - *entry |= ZPCI_PTE_VALID; -} - -static inline void entry_set_protected(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_PROT_MASK; - *entry |= ZPCI_TABLE_PROTECTED; -} - -static inline void entry_clr_protected(unsigned long *entry) -{ - *entry &= ~ZPCI_TABLE_PROT_MASK; - *entry |= ZPCI_TABLE_UNPROTECTED; -} - -static inline int reg_entry_isvalid(unsigned long entry) -{ - return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID; -} - -static inline int pt_entry_isvalid(unsigned long entry) -{ - return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID; -} - -static inline unsigned long *get_rt_sto(unsigned long entry) -{ - if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX) - return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK); - else - return NULL; - -} - -static inline unsigned long *get_st_pto(unsigned long entry) -{ - if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX) - return phys_to_virt(entry & ZPCI_STE_ADDR_MASK); - else - return NULL; -} - -/* Prototypes */ -void dma_free_seg_table(unsigned long); -unsigned long *dma_alloc_cpu_table(gfp_t gfp); -void dma_cleanup_tables(unsigned long *); -unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, - gfp_t gfp); -void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags); - -extern const struct dma_map_ops s390_pci_dma_ops; +struct zpci_iommu_ctrs { + atomic64_t mapped_pages; + atomic64_t unmapped_pages; + atomic64_t global_rpcits; + atomic64_t sync_map_rpcits; + atomic64_t sync_rpcits; +}; + +struct zpci_dev; +struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev); #endif diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index 5ae31ca9dd44..0547a10406e7 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -3,7 +3,7 @@ # Makefile for the s390 PCI subsystem. # -obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \ +obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o pci_sysfs.o \ pci_event.o pci_debug.o pci_insn.o pci_mmio.o \ pci_bus.o pci_kvm_hook.o obj-$(CONFIG_PCI_IOV) += pci_iov.o diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index d34d5813d006..63fd9e1d9f22 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -124,7 +124,11 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas, WARN_ON_ONCE(iota & 0x3fff); fib.pba = base; - fib.pal = limit; + /* Work around off by one in ISM virt device */ + if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base) + fib.pal = limit + (1 << 12); + else + fib.pal = limit; fib.iota = iota | ZPCI_IOTA_RTTO_FLAG; fib.gd = zdev->gisa; cc = zpci_mod_fc(req, &fib, status); @@ -153,6 +157,7 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas) int zpci_fmb_enable_device(struct zpci_dev *zdev) { u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE); + struct zpci_iommu_ctrs *ctrs; struct zpci_fib fib = {0}; u8 cc, status; @@ -165,9 +170,15 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev) WARN_ON((u64) zdev->fmb & 0xf); /* reset software counters */ - atomic64_set(&zdev->allocated_pages, 0); - atomic64_set(&zdev->mapped_pages, 0); - atomic64_set(&zdev->unmapped_pages, 0); + ctrs = zpci_get_iommu_ctrs(zdev); + if (ctrs) { + atomic64_set(&ctrs->mapped_pages, 0); + atomic64_set(&ctrs->unmapped_pages, 0); + atomic64_set(&ctrs->global_rpcits, 0); + atomic64_set(&ctrs->sync_map_rpcits, 0); + atomic64_set(&ctrs->sync_rpcits, 0); + } + fib.fmb_addr = virt_to_phys(zdev->fmb); fib.gd = zdev->gisa; @@ -582,7 +593,6 @@ int pcibios_device_add(struct pci_dev *pdev) pdev->no_vf_scan = 1; pdev->dev.groups = zpci_attr_groups; - pdev->dev.dma_ops = &s390_pci_dma_ops; zpci_map_resources(pdev); for (i = 0; i < PCI_STD_NUM_BARS; i++) { @@ -756,8 +766,6 @@ int zpci_hot_reset_device(struct zpci_dev *zdev) if (zdev->dma_table) rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, virt_to_phys(zdev->dma_table), &status); - else - rc = zpci_dma_init_device(zdev); if (rc) { zpci_disable_device(zdev); return rc; @@ -865,11 +873,6 @@ int zpci_deconfigure_device(struct zpci_dev *zdev) if (zdev->zbus->bus) zpci_bus_remove_device(zdev, false); - if (zdev->dma_table) { - rc = zpci_dma_exit_device(zdev); - if (rc) - return rc; - } if (zdev_enabled(zdev)) { rc = zpci_disable_device(zdev); if (rc) @@ -918,8 +921,6 @@ void zpci_release_device(struct kref *kref) if (zdev->zbus->bus) zpci_bus_remove_device(zdev, false); - if (zdev->dma_table) - zpci_dma_exit_device(zdev); if (zdev_enabled(zdev)) zpci_disable_device(zdev); @@ -1109,10 +1110,6 @@ static int __init pci_base_init(void) if (rc) goto out_irq; - rc = zpci_dma_init(); - if (rc) - goto out_dma; - rc = clp_scan_pci_devices(); if (rc) goto out_find; @@ -1122,8 +1119,6 @@ static int __init pci_base_init(void) return 0; out_find: - zpci_dma_exit(); -out_dma: zpci_irq_exit(); out_irq: zpci_mem_exit(); diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c index 32245b970a0c..daa5d7450c7d 100644 --- a/arch/s390/pci/pci_bus.c +++ b/arch/s390/pci/pci_bus.c @@ -47,11 +47,6 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev) rc = zpci_enable_device(zdev); if (rc) return rc; - rc = zpci_dma_init_device(zdev); - if (rc) { - zpci_disable_device(zdev); - return rc; - } } if (!zdev->has_resources) { diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c index ca6bd98eec13..6dde2263c79d 100644 --- a/arch/s390/pci/pci_debug.c +++ b/arch/s390/pci/pci_debug.c @@ -53,9 +53,11 @@ static char *pci_fmt3_names[] = { }; static char *pci_sw_names[] = { - "Allocated pages", "Mapped pages", "Unmapped pages", + "Global RPCITs", + "Sync Map RPCITs", + "Sync RPCITs", }; static void pci_fmb_show(struct seq_file *m, char *name[], int length, @@ -69,10 +71,14 @@ static void pci_fmb_show(struct seq_file *m, char *name[], int length, static void pci_sw_counter_show(struct seq_file *m) { - struct zpci_dev *zdev = m->private; - atomic64_t *counter = &zdev->allocated_pages; + struct zpci_iommu_ctrs *ctrs = zpci_get_iommu_ctrs(m->private); + atomic64_t *counter; int i; + if (!ctrs) + return; + + counter = &ctrs->mapped_pages; for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++) seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i], atomic64_read(counter)); diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c deleted file mode 100644 index 99209085c75b..000000000000 --- a/arch/s390/pci/pci_dma.c +++ /dev/null @@ -1,746 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright IBM Corp. 2012 - * - * Author(s): - * Jan Glauber <jang@linux.vnet.ibm.com> - */ - -#include <linux/kernel.h> -#include <linux/slab.h> -#include <linux/export.h> -#include <linux/iommu-helper.h> -#include <linux/dma-map-ops.h> -#include <linux/vmalloc.h> -#include <linux/pci.h> -#include <asm/pci_dma.h> - -static struct kmem_cache *dma_region_table_cache; -static struct kmem_cache *dma_page_table_cache; -static int s390_iommu_strict; -static u64 s390_iommu_aperture; -static u32 s390_iommu_aperture_factor = 1; - -static int zpci_refresh_global(struct zpci_dev *zdev) -{ - return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma, - zdev->iommu_pages * PAGE_SIZE); -} - -unsigned long *dma_alloc_cpu_table(gfp_t gfp) -{ - unsigned long *table, *entry; - - table = kmem_cache_alloc(dma_region_table_cache, gfp); - if (!table) - return NULL; - - for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) - *entry = ZPCI_TABLE_INVALID; - return table; -} - -static void dma_free_cpu_table(void *table) -{ - kmem_cache_free(dma_region_table_cache, table); -} - -static unsigned long *dma_alloc_page_table(gfp_t gfp) -{ - unsigned long *table, *entry; - - table = kmem_cache_alloc(dma_page_table_cache, gfp); - if (!table) - return NULL; - - for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) - *entry = ZPCI_PTE_INVALID; - return table; -} - -static void dma_free_page_table(void *table) -{ - kmem_cache_free(dma_page_table_cache, table); -} - -static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp) -{ - unsigned long old_rte, rte; - unsigned long *sto; - - rte = READ_ONCE(*rtep); - if (reg_entry_isvalid(rte)) { - sto = get_rt_sto(rte); - } else { - sto = dma_alloc_cpu_table(gfp); - if (!sto) - return NULL; - - set_rt_sto(&rte, virt_to_phys(sto)); - validate_rt_entry(&rte); - entry_clr_protected(&rte); - - old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte); - if (old_rte != ZPCI_TABLE_INVALID) { - /* Somone else was faster, use theirs */ - dma_free_cpu_table(sto); - sto = get_rt_sto(old_rte); - } - } - return sto; -} - -static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp) -{ - unsigned long old_ste, ste; - unsigned long *pto; - - ste = READ_ONCE(*step); - if (reg_entry_isvalid(ste)) { - pto = get_st_pto(ste); - } else { - pto = dma_alloc_page_table(gfp); - if (!pto) - return NULL; - set_st_pto(&ste, virt_to_phys(pto)); - validate_st_entry(&ste); - entry_clr_protected(&ste); - - old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste); - if (old_ste != ZPCI_TABLE_INVALID) { - /* Somone else was faster, use theirs */ - dma_free_page_table(pto); - pto = get_st_pto(old_ste); - } - } - return pto; -} - -unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, - gfp_t gfp) -{ - unsigned long *sto, *pto; - unsigned int rtx, sx, px; - - rtx = calc_rtx(dma_addr); - sto = dma_get_seg_table_origin(&rto[rtx], gfp); - if (!sto) - return NULL; - - sx = calc_sx(dma_addr); - pto = dma_get_page_table_origin(&sto[sx], gfp); - if (!pto) - return NULL; - - px = calc_px(dma_addr); - return &pto[px]; -} - -void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags) -{ - unsigned long pte; - - pte = READ_ONCE(*ptep); - if (flags & ZPCI_PTE_INVALID) { - invalidate_pt_entry(&pte); - } else { - set_pt_pfaa(&pte, page_addr); - validate_pt_entry(&pte); - } - - if (flags & ZPCI_TABLE_PROTECTED) - entry_set_protected(&pte); - else - entry_clr_protected(&pte); - - xchg(ptep, pte); -} - -static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, - dma_addr_t dma_addr, size_t size, int flags) -{ - unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; - phys_addr_t page_addr = (pa & PAGE_MASK); - unsigned long *entry; - int i, rc = 0; - - if (!nr_pages) - return -EINVAL; - - if (!zdev->dma_table) - return -EINVAL; - - for (i = 0; i < nr_pages; i++) { - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, - GFP_ATOMIC); - if (!entry) { - rc = -ENOMEM; - goto undo_cpu_trans; - } - dma_update_cpu_trans(entry, page_addr, flags); - page_addr += PAGE_SIZE; - dma_addr += PAGE_SIZE; - } - -undo_cpu_trans: - if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) { - flags = ZPCI_PTE_INVALID; - while (i-- > 0) { - page_addr -= PAGE_SIZE; - dma_addr -= PAGE_SIZE; - entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr, - GFP_ATOMIC); - if (!entry) - break; - dma_update_cpu_trans(entry, page_addr, flags); - } - } - return rc; -} - -static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr, - size_t size, int flags) -{ - unsigned long irqflags; - int ret; - - /* - * With zdev->tlb_refresh == 0, rpcit is not required to establish new - * translations when previously invalid translation-table entries are - * validated. With lazy unmap, rpcit is skipped for previously valid - * entries, but a global rpcit is then required before any address can - * be re-used, i.e. after each iommu bitmap wrap-around. - */ - if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) { - if (!zdev->tlb_refresh) - return 0; - } else { - if (!s390_iommu_strict) - return 0; - } - - ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr, - PAGE_ALIGN(size)); - if (ret == -ENOMEM && !s390_iommu_strict) { - /* enable the hypervisor to free some resources */ - if (zpci_refresh_global(zdev)) - goto out; - - spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags); - bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, - zdev->lazy_bitmap, zdev->iommu_pages); - bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags); - ret = 0; - } -out: - return ret; -} - -static int dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa, - dma_addr_t dma_addr, size_t size, int flags) -{ - int rc; - - rc = __dma_update_trans(zdev, pa, dma_addr, size, flags); - if (rc) - return rc; - - rc = __dma_purge_tlb(zdev, dma_addr, size, flags); - if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) - __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID); - - return rc; -} - -void dma_free_seg_table(unsigned long entry) -{ - unsigned long *sto = get_rt_sto(entry); - int sx; - - for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++) - if (reg_entry_isvalid(sto[sx])) - dma_free_page_table(get_st_pto(sto[sx])); - - dma_free_cpu_table(sto); -} - -void dma_cleanup_tables(unsigned long *table) -{ - int rtx; - - if (!table) - return; - - for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) - if (reg_entry_isvalid(table[rtx])) - dma_free_seg_table(table[rtx]); - - dma_free_cpu_table(table); -} - -static unsigned long __dma_alloc_iommu(struct device *dev, - unsigned long start, int size) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - - return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages, - start, size, zdev->start_dma >> PAGE_SHIFT, - dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT), - 0); -} - -static dma_addr_t dma_alloc_address(struct device *dev, int size) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long offset, flags; - - spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); - offset = __dma_alloc_iommu(dev, zdev->next_bit, size); - if (offset == -1) { - if (!s390_iommu_strict) { - /* global flush before DMA addresses are reused */ - if (zpci_refresh_global(zdev)) - goto out_error; - - bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap, - zdev->lazy_bitmap, zdev->iommu_pages); - bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages); - } - /* wrap-around */ - offset = __dma_alloc_iommu(dev, 0, size); - if (offset == -1) - goto out_error; - } - zdev->next_bit = offset + size; - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); - - return zdev->start_dma + offset * PAGE_SIZE; - -out_error: - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); - return DMA_MAPPING_ERROR; -} - -static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long flags, offset; - - offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT; - - spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags); - if (!zdev->iommu_bitmap) - goto out; - - if (s390_iommu_strict) - bitmap_clear(zdev->iommu_bitmap, offset, size); - else - bitmap_set(zdev->lazy_bitmap, offset, size); - -out: - spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags); -} - -static inline void zpci_err_dma(unsigned long rc, unsigned long addr) -{ - struct { - unsigned long rc; - unsigned long addr; - } __packed data = {rc, addr}; - - zpci_err_hex(&data, sizeof(data)); -} - -static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - unsigned long pa = page_to_phys(page) + offset; - int flags = ZPCI_PTE_VALID; - unsigned long nr_pages; - dma_addr_t dma_addr; - int ret; - - /* This rounds up number of pages based on size and offset */ - nr_pages = iommu_num_pages(pa, size, PAGE_SIZE); - dma_addr = dma_alloc_address(dev, nr_pages); - if (dma_addr == DMA_MAPPING_ERROR) { - ret = -ENOSPC; - goto out_err; - } - - /* Use rounded up size */ - size = nr_pages * PAGE_SIZE; - - if (direction == DMA_NONE || direction == DMA_TO_DEVICE) - flags |= ZPCI_TABLE_PROTECTED; - - ret = dma_update_trans(zdev, pa, dma_addr, size, flags); - if (ret) - goto out_free; - - atomic64_add(nr_pages, &zdev->mapped_pages); - return dma_addr + (offset & ~PAGE_MASK); - -out_free: - dma_free_address(dev, dma_addr, nr_pages); -out_err: - zpci_err("map error:\n"); - zpci_err_dma(ret, pa); - return DMA_MAPPING_ERROR; -} - -static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr, - size_t size, enum dma_data_direction direction, - unsigned long attrs) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - int npages, ret; - - npages = iommu_num_pages(dma_addr, size, PAGE_SIZE); - dma_addr = dma_addr & PAGE_MASK; - ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE, - ZPCI_PTE_INVALID); - if (ret) { - zpci_err("unmap error:\n"); - zpci_err_dma(ret, dma_addr); - return; - } - - atomic64_add(npages, &zdev->unmapped_pages); - dma_free_address(dev, dma_addr, npages); -} - -static void *s390_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - unsigned long attrs) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - struct page *page; - phys_addr_t pa; - dma_addr_t map; - - size = PAGE_ALIGN(size); - page = alloc_pages(flag | __GFP_ZERO, get_order(size)); - if (!page) - return NULL; - - pa = page_to_phys(page); - map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0); - if (dma_mapping_error(dev, map)) { - __free_pages(page, get_order(size)); - return NULL; - } - - atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages); - if (dma_handle) - *dma_handle = map; - return phys_to_virt(pa); -} - -static void s390_dma_free(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) -{ - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - - size = PAGE_ALIGN(size); - atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages); - s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0); - free_pages((unsigned long)vaddr, get_order(size)); -} - -/* Map a segment into a contiguous dma address area */ -static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg, - size_t size, dma_addr_t *handle, - enum dma_data_direction dir) -{ - unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT; - struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); - dma_addr_t dma_addr_base, dma_addr; - int flags = ZPCI_PTE_VALID; - struct scatterlist *s; - phys_addr_t pa = 0; - int ret; - - dma_addr_base = dma_alloc_address(dev, nr_pages); - if (dma_addr_base == DMA_MAPPING_ERROR) - return -ENOMEM; - - dma_addr = dma_addr_base; - if (dir == DMA_NONE || dir == DMA_TO_DEVICE) - flags |= ZPCI_TABLE_PROTECTED; - - for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) { - pa = page_to_phys(sg_page(s)); - ret = __dma_update_trans(zdev, pa, dma_addr, - s->offset + s->length, flags); - if (ret) - goto unmap; - - dma_addr += s->offset + s->length; - } - ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags); - if (ret) - goto unmap; - - *handle = dma_addr_base; - atomic64_add(nr_pages, &zdev->mapped_pages); - - return ret; - -unmap: - dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base, - ZPCI_PTE_INVALID); - dma_free_address(dev, dma_addr_base, nr_pages); - zpci_err("map error:\n"); - zpci_err_dma(ret, pa); - return ret; -} - -static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nr_elements, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *s = sg, *start = sg, *dma = sg; - unsigned int max = dma_get_max_seg_size(dev); - unsigned int size = s->offset + s->length; - unsigned int offset = s->offset; - int count = 0, i, ret; - - for (i = 1; i < nr_elements; i++) { - s = sg_next(s); - - s->dma_length = 0; - - if (s->offset || (size & ~PAGE_MASK) || - size + s->length > max) { - ret = __s390_dma_map_sg(dev, start, size, - &dma->dma_address, dir); - if (ret) - goto unmap; - - dma->dma_address += offset; - dma->dma_length = size - offset; - - size = offset = s->offset; - start = s; - dma = sg_next(dma); - count++; - } - size += s->length; - } - ret = __s390_dma_map_sg(dev, start, size, &dma->dma_address, dir); - if (ret) - goto unmap; - - dma->dma_address += offset; - dma->dma_length = size - offset; - - return count + 1; -unmap: - for_each_sg(sg, s, count, i) - s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s), - dir, attrs); - - return ret; -} - -static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nr_elements, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nr_elements, i) { - if (s->dma_length) - s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, - dir, attrs); - s->dma_address = 0; - s->dma_length = 0; - } -} - -static unsigned long *bitmap_vzalloc(size_t bits, gfp_t flags) -{ - size_t n = BITS_TO_LONGS(bits); - size_t bytes; - - if (unlikely(check_mul_overflow(n, sizeof(unsigned long), &bytes))) - return NULL; - - return vzalloc(bytes); -} - -int zpci_dma_init_device(struct zpci_dev *zdev) -{ - u8 status; - int rc; - - /* - * At this point, if the device is part of an IOMMU domain, this would - * be a strong hint towards a bug in the IOMMU API (common) code and/or - * simultaneous access via IOMMU and DMA API. So let's issue a warning. - */ - WARN_ON(zdev->s390_domain); - - spin_lock_init(&zdev->iommu_bitmap_lock); - - zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL); - if (!zdev->dma_table) { - rc = -ENOMEM; - goto out; - } - - /* - * Restrict the iommu bitmap size to the minimum of the following: - * - s390_iommu_aperture which defaults to high_memory - * - 3-level pagetable address limit minus start_dma offset - * - DMA address range allowed by the hardware (clp query pci fn) - * - * Also set zdev->end_dma to the actual end address of the usable - * range, instead of the theoretical maximum as reported by hardware. - * - * This limits the number of concurrently usable DMA mappings since - * for each DMA mapped memory address we need a DMA address including - * extra DMA addresses for multiple mappings of the same memory address. - */ - zdev->start_dma = PAGE_ALIGN(zdev->start_dma); - zdev->iommu_size = min3(s390_iommu_aperture, - ZPCI_TABLE_SIZE_RT - zdev->start_dma, - zdev->end_dma - zdev->start_dma + 1); - zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1; - zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT; - zdev->iommu_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL); - if (!zdev->iommu_bitmap) { - rc = -ENOMEM; - goto free_dma_table; - } - if (!s390_iommu_strict) { - zdev->lazy_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL); - if (!zdev->lazy_bitmap) { - rc = -ENOMEM; - goto free_bitmap; - } - - } - if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, - virt_to_phys(zdev->dma_table), &status)) { - rc = -EIO; - goto free_bitmap; - } - - return 0; -free_bitmap: - vfree(zdev->iommu_bitmap); - zdev->iommu_bitmap = NULL; - vfree(zdev->lazy_bitmap); - zdev->lazy_bitmap = NULL; -free_dma_table: - dma_free_cpu_table(zdev->dma_table); - zdev->dma_table = NULL; -out: - return rc; -} - -int zpci_dma_exit_device(struct zpci_dev *zdev) -{ - int cc = 0; - - /* - * At this point, if the device is part of an IOMMU domain, this would - * be a strong hint towards a bug in the IOMMU API (common) code and/or - * simultaneous access via IOMMU and DMA API. So let's issue a warning. - */ - WARN_ON(zdev->s390_domain); - if (zdev_enabled(zdev)) - cc = zpci_unregister_ioat(zdev, 0); - /* - * cc == 3 indicates the function is gone already. This can happen - * if the function was deconfigured/disabled suddenly and we have not - * received a new handle yet. - */ - if (cc && cc != 3) - return -EIO; - - dma_cleanup_tables(zdev->dma_table); - zdev->dma_table = NULL; - vfree(zdev->iommu_bitmap); - zdev->iommu_bitmap = NULL; - vfree(zdev->lazy_bitmap); - zdev->lazy_bitmap = NULL; - zdev->next_bit = 0; - return 0; -} - -static int __init dma_alloc_cpu_table_caches(void) -{ - dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables", - ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN, - 0, NULL); - if (!dma_region_table_cache) - return -ENOMEM; - - dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables", - ZPCI_PT_SIZE, ZPCI_PT_ALIGN, - 0, NULL); - if (!dma_page_table_cache) { - kmem_cache_destroy(dma_region_table_cache); - return -ENOMEM; - } - return 0; -} - -int __init zpci_dma_init(void) -{ - s390_iommu_aperture = (u64)virt_to_phys(high_memory); - if (!s390_iommu_aperture_factor) - s390_iommu_aperture = ULONG_MAX; - else - s390_iommu_aperture *= s390_iommu_aperture_factor; - - return dma_alloc_cpu_table_caches(); -} - -void zpci_dma_exit(void) -{ - kmem_cache_destroy(dma_page_table_cache); - kmem_cache_destroy(dma_region_table_cache); -} - -const struct dma_map_ops s390_pci_dma_ops = { - .alloc = s390_dma_alloc, - .free = s390_dma_free, - .map_sg = s390_dma_map_sg, - .unmap_sg = s390_dma_unmap_sg, - .map_page = s390_dma_map_pages, - .unmap_page = s390_dma_unmap_pages, - .mmap = dma_common_mmap, - .get_sgtable = dma_common_get_sgtable, - .alloc_pages = dma_common_alloc_pages, - .free_pages = dma_common_free_pages, - /* dma_supported is unconditionally true without a callback */ -}; -EXPORT_SYMBOL_GPL(s390_pci_dma_ops); - -static int __init s390_iommu_setup(char *str) -{ - if (!strcmp(str, "strict")) - s390_iommu_strict = 1; - return 1; -} - -__setup("s390_iommu=", s390_iommu_setup); - -static int __init s390_iommu_aperture_setup(char *str) -{ - if (kstrtou32(str, 10, &s390_iommu_aperture_factor)) - s390_iommu_aperture_factor = 1; - return 1; -} - -__setup("s390_iommu_aperture=", s390_iommu_aperture_setup); diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c index b9324ca2eb94..4d9773ef9e0a 100644 --- a/arch/s390/pci/pci_event.c +++ b/arch/s390/pci/pci_event.c @@ -59,9 +59,16 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res) } } -static bool is_passed_through(struct zpci_dev *zdev) +static bool is_passed_through(struct pci_dev *pdev) { - return zdev->s390_domain; + struct zpci_dev *zdev = to_zpci(pdev); + bool ret; + + mutex_lock(&zdev->kzdev_lock); + ret = !!zdev->kzdev; + mutex_unlock(&zdev->kzdev_lock); + + return ret; } static bool is_driver_supported(struct pci_driver *driver) @@ -176,7 +183,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev) } pdev->error_state = pci_channel_io_frozen; - if (is_passed_through(to_zpci(pdev))) { + if (is_passed_through(pdev)) { pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n", pci_name(pdev)); goto out_unlock; @@ -239,7 +246,7 @@ static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es) * we will inject the error event and let the guest recover the device * itself. */ - if (is_passed_through(to_zpci(pdev))) + if (is_passed_through(pdev)) goto out; driver = to_pci_driver(pdev->dev.driver); if (driver && driver->err_handler && driver->err_handler->error_detected) @@ -306,8 +313,6 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh) /* Even though the device is already gone we still * need to free zPCI resources as part of the disable. */ - if (zdev->dma_table) - zpci_dma_exit_device(zdev); if (zdev_enabled(zdev)) zpci_disable_device(zdev); zdev->state = ZPCI_FN_STATE_STANDBY; diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c index cae280e5c047..8a7abac51816 100644 --- a/arch/s390/pci/pci_sysfs.c +++ b/arch/s390/pci/pci_sysfs.c @@ -56,6 +56,7 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, struct pci_dev *pdev = to_pci_dev(dev); struct zpci_dev *zdev = to_zpci(pdev); int ret = 0; + u8 status; /* Can't use device_remove_self() here as that would lead us to lock * the pci_rescan_remove_lock while holding the device' kernfs lock. @@ -82,12 +83,6 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, pci_lock_rescan_remove(); if (pci_dev_is_added(pdev)) { pci_stop_and_remove_bus_device(pdev); - if (zdev->dma_table) { - ret = zpci_dma_exit_device(zdev); - if (ret) - goto out; - } - if (zdev_enabled(zdev)) { ret = zpci_disable_device(zdev); /* @@ -105,14 +100,16 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr, ret = zpci_enable_device(zdev); if (ret) goto out; - ret = zpci_dma_init_device(zdev); - if (ret) { - zpci_disable_device(zdev); - goto out; + + if (zdev->dma_table) { + ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, + virt_to_phys(zdev->dma_table), &status); + if (ret) + zpci_disable_device(zdev); } - pci_rescan_bus(zdev->zbus->bus); } out: + pci_rescan_bus(zdev->zbus->bus); pci_unlock_rescan_remove(); if (kn) sysfs_unbreak_active_protection(kn); diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index cd6727898b11..3199fd54b462 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -91,7 +91,7 @@ config IOMMU_DEBUGFS choice prompt "IOMMU default domain type" depends on IOMMU_API - default IOMMU_DEFAULT_DMA_LAZY if X86 || IA64 + default IOMMU_DEFAULT_DMA_LAZY if X86 || IA64 || S390 default IOMMU_DEFAULT_DMA_STRICT help Choose the type of IOMMU domain used to manage DMA API usage by @@ -146,7 +146,7 @@ config OF_IOMMU # IOMMU-agnostic DMA-mapping layer config IOMMU_DMA - def_bool ARM64 || IA64 || X86 + def_bool ARM64 || IA64 || X86 || S390 select DMA_OPS select IOMMU_API select IOMMU_IOVA diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 9b5fc3356bf2..75132ae861a2 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -22,15 +22,6 @@ config AMD_IOMMU your BIOS for an option to enable it or if you have an IVRS ACPI table. -config AMD_IOMMU_V2 - tristate "AMD IOMMU Version 2 driver" - depends on AMD_IOMMU - select MMU_NOTIFIER - help - This option enables support for the AMD IOMMUv2 features of the IOMMU - hardware. Select this option if you want to use devices that support - the PCI PRI and PASID interface. - config AMD_IOMMU_DEBUGFS bool "Enable AMD IOMMU internals in DebugFS" depends on AMD_IOMMU && IOMMU_DEBUGFS diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index 773d8aa00283..f454fbb1569e 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,4 +1,3 @@ # SPDX-License-Identifier: GPL-2.0-only obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o -obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index e2857109e966..86be1edd50ee 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -38,9 +38,6 @@ extern int amd_iommu_guest_ir; extern enum io_pgtable_fmt amd_iommu_pgtable; extern int amd_iommu_gpt_level; -/* IOMMUv2 specific functions */ -struct iommu_domain; - bool amd_iommu_v2_supported(void); struct amd_iommu *get_amd_iommu(unsigned int idx); u8 amd_iommu_pc_get_max_banks(unsigned int idx); @@ -51,10 +48,10 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value); -int amd_iommu_register_ppr_notifier(struct notifier_block *nb); -int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb); -void amd_iommu_domain_direct_map(struct iommu_domain *dom); -int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); +/* Device capabilities */ +int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev); +void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev); + int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); void amd_iommu_domain_update(struct protection_domain *domain); @@ -87,9 +84,25 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev) (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); } -static inline bool iommu_feature(struct amd_iommu *iommu, u64 mask) +static inline bool check_feature(u64 mask) +{ + return (amd_iommu_efr & mask); +} + +static inline bool check_feature2(u64 mask) +{ + return (amd_iommu_efr2 & mask); +} + +static inline int check_feature_gpt_level(void) +{ + return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK); +} + +static inline bool amd_iommu_gt_ppr_supported(void) { - return !!(iommu->features & mask); + return (check_feature(FEATURE_GT) && + check_feature(FEATURE_PPR)); } static inline u64 iommu_virt_to_phys(void *vaddr) @@ -105,7 +118,6 @@ static inline void *iommu_phys_to_virt(unsigned long paddr) static inline void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) { - atomic64_set(&domain->iop.pt_root, root); domain->iop.root = (u64 *)(root & PAGE_MASK); domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */ } @@ -146,8 +158,5 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain, u64 *root, int mode); struct dev_table_entry *get_dev_table(struct amd_iommu *iommu); -extern u64 amd_iommu_efr; -extern u64 amd_iommu_efr2; - extern bool amd_iommu_snp_en; #endif diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 7dc30c2b56b3..e742006f2885 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -451,6 +451,10 @@ #define PD_IOMMUV2_MASK BIT(3) /* domain has gcr3 table */ #define PD_GIOV_MASK BIT(4) /* domain enable GIOV support */ +/* Timeout stuff */ +#define LOOP_TIMEOUT 100000 +#define MMIO_STATUS_TIMEOUT 2000000 + extern bool amd_iommu_dump; #define DUMP_printk(format, arg...) \ do { \ @@ -505,19 +509,6 @@ extern struct kmem_cache *amd_iommu_irq_cache; #define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT) #define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL) -/* - * This struct is used to pass information about - * incoming PPR faults around. - */ -struct amd_iommu_fault { - u64 address; /* IO virtual address of the fault*/ - u32 pasid; /* Address space identifier */ - u32 sbdf; /* Originating PCI device id */ - u16 tag; /* PPR tag */ - u16 flags; /* Fault flags */ - -}; - struct amd_iommu; struct iommu_domain; @@ -544,7 +535,6 @@ struct amd_io_pgtable { struct io_pgtable iop; int mode; u64 *root; - atomic64_t pt_root; /* pgtable root and pgtable mode */ u64 *pgd; /* v2 pgtable pgd pointer */ }; @@ -676,9 +666,6 @@ struct amd_iommu { /* Extended features 2 */ u64 features2; - /* IOMMUv2 */ - bool is_iommu_v2; - /* PCI device id of the IOMMU device */ u16 devid; @@ -799,6 +786,14 @@ struct devid_map { bool cmd_line; }; +#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */ +#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */ +#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */ +/* Device may request execution on memory pages */ +#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 +/* Device may request super-user privileges */ +#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 + /* * This struct contains device specific data for the IOMMU */ @@ -811,13 +806,15 @@ struct iommu_dev_data { struct protection_domain *domain; /* Domain the device is bound to */ struct device *dev; u16 devid; /* PCI Device ID */ - bool iommu_v2; /* Device can make use of IOMMUv2 */ - struct { - bool enabled; - int qdep; - } ats; /* ATS state */ - bool pri_tlp; /* PASID TLB required for + + u32 flags; /* Holds AMD_IOMMU_DEVICE_FLAG_<*> */ + int ats_qdep; + u8 ats_enabled :1; /* ATS state */ + u8 pri_enabled :1; /* PRI state */ + u8 pasid_enabled:1; /* PASID state */ + u8 pri_tlp :1; /* PASID TLB required for PPR completions */ + u8 ppr :1; /* Enable device PPR support */ bool use_vapic; /* Enable device to use vapic mode */ bool defer_attach; @@ -884,16 +881,15 @@ extern unsigned amd_iommu_aperture_order; /* allocation bitmap for domain ids */ extern unsigned long *amd_iommu_pd_alloc_bitmap; -/* Smallest max PASID supported by any IOMMU in the system */ -extern u32 amd_iommu_max_pasid; - -extern bool amd_iommu_v2_present; - extern bool amd_iommu_force_isolation; /* Max levels of glxval supported */ extern int amd_iommu_max_glx_val; +/* Global EFR and EFR2 registers */ +extern u64 amd_iommu_efr; +extern u64 amd_iommu_efr2; + /* * This function flushes all internal caches of * the IOMMU used by this driver. diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 45efb7e5d725..64bcf3df37ee 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -83,8 +83,6 @@ #define ACPI_DEVFLAG_LINT1 0x80 #define ACPI_DEVFLAG_ATSDIS 0x10000000 -#define LOOP_TIMEOUT 2000000 - #define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \ | ((dev & 0x1f) << 3) | (fn & 0x7)) @@ -187,9 +185,6 @@ static int amd_iommus_present; bool amd_iommu_np_cache __read_mostly; bool amd_iommu_iotlb_sup __read_mostly = true; -u32 amd_iommu_max_pasid __read_mostly = ~0; - -bool amd_iommu_v2_present __read_mostly; static bool amd_iommu_pc_present __read_mostly; bool amdr_ivrs_remap_support __read_mostly; @@ -272,7 +267,7 @@ int amd_iommu_get_num_iommus(void) * Iterate through all the IOMMUs to get common EFR * masks among all IOMMUs and warn if found inconsistency. */ -static void get_global_efr(void) +static __init void get_global_efr(void) { struct amd_iommu *iommu; @@ -304,16 +299,6 @@ static void get_global_efr(void) pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2); } -static bool check_feature_on_all_iommus(u64 mask) -{ - return !!(amd_iommu_efr & mask); -} - -static inline int check_feature_gpt_level(void) -{ - return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK); -} - /* * For IVHD type 0x11/0x40, EFR is also available via IVHD. * Default to IVHD EFR since it is available sooner @@ -399,7 +384,7 @@ static void iommu_set_cwwb_range(struct amd_iommu *iommu) u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem); u64 entry = start & PM_ADDR_MASK; - if (!check_feature_on_all_iommus(FEATURE_SNP)) + if (!check_feature(FEATURE_SNP)) return; /* Note: @@ -869,7 +854,7 @@ static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, void *buf = (void *)__get_free_pages(gfp, order); if (buf && - check_feature_on_all_iommus(FEATURE_SNP) && + check_feature(FEATURE_SNP) && set_memory_4k((unsigned long)buf, (1 << order))) { free_pages((unsigned long)buf, order); buf = NULL; @@ -985,14 +970,14 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu) iommu_feature_enable(iommu, CONTROL_GAINT_EN); iommu_feature_enable(iommu, CONTROL_GALOG_EN); - for (i = 0; i < LOOP_TIMEOUT; ++i) { + for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (status & (MMIO_STATUS_GALOG_RUN_MASK)) break; udelay(10); } - if (WARN_ON(i >= LOOP_TIMEOUT)) + if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) return -EINVAL; return 0; @@ -1048,7 +1033,7 @@ static void iommu_enable_xt(struct amd_iommu *iommu) static void iommu_enable_gt(struct amd_iommu *iommu) { - if (!iommu_feature(iommu, FEATURE_GT)) + if (!check_feature(FEATURE_GT)) return; iommu_feature_enable(iommu, CONTROL_GT_EN); @@ -1987,7 +1972,7 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) u64 val; struct pci_dev *pdev = iommu->dev; - if (!iommu_feature(iommu, FEATURE_PC)) + if (!check_feature(FEATURE_PC)) return; amd_iommu_pc_present = true; @@ -2014,8 +1999,7 @@ static ssize_t amd_iommu_show_features(struct device *dev, struct device_attribute *attr, char *buf) { - struct amd_iommu *iommu = dev_to_amd_iommu(dev); - return sysfs_emit(buf, "%llx:%llx\n", iommu->features2, iommu->features); + return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2); } static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL); @@ -2051,9 +2035,9 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu) features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2); - if (!iommu->features) { - iommu->features = features; - iommu->features2 = features2; + if (!amd_iommu_efr) { + amd_iommu_efr = features; + amd_iommu_efr2 = features2; return; } @@ -2061,12 +2045,12 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu) * Sanity check and warn if EFR values from * IVHD and MMIO conflict. */ - if (features != iommu->features || - features2 != iommu->features2) { + if (features != amd_iommu_efr || + features2 != amd_iommu_efr2) { pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n", - features, iommu->features, - features2, iommu->features2); + features, amd_iommu_efr, + features2, amd_iommu_efr2); } } @@ -2092,20 +2076,17 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) late_iommu_features_init(iommu); - if (iommu_feature(iommu, FEATURE_GT)) { + if (check_feature(FEATURE_GT)) { int glxval; - u32 max_pasid; u64 pasmax; - pasmax = iommu->features & FEATURE_PASID_MASK; + pasmax = amd_iommu_efr & FEATURE_PASID_MASK; pasmax >>= FEATURE_PASID_SHIFT; - max_pasid = (1 << (pasmax + 1)) - 1; + iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1; - amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid); + BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK); - BUG_ON(amd_iommu_max_pasid & ~PASID_MASK); - - glxval = iommu->features & FEATURE_GLXVAL_MASK; + glxval = amd_iommu_efr & FEATURE_GLXVAL_MASK; glxval >>= FEATURE_GLXVAL_SHIFT; if (amd_iommu_max_glx_val == -1) @@ -2114,13 +2095,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval); } - if (iommu_feature(iommu, FEATURE_GT) && - iommu_feature(iommu, FEATURE_PPR)) { - iommu->is_iommu_v2 = true; - amd_iommu_v2_present = true; - } - - if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu)) + if (check_feature(FEATURE_PPR) && alloc_ppr_log(iommu)) return -ENOMEM; if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) { @@ -2132,13 +2107,10 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) init_iommu_perf_ctr(iommu); if (amd_iommu_pgtable == AMD_IOMMU_V2) { - if (!iommu_feature(iommu, FEATURE_GIOSUP) || - !iommu_feature(iommu, FEATURE_GT)) { + if (!check_feature(FEATURE_GIOSUP) || + !check_feature(FEATURE_GT)) { pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n"); amd_iommu_pgtable = AMD_IOMMU_V1; - } else if (iommu_default_passthrough()) { - pr_warn("V2 page table doesn't support passthrough mode. Fallback to v1.\n"); - amd_iommu_pgtable = AMD_IOMMU_V1; } } @@ -2186,35 +2158,29 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) static void print_iommu_info(void) { + int i; static const char * const feat_str[] = { "PreF", "PPR", "X2APIC", "NX", "GT", "[5]", "IA", "GA", "HE", "PC" }; - struct amd_iommu *iommu; - - for_each_iommu(iommu) { - struct pci_dev *pdev = iommu->dev; - int i; - pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr); + if (amd_iommu_efr) { + pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2); - if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2); - - for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { - if (iommu_feature(iommu, (1ULL << i))) - pr_cont(" %s", feat_str[i]); - } + for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { + if (check_feature(1ULL << i)) + pr_cont(" %s", feat_str[i]); + } - if (iommu->features & FEATURE_GAM_VAPIC) - pr_cont(" GA_vAPIC"); + if (check_feature(FEATURE_GAM_VAPIC)) + pr_cont(" GA_vAPIC"); - if (iommu->features & FEATURE_SNP) - pr_cont(" SNP"); + if (check_feature(FEATURE_SNP)) + pr_cont(" SNP"); - pr_cont("\n"); - } + pr_cont("\n"); } + if (irq_remapping_enabled) { pr_info("Interrupt remapping enabled\n"); if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) @@ -2900,19 +2866,19 @@ static void enable_iommus_vapic(void) * Need to set and poll check the GALOGRun bit to zero before * we can set/ modify GA Log registers safely. */ - for (i = 0; i < LOOP_TIMEOUT; ++i) { + for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) { status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET); if (!(status & MMIO_STATUS_GALOG_RUN_MASK)) break; udelay(10); } - if (WARN_ON(i >= LOOP_TIMEOUT)) + if (WARN_ON(i >= MMIO_STATUS_TIMEOUT)) return; } if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) && - !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) { + !check_feature(FEATURE_GAM_VAPIC)) { amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA; return; } @@ -3698,9 +3664,8 @@ bool amd_iommu_v2_supported(void) * (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without * setting up IOMMUv1 page table. */ - return amd_iommu_v2_present && !amd_iommu_snp_en; + return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en; } -EXPORT_SYMBOL(amd_iommu_v2_supported); struct amd_iommu *get_amd_iommu(unsigned int idx) { @@ -3824,7 +3789,7 @@ int amd_iommu_snp_enable(void) return -EINVAL; } - amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP); + amd_iommu_snp_en = check_feature(FEATURE_SNP); if (!amd_iommu_snp_en) return -EINVAL; diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c index e9ef2e0a62f6..f818a7e254d4 100644 --- a/drivers/iommu/amd/io_pgtable_v2.c +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -363,10 +363,10 @@ static void v2_free_pgtable(struct io_pgtable *iop) if (!(pdom->flags & PD_IOMMUV2_MASK)) return; - /* - * Make changes visible to IOMMUs. No need to clear gcr3 entry - * as gcr3 table is already freed. - */ + /* Clear gcr3 entry */ + amd_iommu_domain_clear_gcr3(&pdom->domain, 0); + + /* Make changes visible to IOMMUs */ amd_iommu_domain_update(pdom); /* Free page table */ diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 95bd7c25ba6f..089886485895 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -44,8 +44,6 @@ #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) -#define LOOP_TIMEOUT 100000 - /* IO virtual address start page frame number */ #define IOVA_START_PFN (1) #define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT) @@ -66,7 +64,6 @@ LIST_HEAD(acpihid_map); const struct iommu_ops amd_iommu_ops; -static ATOMIC_NOTIFIER_HEAD(ppr_notifier); int amd_iommu_max_glx_val = -1; /* @@ -79,7 +76,6 @@ struct iommu_cmd { struct kmem_cache *amd_iommu_irq_cache; static void detach_device(struct device *dev); -static int domain_enable_v2(struct protection_domain *domain, int pasids); /**************************************************************************** * @@ -322,24 +318,141 @@ static struct iommu_group *acpihid_device_group(struct device *dev) return entry->group; } -static bool pci_iommuv2_capable(struct pci_dev *pdev) +static inline bool pdev_pasid_supported(struct iommu_dev_data *dev_data) { - static const int caps[] = { - PCI_EXT_CAP_ID_PRI, - PCI_EXT_CAP_ID_PASID, - }; - int i, pos; + return (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP); +} - if (!pci_ats_supported(pdev)) - return false; +static u32 pdev_get_caps(struct pci_dev *pdev) +{ + int features; + u32 flags = 0; + + if (pci_ats_supported(pdev)) + flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP; + + if (pci_pri_supported(pdev)) + flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP; + + features = pci_pasid_features(pdev); + if (features >= 0) { + flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP; + + if (features & PCI_PASID_CAP_EXEC) + flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP; - for (i = 0; i < 2; ++i) { - pos = pci_find_ext_capability(pdev, caps[i]); - if (pos == 0) - return false; + if (features & PCI_PASID_CAP_PRIV) + flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP; } - return true; + return flags; +} + +static inline int pdev_enable_cap_ats(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + int ret = -EINVAL; + + if (dev_data->ats_enabled) + return 0; + + if (amd_iommu_iotlb_sup && + (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP)) { + ret = pci_enable_ats(pdev, PAGE_SHIFT); + if (!ret) { + dev_data->ats_enabled = 1; + dev_data->ats_qdep = pci_ats_queue_depth(pdev); + } + } + + return ret; +} + +static inline void pdev_disable_cap_ats(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + + if (dev_data->ats_enabled) { + pci_disable_ats(pdev); + dev_data->ats_enabled = 0; + } +} + +int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + int ret = -EINVAL; + + if (dev_data->pri_enabled) + return 0; + + if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) { + /* + * First reset the PRI state of the device. + * FIXME: Hardcode number of outstanding requests for now + */ + if (!pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) { + dev_data->pri_enabled = 1; + dev_data->pri_tlp = pci_prg_resp_pasid_required(pdev); + + ret = 0; + } + } + + return ret; +} + +void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + + if (dev_data->pri_enabled) { + pci_disable_pri(pdev); + dev_data->pri_enabled = 0; + } +} + +static inline int pdev_enable_cap_pasid(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + int ret = -EINVAL; + + if (dev_data->pasid_enabled) + return 0; + + if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) { + /* Only allow access to user-accessible pages */ + ret = pci_enable_pasid(pdev, 0); + if (!ret) + dev_data->pasid_enabled = 1; + } + + return ret; +} + +static inline void pdev_disable_cap_pasid(struct pci_dev *pdev) +{ + struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev); + + if (dev_data->pasid_enabled) { + pci_disable_pasid(pdev); + dev_data->pasid_enabled = 0; + } +} + +static void pdev_enable_caps(struct pci_dev *pdev) +{ + pdev_enable_cap_ats(pdev); + pdev_enable_cap_pasid(pdev); + amd_iommu_pdev_enable_cap_pri(pdev); + +} + +static void pdev_disable_caps(struct pci_dev *pdev) +{ + pdev_disable_cap_ats(pdev); + pdev_disable_cap_pasid(pdev); + amd_iommu_pdev_disable_cap_pri(pdev); } /* @@ -399,8 +512,8 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev) * it'll be forced to go into translation mode. */ if ((iommu_default_passthrough() || !amd_iommu_force_isolation) && - dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) { - dev_data->iommu_v2 = iommu->is_iommu_v2; + dev_is_pci(dev) && amd_iommu_gt_ppr_supported()) { + dev_data->flags = pdev_get_caps(to_pci_dev(dev)); } dev_iommu_priv_set(dev, dev_data); @@ -701,24 +814,6 @@ static void iommu_poll_events(struct amd_iommu *iommu) writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET); } -static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw) -{ - struct amd_iommu_fault fault; - - if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) { - pr_err_ratelimited("Unknown PPR request received\n"); - return; - } - - fault.address = raw[1]; - fault.pasid = PPR_PASID(raw[0]); - fault.sbdf = PCI_SEG_DEVID_TO_SBDF(iommu->pci_seg->id, PPR_DEVID(raw[0])); - fault.tag = PPR_TAG(raw[0]); - fault.flags = PPR_FLAGS(raw[0]); - - atomic_notifier_call_chain(&ppr_notifier, 0, &fault); -} - static void iommu_poll_ppr_log(struct amd_iommu *iommu) { u32 head, tail; @@ -764,8 +859,7 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu) head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE; writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); - /* Handle PPR entry */ - iommu_handle_ppr_entry(iommu, entry); + /* TODO: PPR Handler will be added when we add IOPF support */ /* Refresh ring-buffer information */ head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET); @@ -1094,7 +1188,7 @@ static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, u32 pasid, } static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, u32 pasid, - int status, int tag, bool gn) + int status, int tag, u8 gn) { memset(cmd, 0, sizeof(*cmd)); @@ -1298,7 +1392,7 @@ static void amd_iommu_flush_irt_all(struct amd_iommu *iommu) void iommu_flush_all_caches(struct amd_iommu *iommu) { - if (iommu_feature(iommu, FEATURE_IA)) { + if (check_feature(FEATURE_IA)) { amd_iommu_flush_all(iommu); } else { amd_iommu_flush_dte_all(iommu); @@ -1317,7 +1411,7 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data, struct iommu_cmd cmd; int qdep; - qdep = dev_data->ats.qdep; + qdep = dev_data->ats_qdep; iommu = rlookup_amd_iommu(dev_data->dev); if (!iommu) return -EINVAL; @@ -1368,7 +1462,7 @@ static int device_flush_dte(struct iommu_dev_data *dev_data) return ret; } - if (dev_data->ats.enabled) + if (dev_data->ats_enabled) ret = device_flush_iotlb(dev_data, 0, ~0UL); return ret; @@ -1401,7 +1495,7 @@ static void __domain_flush_pages(struct protection_domain *domain, list_for_each_entry(dev_data, &domain->dev_list, list) { - if (!dev_data->ats.enabled) + if (!dev_data->ats_enabled) continue; ret |= device_flush_iotlb(dev_data, address, size); @@ -1577,6 +1671,42 @@ static void free_gcr3_table(struct protection_domain *domain) free_page((unsigned long)domain->gcr3_tbl); } +/* + * Number of GCR3 table levels required. Level must be 4-Kbyte + * page and can contain up to 512 entries. + */ +static int get_gcr3_levels(int pasids) +{ + int levels; + + if (pasids == -1) + return amd_iommu_max_glx_val; + + levels = get_count_order(pasids); + + return levels ? (DIV_ROUND_UP(levels, 9) - 1) : levels; +} + +/* Note: This function expects iommu_domain->lock to be held prior calling the function. */ +static int setup_gcr3_table(struct protection_domain *domain, int pasids) +{ + int levels = get_gcr3_levels(pasids); + + if (levels > amd_iommu_max_glx_val) + return -EINVAL; + + domain->gcr3_tbl = alloc_pgtable_page(domain->nid, GFP_ATOMIC); + if (domain->gcr3_tbl == NULL) + return -ENOMEM; + + domain->glx = levels; + domain->flags |= PD_IOMMUV2_MASK; + + amd_iommu_domain_update(domain); + + return 0; +} + static void set_dte_entry(struct amd_iommu *iommu, u16 devid, struct protection_domain *domain, bool ats, bool ppr) { @@ -1605,10 +1735,8 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid, if (ats) flags |= DTE_FLAG_IOTLB; - if (ppr) { - if (iommu_feature(iommu, FEATURE_EPHSUP)) - pte_root |= 1ULL << DEV_ENTRY_PPR; - } + if (ppr) + pte_root |= 1ULL << DEV_ENTRY_PPR; if (domain->flags & PD_IOMMUV2_MASK) { u64 gcr3 = iommu_virt_to_phys(domain->gcr3_tbl); @@ -1685,7 +1813,7 @@ static void do_attach(struct iommu_dev_data *dev_data, iommu = rlookup_amd_iommu(dev_data->dev); if (!iommu) return; - ats = dev_data->ats.enabled; + ats = dev_data->ats_enabled; /* Update data structures */ dev_data->domain = domain; @@ -1701,7 +1829,7 @@ static void do_attach(struct iommu_dev_data *dev_data, /* Update device table */ set_dte_entry(iommu, dev_data->devid, domain, - ats, dev_data->iommu_v2); + ats, dev_data->ppr); clone_aliases(iommu, dev_data->dev); device_flush_dte(dev_data); @@ -1736,48 +1864,6 @@ static void do_detach(struct iommu_dev_data *dev_data) domain->dev_cnt -= 1; } -static void pdev_iommuv2_disable(struct pci_dev *pdev) -{ - pci_disable_ats(pdev); - pci_disable_pri(pdev); - pci_disable_pasid(pdev); -} - -static int pdev_pri_ats_enable(struct pci_dev *pdev) -{ - int ret; - - /* Only allow access to user-accessible pages */ - ret = pci_enable_pasid(pdev, 0); - if (ret) - return ret; - - /* First reset the PRI state of the device */ - ret = pci_reset_pri(pdev); - if (ret) - goto out_err_pasid; - - /* Enable PRI */ - /* FIXME: Hardcode number of outstanding requests for now */ - ret = pci_enable_pri(pdev, 32); - if (ret) - goto out_err_pasid; - - ret = pci_enable_ats(pdev, PAGE_SHIFT); - if (ret) - goto out_err_pri; - - return 0; - -out_err_pri: - pci_disable_pri(pdev); - -out_err_pasid: - pci_disable_pasid(pdev); - - return ret; -} - /* * If a device is not yet associated with a domain, this function makes the * device visible in the domain @@ -1786,9 +1872,8 @@ static int attach_device(struct device *dev, struct protection_domain *domain) { struct iommu_dev_data *dev_data; - struct pci_dev *pdev; unsigned long flags; - int ret; + int ret = 0; spin_lock_irqsave(&domain->lock, flags); @@ -1796,45 +1881,13 @@ static int attach_device(struct device *dev, spin_lock(&dev_data->lock); - ret = -EBUSY; - if (dev_data->domain != NULL) + if (dev_data->domain != NULL) { + ret = -EBUSY; goto out; - - if (!dev_is_pci(dev)) - goto skip_ats_check; - - pdev = to_pci_dev(dev); - if (domain->flags & PD_IOMMUV2_MASK) { - struct iommu_domain *def_domain = iommu_get_dma_domain(dev); - - ret = -EINVAL; - - /* - * In case of using AMD_IOMMU_V1 page table mode and the device - * is enabling for PPR/ATS support (using v2 table), - * we need to make sure that the domain type is identity map. - */ - if ((amd_iommu_pgtable == AMD_IOMMU_V1) && - def_domain->type != IOMMU_DOMAIN_IDENTITY) { - goto out; - } - - if (dev_data->iommu_v2) { - if (pdev_pri_ats_enable(pdev) != 0) - goto out; - - dev_data->ats.enabled = true; - dev_data->ats.qdep = pci_ats_queue_depth(pdev); - dev_data->pri_tlp = pci_prg_resp_pasid_required(pdev); - } - } else if (amd_iommu_iotlb_sup && - pci_enable_ats(pdev, PAGE_SHIFT) == 0) { - dev_data->ats.enabled = true; - dev_data->ats.qdep = pci_ats_queue_depth(pdev); } -skip_ats_check: - ret = 0; + if (dev_is_pci(dev)) + pdev_enable_caps(to_pci_dev(dev)); do_attach(dev_data, domain); @@ -1882,15 +1935,8 @@ static void detach_device(struct device *dev) do_detach(dev_data); - if (!dev_is_pci(dev)) - goto out; - - if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2) - pdev_iommuv2_disable(to_pci_dev(dev)); - else if (dev_data->ats.enabled) - pci_disable_ats(to_pci_dev(dev)); - - dev_data->ats.enabled = false; + if (dev_is_pci(dev)) + pdev_disable_caps(to_pci_dev(dev)); out: spin_unlock(&dev_data->lock); @@ -1980,7 +2026,7 @@ static void update_device_table(struct protection_domain *domain) if (!iommu) continue; set_dte_entry(iommu, dev_data->devid, domain, - dev_data->ats.enabled, dev_data->iommu_v2); + dev_data->ats_enabled, dev_data->ppr); clone_aliases(iommu, dev_data->dev); } } @@ -2014,9 +2060,11 @@ void amd_iommu_domain_update(struct protection_domain *domain) static void cleanup_domain(struct protection_domain *domain) { struct iommu_dev_data *entry; - unsigned long flags; - spin_lock_irqsave(&domain->lock, flags); + lockdep_assert_held(&domain->lock); + + if (!domain->dev_cnt) + return; while (!list_empty(&domain->dev_list)) { entry = list_first_entry(&domain->dev_list, @@ -2024,8 +2072,7 @@ static void cleanup_domain(struct protection_domain *domain) BUG_ON(!entry->domain); do_detach(entry); } - - spin_unlock_irqrestore(&domain->lock, flags); + WARN_ON(domain->dev_cnt != 0); } static void protection_domain_free(struct protection_domain *domain) @@ -2036,6 +2083,12 @@ static void protection_domain_free(struct protection_domain *domain) if (domain->iop.pgtbl_cfg.tlb) free_io_pgtable_ops(&domain->iop.iop.ops); + if (domain->flags & PD_IOMMUV2_MASK) + free_gcr3_table(domain); + + if (domain->iop.root) + free_page((unsigned long)domain->iop.root); + if (domain->id) domain_id_free(domain->id); @@ -2048,18 +2101,10 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL); - spin_lock_init(&domain->lock); - domain->id = domain_id_alloc(); - if (!domain->id) - return -ENOMEM; - INIT_LIST_HEAD(&domain->dev_list); - if (mode != PAGE_MODE_NONE) { pt_root = (void *)get_zeroed_page(GFP_KERNEL); - if (!pt_root) { - domain_id_free(domain->id); + if (!pt_root) return -ENOMEM; - } } amd_iommu_domain_set_pgtable(domain, pt_root, mode); @@ -2069,20 +2114,12 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode) static int protection_domain_init_v2(struct protection_domain *domain) { - spin_lock_init(&domain->lock); - domain->id = domain_id_alloc(); - if (!domain->id) - return -ENOMEM; - INIT_LIST_HEAD(&domain->dev_list); - domain->flags |= PD_GIOV_MASK; domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2; - if (domain_enable_v2(domain, 1)) { - domain_id_free(domain->id); + if (setup_gcr3_table(domain, 1)) return -ENOMEM; - } return 0; } @@ -2092,57 +2129,60 @@ static struct protection_domain *protection_domain_alloc(unsigned int type) struct io_pgtable_ops *pgtbl_ops; struct protection_domain *domain; int pgtable; - int mode = DEFAULT_PGTABLE_LEVEL; int ret; + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return NULL; + + domain->id = domain_id_alloc(); + if (!domain->id) + goto out_err; + + spin_lock_init(&domain->lock); + INIT_LIST_HEAD(&domain->dev_list); + domain->nid = NUMA_NO_NODE; + + switch (type) { + /* No need to allocate io pgtable ops in passthrough mode */ + case IOMMU_DOMAIN_IDENTITY: + return domain; + case IOMMU_DOMAIN_DMA: + pgtable = amd_iommu_pgtable; + break; /* - * Force IOMMU v1 page table when iommu=pt and - * when allocating domain for pass-through devices. + * Force IOMMU v1 page table when allocating + * domain for pass-through devices. */ - if (type == IOMMU_DOMAIN_IDENTITY) { - pgtable = AMD_IOMMU_V1; - mode = PAGE_MODE_NONE; - } else if (type == IOMMU_DOMAIN_UNMANAGED) { + case IOMMU_DOMAIN_UNMANAGED: pgtable = AMD_IOMMU_V1; - } else if (type == IOMMU_DOMAIN_DMA || type == IOMMU_DOMAIN_DMA_FQ) { - pgtable = amd_iommu_pgtable; - } else { - return NULL; + break; + default: + goto out_err; } - domain = kzalloc(sizeof(*domain), GFP_KERNEL); - if (!domain) - return NULL; - switch (pgtable) { case AMD_IOMMU_V1: - ret = protection_domain_init_v1(domain, mode); + ret = protection_domain_init_v1(domain, DEFAULT_PGTABLE_LEVEL); break; case AMD_IOMMU_V2: ret = protection_domain_init_v2(domain); break; default: ret = -EINVAL; + break; } if (ret) goto out_err; - /* No need to allocate io pgtable ops in passthrough mode */ - if (type == IOMMU_DOMAIN_IDENTITY) - return domain; - - domain->nid = NUMA_NO_NODE; - pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain); - if (!pgtbl_ops) { - domain_id_free(domain->id); + if (!pgtbl_ops) goto out_err; - } return domain; out_err: - kfree(domain); + protection_domain_free(domain); return NULL; } @@ -2180,19 +2220,18 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) static void amd_iommu_domain_free(struct iommu_domain *dom) { struct protection_domain *domain; + unsigned long flags; - domain = to_pdomain(dom); + if (!dom) + return; - if (domain->dev_cnt > 0) - cleanup_domain(domain); + domain = to_pdomain(dom); - BUG_ON(domain->dev_cnt != 0); + spin_lock_irqsave(&domain->lock, flags); - if (!dom) - return; + cleanup_domain(domain); - if (domain->flags & PD_IOMMUV2_MASK) - free_gcr3_table(domain); + spin_unlock_irqrestore(&domain->lock, flags); protection_domain_free(domain); } @@ -2233,14 +2272,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom, return ret; } -static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom, - unsigned long iova, size_t size) +static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom, + unsigned long iova, size_t size) { struct protection_domain *domain = to_pdomain(dom); struct io_pgtable_ops *ops = &domain->iop.iop.ops; if (ops->map_pages) domain_flush_np_cache(domain, iova, size); + return 0; } static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova, @@ -2406,7 +2446,6 @@ bool amd_iommu_is_attach_deferred(struct device *dev) return dev_data->defer_attach; } -EXPORT_SYMBOL_GPL(amd_iommu_is_attach_deferred); static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) { @@ -2446,7 +2485,7 @@ static int amd_iommu_def_domain_type(struct device *dev) * and require remapping. * - SNP is enabled, because it prohibits DTE[Mode]=0. */ - if (dev_data->iommu_v2 && + if (pdev_pasid_supported(dev_data) && !cc_platform_has(CC_ATTR_MEM_ENCRYPT) && !amd_iommu_snp_en) { return IOMMU_DOMAIN_IDENTITY; @@ -2485,93 +2524,6 @@ const struct iommu_ops amd_iommu_ops = { } }; -/***************************************************************************** - * - * The next functions do a basic initialization of IOMMU for pass through - * mode - * - * In passthrough mode the IOMMU is initialized and enabled but not used for - * DMA-API translation. - * - *****************************************************************************/ - -/* IOMMUv2 specific functions */ -int amd_iommu_register_ppr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_register(&ppr_notifier, nb); -} -EXPORT_SYMBOL(amd_iommu_register_ppr_notifier); - -int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb) -{ - return atomic_notifier_chain_unregister(&ppr_notifier, nb); -} -EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); - -void amd_iommu_domain_direct_map(struct iommu_domain *dom) -{ - struct protection_domain *domain = to_pdomain(dom); - unsigned long flags; - - spin_lock_irqsave(&domain->lock, flags); - - if (domain->iop.pgtbl_cfg.tlb) - free_io_pgtable_ops(&domain->iop.iop.ops); - - spin_unlock_irqrestore(&domain->lock, flags); -} -EXPORT_SYMBOL(amd_iommu_domain_direct_map); - -/* Note: This function expects iommu_domain->lock to be held prior calling the function. */ -static int domain_enable_v2(struct protection_domain *domain, int pasids) -{ - int levels; - - /* Number of GCR3 table levels required */ - for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9) - levels += 1; - - if (levels > amd_iommu_max_glx_val) - return -EINVAL; - - domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC); - if (domain->gcr3_tbl == NULL) - return -ENOMEM; - - domain->glx = levels; - domain->flags |= PD_IOMMUV2_MASK; - - amd_iommu_domain_update(domain); - - return 0; -} - -int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) -{ - struct protection_domain *pdom = to_pdomain(dom); - unsigned long flags; - int ret; - - spin_lock_irqsave(&pdom->lock, flags); - - /* - * Save us all sanity checks whether devices already in the - * domain support IOMMUv2. Just force that the domain has no - * devices attached when it is switched into IOMMUv2 mode. - */ - ret = -EBUSY; - if (pdom->dev_cnt > 0 || pdom->flags & PD_IOMMUV2_MASK) - goto out; - - if (!pdom->gcr3_tbl) - ret = domain_enable_v2(pdom, pasids); - -out: - spin_unlock_irqrestore(&pdom->lock, flags); - return ret; -} -EXPORT_SYMBOL(amd_iommu_domain_enable_v2); - static int __flush_pasid(struct protection_domain *domain, u32 pasid, u64 address, bool size) { @@ -2609,10 +2561,10 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid, There might be non-IOMMUv2 capable devices in an IOMMUv2 * domain. */ - if (!dev_data->ats.enabled) + if (!dev_data->ats_enabled) continue; - qdep = dev_data->ats.qdep; + qdep = dev_data->ats_qdep; iommu = rlookup_amd_iommu(dev_data->dev); if (!iommu) continue; @@ -2653,7 +2605,6 @@ int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, return ret; } -EXPORT_SYMBOL(amd_iommu_flush_page); static int __amd_iommu_flush_tlb(struct protection_domain *domain, u32 pasid) { @@ -2673,7 +2624,6 @@ int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid) return ret; } -EXPORT_SYMBOL(amd_iommu_flush_tlb); static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc) { @@ -2753,7 +2703,6 @@ int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, return ret; } -EXPORT_SYMBOL(amd_iommu_domain_set_gcr3); int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid) { @@ -2767,7 +2716,6 @@ int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid) return ret; } -EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3); int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, int status, int tag) @@ -2786,49 +2734,6 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, return iommu_queue_command(iommu, &cmd); } -EXPORT_SYMBOL(amd_iommu_complete_ppr); - -int amd_iommu_device_info(struct pci_dev *pdev, - struct amd_iommu_device_info *info) -{ - int max_pasids; - int pos; - - if (pdev == NULL || info == NULL) - return -EINVAL; - - if (!amd_iommu_v2_supported()) - return -EINVAL; - - memset(info, 0, sizeof(*info)); - - if (pci_ats_supported(pdev)) - info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP; - - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); - if (pos) - info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP; - - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID); - if (pos) { - int features; - - max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1)); - max_pasids = min(max_pasids, (1 << 20)); - - info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP; - info->max_pasids = min(pci_max_pasids(pdev), max_pasids); - - features = pci_pasid_features(pdev); - if (features & PCI_PASID_CAP_EXEC) - info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP; - if (features & PCI_PASID_CAP_PRIV) - info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP; - } - - return 0; -} -EXPORT_SYMBOL(amd_iommu_device_info); #ifdef CONFIG_IRQ_REMAP diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c deleted file mode 100644 index 57c2fb1146e2..000000000000 --- a/drivers/iommu/amd/iommu_v2.c +++ /dev/null @@ -1,996 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2010-2012 Advanced Micro Devices, Inc. - * Author: Joerg Roedel <jroedel@suse.de> - */ - -#define pr_fmt(fmt) "AMD-Vi: " fmt - -#include <linux/refcount.h> -#include <linux/mmu_notifier.h> -#include <linux/amd-iommu.h> -#include <linux/mm_types.h> -#include <linux/profile.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <linux/sched/mm.h> -#include <linux/wait.h> -#include <linux/pci.h> -#include <linux/gfp.h> -#include <linux/cc_platform.h> - -#include "amd_iommu.h" - -MODULE_LICENSE("GPL v2"); -MODULE_AUTHOR("Joerg Roedel <jroedel@suse.de>"); - -#define PRI_QUEUE_SIZE 512 - -struct pri_queue { - atomic_t inflight; - bool finish; - int status; -}; - -struct pasid_state { - struct list_head list; /* For global state-list */ - refcount_t count; /* Reference count */ - unsigned mmu_notifier_count; /* Counting nested mmu_notifier - calls */ - struct mm_struct *mm; /* mm_struct for the faults */ - struct mmu_notifier mn; /* mmu_notifier handle */ - struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */ - struct device_state *device_state; /* Link to our device_state */ - u32 pasid; /* PASID index */ - bool invalid; /* Used during setup and - teardown of the pasid */ - spinlock_t lock; /* Protect pri_queues and - mmu_notifer_count */ - wait_queue_head_t wq; /* To wait for count == 0 */ -}; - -struct device_state { - struct list_head list; - u32 sbdf; - atomic_t count; - struct pci_dev *pdev; - struct pasid_state **states; - struct iommu_domain *domain; - int pasid_levels; - int max_pasids; - amd_iommu_invalid_ppr_cb inv_ppr_cb; - amd_iommu_invalidate_ctx inv_ctx_cb; - spinlock_t lock; - wait_queue_head_t wq; -}; - -struct fault { - struct work_struct work; - struct device_state *dev_state; - struct pasid_state *state; - struct mm_struct *mm; - u64 address; - u32 pasid; - u16 tag; - u16 finish; - u16 flags; -}; - -static LIST_HEAD(state_list); -static DEFINE_SPINLOCK(state_lock); - -static struct workqueue_struct *iommu_wq; - -static void free_pasid_states(struct device_state *dev_state); - -static struct device_state *__get_device_state(u32 sbdf) -{ - struct device_state *dev_state; - - list_for_each_entry(dev_state, &state_list, list) { - if (dev_state->sbdf == sbdf) - return dev_state; - } - - return NULL; -} - -static struct device_state *get_device_state(u32 sbdf) -{ - struct device_state *dev_state; - unsigned long flags; - - spin_lock_irqsave(&state_lock, flags); - dev_state = __get_device_state(sbdf); - if (dev_state != NULL) - atomic_inc(&dev_state->count); - spin_unlock_irqrestore(&state_lock, flags); - - return dev_state; -} - -static void free_device_state(struct device_state *dev_state) -{ - struct iommu_group *group; - - /* Get rid of any remaining pasid states */ - free_pasid_states(dev_state); - - /* - * Wait until the last reference is dropped before freeing - * the device state. - */ - wait_event(dev_state->wq, !atomic_read(&dev_state->count)); - - /* - * First detach device from domain - No more PRI requests will arrive - * from that device after it is unbound from the IOMMUv2 domain. - */ - group = iommu_group_get(&dev_state->pdev->dev); - if (WARN_ON(!group)) - return; - - iommu_detach_group(dev_state->domain, group); - - iommu_group_put(group); - - /* Everything is down now, free the IOMMUv2 domain */ - iommu_domain_free(dev_state->domain); - - /* Finally get rid of the device-state */ - kfree(dev_state); -} - -static void put_device_state(struct device_state *dev_state) -{ - if (atomic_dec_and_test(&dev_state->count)) - wake_up(&dev_state->wq); -} - -/* Must be called under dev_state->lock */ -static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state, - u32 pasid, bool alloc) -{ - struct pasid_state **root, **ptr; - int level, index; - - level = dev_state->pasid_levels; - root = dev_state->states; - - while (true) { - - index = (pasid >> (9 * level)) & 0x1ff; - ptr = &root[index]; - - if (level == 0) - break; - - if (*ptr == NULL) { - if (!alloc) - return NULL; - - *ptr = (void *)get_zeroed_page(GFP_ATOMIC); - if (*ptr == NULL) - return NULL; - } - - root = (struct pasid_state **)*ptr; - level -= 1; - } - - return ptr; -} - -static int set_pasid_state(struct device_state *dev_state, - struct pasid_state *pasid_state, - u32 pasid) -{ - struct pasid_state **ptr; - unsigned long flags; - int ret; - - spin_lock_irqsave(&dev_state->lock, flags); - ptr = __get_pasid_state_ptr(dev_state, pasid, true); - - ret = -ENOMEM; - if (ptr == NULL) - goto out_unlock; - - ret = -ENOMEM; - if (*ptr != NULL) - goto out_unlock; - - *ptr = pasid_state; - - ret = 0; - -out_unlock: - spin_unlock_irqrestore(&dev_state->lock, flags); - - return ret; -} - -static void clear_pasid_state(struct device_state *dev_state, u32 pasid) -{ - struct pasid_state **ptr; - unsigned long flags; - - spin_lock_irqsave(&dev_state->lock, flags); - ptr = __get_pasid_state_ptr(dev_state, pasid, true); - - if (ptr == NULL) - goto out_unlock; - - *ptr = NULL; - -out_unlock: - spin_unlock_irqrestore(&dev_state->lock, flags); -} - -static struct pasid_state *get_pasid_state(struct device_state *dev_state, - u32 pasid) -{ - struct pasid_state **ptr, *ret = NULL; - unsigned long flags; - - spin_lock_irqsave(&dev_state->lock, flags); - ptr = __get_pasid_state_ptr(dev_state, pasid, false); - - if (ptr == NULL) - goto out_unlock; - - ret = *ptr; - if (ret) - refcount_inc(&ret->count); - -out_unlock: - spin_unlock_irqrestore(&dev_state->lock, flags); - - return ret; -} - -static void free_pasid_state(struct pasid_state *pasid_state) -{ - kfree(pasid_state); -} - -static void put_pasid_state(struct pasid_state *pasid_state) -{ - if (refcount_dec_and_test(&pasid_state->count)) - wake_up(&pasid_state->wq); -} - -static void put_pasid_state_wait(struct pasid_state *pasid_state) -{ - if (!refcount_dec_and_test(&pasid_state->count)) - wait_event(pasid_state->wq, !refcount_read(&pasid_state->count)); - free_pasid_state(pasid_state); -} - -static void unbind_pasid(struct pasid_state *pasid_state) -{ - struct iommu_domain *domain; - - domain = pasid_state->device_state->domain; - - /* - * Mark pasid_state as invalid, no more faults will we added to the - * work queue after this is visible everywhere. - */ - pasid_state->invalid = true; - - /* Make sure this is visible */ - smp_wmb(); - - /* After this the device/pasid can't access the mm anymore */ - amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid); - - /* Make sure no more pending faults are in the queue */ - flush_workqueue(iommu_wq); -} - -static void free_pasid_states_level1(struct pasid_state **tbl) -{ - int i; - - for (i = 0; i < 512; ++i) { - if (tbl[i] == NULL) - continue; - - free_page((unsigned long)tbl[i]); - } -} - -static void free_pasid_states_level2(struct pasid_state **tbl) -{ - struct pasid_state **ptr; - int i; - - for (i = 0; i < 512; ++i) { - if (tbl[i] == NULL) - continue; - - ptr = (struct pasid_state **)tbl[i]; - free_pasid_states_level1(ptr); - } -} - -static void free_pasid_states(struct device_state *dev_state) -{ - struct pasid_state *pasid_state; - int i; - - for (i = 0; i < dev_state->max_pasids; ++i) { - pasid_state = get_pasid_state(dev_state, i); - if (pasid_state == NULL) - continue; - - put_pasid_state(pasid_state); - - /* Clear the pasid state so that the pasid can be re-used */ - clear_pasid_state(dev_state, pasid_state->pasid); - - /* - * This will call the mn_release function and - * unbind the PASID - */ - mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); - - put_pasid_state_wait(pasid_state); /* Reference taken in - amd_iommu_bind_pasid */ - - /* Drop reference taken in amd_iommu_bind_pasid */ - put_device_state(dev_state); - } - - if (dev_state->pasid_levels == 2) - free_pasid_states_level2(dev_state->states); - else if (dev_state->pasid_levels == 1) - free_pasid_states_level1(dev_state->states); - else - BUG_ON(dev_state->pasid_levels != 0); - - free_page((unsigned long)dev_state->states); -} - -static struct pasid_state *mn_to_state(struct mmu_notifier *mn) -{ - return container_of(mn, struct pasid_state, mn); -} - -static void mn_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - - pasid_state = mn_to_state(mn); - dev_state = pasid_state->device_state; - - if ((start ^ (end - 1)) < PAGE_SIZE) - amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, - start); - else - amd_iommu_flush_tlb(dev_state->domain, pasid_state->pasid); -} - -static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - bool run_inv_ctx_cb; - - might_sleep(); - - pasid_state = mn_to_state(mn); - dev_state = pasid_state->device_state; - run_inv_ctx_cb = !pasid_state->invalid; - - if (run_inv_ctx_cb && dev_state->inv_ctx_cb) - dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid); - - unbind_pasid(pasid_state); -} - -static const struct mmu_notifier_ops iommu_mn = { - .release = mn_release, - .arch_invalidate_secondary_tlbs = mn_arch_invalidate_secondary_tlbs, -}; - -static void set_pri_tag_status(struct pasid_state *pasid_state, - u16 tag, int status) -{ - unsigned long flags; - - spin_lock_irqsave(&pasid_state->lock, flags); - pasid_state->pri[tag].status = status; - spin_unlock_irqrestore(&pasid_state->lock, flags); -} - -static void finish_pri_tag(struct device_state *dev_state, - struct pasid_state *pasid_state, - u16 tag) -{ - unsigned long flags; - - spin_lock_irqsave(&pasid_state->lock, flags); - if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) && - pasid_state->pri[tag].finish) { - amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid, - pasid_state->pri[tag].status, tag); - pasid_state->pri[tag].finish = false; - pasid_state->pri[tag].status = PPR_SUCCESS; - } - spin_unlock_irqrestore(&pasid_state->lock, flags); -} - -static void handle_fault_error(struct fault *fault) -{ - int status; - - if (!fault->dev_state->inv_ppr_cb) { - set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); - return; - } - - status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev, - fault->pasid, - fault->address, - fault->flags); - switch (status) { - case AMD_IOMMU_INV_PRI_RSP_SUCCESS: - set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS); - break; - case AMD_IOMMU_INV_PRI_RSP_INVALID: - set_pri_tag_status(fault->state, fault->tag, PPR_INVALID); - break; - case AMD_IOMMU_INV_PRI_RSP_FAIL: - set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE); - break; - default: - BUG(); - } -} - -static bool access_error(struct vm_area_struct *vma, struct fault *fault) -{ - unsigned long requested = 0; - - if (fault->flags & PPR_FAULT_EXEC) - requested |= VM_EXEC; - - if (fault->flags & PPR_FAULT_READ) - requested |= VM_READ; - - if (fault->flags & PPR_FAULT_WRITE) - requested |= VM_WRITE; - - return (requested & ~vma->vm_flags) != 0; -} - -static void do_fault(struct work_struct *work) -{ - struct fault *fault = container_of(work, struct fault, work); - struct vm_area_struct *vma; - vm_fault_t ret = VM_FAULT_ERROR; - unsigned int flags = 0; - struct mm_struct *mm; - u64 address; - - mm = fault->state->mm; - address = fault->address; - - if (fault->flags & PPR_FAULT_USER) - flags |= FAULT_FLAG_USER; - if (fault->flags & PPR_FAULT_WRITE) - flags |= FAULT_FLAG_WRITE; - flags |= FAULT_FLAG_REMOTE; - - mmap_read_lock(mm); - vma = vma_lookup(mm, address); - if (!vma) - /* failed to get a vma in the right range */ - goto out; - - /* Check if we have the right permissions on the vma */ - if (access_error(vma, fault)) - goto out; - - ret = handle_mm_fault(vma, address, flags, NULL); -out: - mmap_read_unlock(mm); - - if (ret & VM_FAULT_ERROR) - /* failed to service fault */ - handle_fault_error(fault); - - finish_pri_tag(fault->dev_state, fault->state, fault->tag); - - put_pasid_state(fault->state); - - kfree(fault); -} - -static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data) -{ - struct amd_iommu_fault *iommu_fault; - struct pasid_state *pasid_state; - struct device_state *dev_state; - struct pci_dev *pdev = NULL; - unsigned long flags; - struct fault *fault; - bool finish; - u16 tag, devid, seg_id; - int ret; - - iommu_fault = data; - tag = iommu_fault->tag & 0x1ff; - finish = (iommu_fault->tag >> 9) & 1; - - seg_id = PCI_SBDF_TO_SEGID(iommu_fault->sbdf); - devid = PCI_SBDF_TO_DEVID(iommu_fault->sbdf); - pdev = pci_get_domain_bus_and_slot(seg_id, PCI_BUS_NUM(devid), - devid & 0xff); - if (!pdev) - return -ENODEV; - - ret = NOTIFY_DONE; - - /* In kdump kernel pci dev is not initialized yet -> send INVALID */ - if (amd_iommu_is_attach_deferred(&pdev->dev)) { - amd_iommu_complete_ppr(pdev, iommu_fault->pasid, - PPR_INVALID, tag); - goto out; - } - - dev_state = get_device_state(iommu_fault->sbdf); - if (dev_state == NULL) - goto out; - - pasid_state = get_pasid_state(dev_state, iommu_fault->pasid); - if (pasid_state == NULL || pasid_state->invalid) { - /* We know the device but not the PASID -> send INVALID */ - amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid, - PPR_INVALID, tag); - goto out_drop_state; - } - - spin_lock_irqsave(&pasid_state->lock, flags); - atomic_inc(&pasid_state->pri[tag].inflight); - if (finish) - pasid_state->pri[tag].finish = true; - spin_unlock_irqrestore(&pasid_state->lock, flags); - - fault = kzalloc(sizeof(*fault), GFP_ATOMIC); - if (fault == NULL) { - /* We are OOM - send success and let the device re-fault */ - finish_pri_tag(dev_state, pasid_state, tag); - goto out_drop_state; - } - - fault->dev_state = dev_state; - fault->address = iommu_fault->address; - fault->state = pasid_state; - fault->tag = tag; - fault->finish = finish; - fault->pasid = iommu_fault->pasid; - fault->flags = iommu_fault->flags; - INIT_WORK(&fault->work, do_fault); - - queue_work(iommu_wq, &fault->work); - - ret = NOTIFY_OK; - -out_drop_state: - - if (ret != NOTIFY_OK && pasid_state) - put_pasid_state(pasid_state); - - put_device_state(dev_state); - -out: - pci_dev_put(pdev); - return ret; -} - -static struct notifier_block ppr_nb = { - .notifier_call = ppr_notifier, -}; - -int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, - struct task_struct *task) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - struct mm_struct *mm; - u32 sbdf; - int ret; - - might_sleep(); - - if (!amd_iommu_v2_supported()) - return -ENODEV; - - sbdf = get_pci_sbdf_id(pdev); - dev_state = get_device_state(sbdf); - - if (dev_state == NULL) - return -EINVAL; - - ret = -EINVAL; - if (pasid >= dev_state->max_pasids) - goto out; - - ret = -ENOMEM; - pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL); - if (pasid_state == NULL) - goto out; - - - refcount_set(&pasid_state->count, 1); - init_waitqueue_head(&pasid_state->wq); - spin_lock_init(&pasid_state->lock); - - mm = get_task_mm(task); - pasid_state->mm = mm; - pasid_state->device_state = dev_state; - pasid_state->pasid = pasid; - pasid_state->invalid = true; /* Mark as valid only if we are - done with setting up the pasid */ - pasid_state->mn.ops = &iommu_mn; - - if (pasid_state->mm == NULL) - goto out_free; - - ret = mmu_notifier_register(&pasid_state->mn, mm); - if (ret) - goto out_free; - - ret = set_pasid_state(dev_state, pasid_state, pasid); - if (ret) - goto out_unregister; - - ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid, - __pa(pasid_state->mm->pgd)); - if (ret) - goto out_clear_state; - - /* Now we are ready to handle faults */ - pasid_state->invalid = false; - - /* - * Drop the reference to the mm_struct here. We rely on the - * mmu_notifier release call-back to inform us when the mm - * is going away. - */ - mmput(mm); - - return 0; - -out_clear_state: - clear_pasid_state(dev_state, pasid); - -out_unregister: - mmu_notifier_unregister(&pasid_state->mn, mm); - mmput(mm); - -out_free: - free_pasid_state(pasid_state); - -out: - put_device_state(dev_state); - - return ret; -} -EXPORT_SYMBOL(amd_iommu_bind_pasid); - -void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - u32 sbdf; - - might_sleep(); - - if (!amd_iommu_v2_supported()) - return; - - sbdf = get_pci_sbdf_id(pdev); - dev_state = get_device_state(sbdf); - if (dev_state == NULL) - return; - - if (pasid >= dev_state->max_pasids) - goto out; - - pasid_state = get_pasid_state(dev_state, pasid); - if (pasid_state == NULL) - goto out; - /* - * Drop reference taken here. We are safe because we still hold - * the reference taken in the amd_iommu_bind_pasid function. - */ - put_pasid_state(pasid_state); - - /* Clear the pasid state so that the pasid can be re-used */ - clear_pasid_state(dev_state, pasid_state->pasid); - - /* - * Call mmu_notifier_unregister to drop our reference - * to pasid_state->mm - */ - mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm); - - put_pasid_state_wait(pasid_state); /* Reference taken in - amd_iommu_bind_pasid */ -out: - /* Drop reference taken in this function */ - put_device_state(dev_state); - - /* Drop reference taken in amd_iommu_bind_pasid */ - put_device_state(dev_state); -} -EXPORT_SYMBOL(amd_iommu_unbind_pasid); - -int amd_iommu_init_device(struct pci_dev *pdev, int pasids) -{ - struct device_state *dev_state; - struct iommu_group *group; - unsigned long flags; - int ret, tmp; - u32 sbdf; - - might_sleep(); - - /* - * When memory encryption is active the device is likely not in a - * direct-mapped domain. Forbid using IOMMUv2 functionality for now. - */ - if (cc_platform_has(CC_ATTR_MEM_ENCRYPT)) - return -ENODEV; - - if (!amd_iommu_v2_supported()) - return -ENODEV; - - if (pasids <= 0 || pasids > (PASID_MASK + 1)) - return -EINVAL; - - sbdf = get_pci_sbdf_id(pdev); - - dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL); - if (dev_state == NULL) - return -ENOMEM; - - spin_lock_init(&dev_state->lock); - init_waitqueue_head(&dev_state->wq); - dev_state->pdev = pdev; - dev_state->sbdf = sbdf; - - tmp = pasids; - for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9) - dev_state->pasid_levels += 1; - - atomic_set(&dev_state->count, 1); - dev_state->max_pasids = pasids; - - ret = -ENOMEM; - dev_state->states = (void *)get_zeroed_page(GFP_KERNEL); - if (dev_state->states == NULL) - goto out_free_dev_state; - - dev_state->domain = iommu_domain_alloc(&pci_bus_type); - if (dev_state->domain == NULL) - goto out_free_states; - - /* See iommu_is_default_domain() */ - dev_state->domain->type = IOMMU_DOMAIN_IDENTITY; - amd_iommu_domain_direct_map(dev_state->domain); - - ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids); - if (ret) - goto out_free_domain; - - group = iommu_group_get(&pdev->dev); - if (!group) { - ret = -EINVAL; - goto out_free_domain; - } - - ret = iommu_attach_group(dev_state->domain, group); - if (ret != 0) - goto out_drop_group; - - iommu_group_put(group); - - spin_lock_irqsave(&state_lock, flags); - - if (__get_device_state(sbdf) != NULL) { - spin_unlock_irqrestore(&state_lock, flags); - ret = -EBUSY; - goto out_free_domain; - } - - list_add_tail(&dev_state->list, &state_list); - - spin_unlock_irqrestore(&state_lock, flags); - - return 0; - -out_drop_group: - iommu_group_put(group); - -out_free_domain: - iommu_domain_free(dev_state->domain); - -out_free_states: - free_page((unsigned long)dev_state->states); - -out_free_dev_state: - kfree(dev_state); - - return ret; -} -EXPORT_SYMBOL(amd_iommu_init_device); - -void amd_iommu_free_device(struct pci_dev *pdev) -{ - struct device_state *dev_state; - unsigned long flags; - u32 sbdf; - - if (!amd_iommu_v2_supported()) - return; - - sbdf = get_pci_sbdf_id(pdev); - - spin_lock_irqsave(&state_lock, flags); - - dev_state = __get_device_state(sbdf); - if (dev_state == NULL) { - spin_unlock_irqrestore(&state_lock, flags); - return; - } - - list_del(&dev_state->list); - - spin_unlock_irqrestore(&state_lock, flags); - - put_device_state(dev_state); - free_device_state(dev_state); -} -EXPORT_SYMBOL(amd_iommu_free_device); - -int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, - amd_iommu_invalid_ppr_cb cb) -{ - struct device_state *dev_state; - unsigned long flags; - u32 sbdf; - int ret; - - if (!amd_iommu_v2_supported()) - return -ENODEV; - - sbdf = get_pci_sbdf_id(pdev); - - spin_lock_irqsave(&state_lock, flags); - - ret = -EINVAL; - dev_state = __get_device_state(sbdf); - if (dev_state == NULL) - goto out_unlock; - - dev_state->inv_ppr_cb = cb; - - ret = 0; - -out_unlock: - spin_unlock_irqrestore(&state_lock, flags); - - return ret; -} -EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb); - -int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, - amd_iommu_invalidate_ctx cb) -{ - struct device_state *dev_state; - unsigned long flags; - u32 sbdf; - int ret; - - if (!amd_iommu_v2_supported()) - return -ENODEV; - - sbdf = get_pci_sbdf_id(pdev); - - spin_lock_irqsave(&state_lock, flags); - - ret = -EINVAL; - dev_state = __get_device_state(sbdf); - if (dev_state == NULL) - goto out_unlock; - - dev_state->inv_ctx_cb = cb; - - ret = 0; - -out_unlock: - spin_unlock_irqrestore(&state_lock, flags); - - return ret; -} -EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb); - -static int __init amd_iommu_v2_init(void) -{ - int ret; - - if (!amd_iommu_v2_supported()) { - pr_info("AMD IOMMUv2 functionality not available on this system - This is not a bug.\n"); - /* - * Load anyway to provide the symbols to other modules - * which may use AMD IOMMUv2 optionally. - */ - return 0; - } - - ret = -ENOMEM; - iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0); - if (iommu_wq == NULL) - goto out; - - amd_iommu_register_ppr_notifier(&ppr_nb); - - pr_info("AMD IOMMUv2 loaded and initialized\n"); - - return 0; - -out: - return ret; -} - -static void __exit amd_iommu_v2_exit(void) -{ - struct device_state *dev_state, *next; - unsigned long flags; - LIST_HEAD(freelist); - - if (!amd_iommu_v2_supported()) - return; - - amd_iommu_unregister_ppr_notifier(&ppr_nb); - - flush_workqueue(iommu_wq); - - /* - * The loop below might call flush_workqueue(), so call - * destroy_workqueue() after it - */ - spin_lock_irqsave(&state_lock, flags); - - list_for_each_entry_safe(dev_state, next, &state_list, list) { - WARN_ON_ONCE(1); - - put_device_state(dev_state); - list_del(&dev_state->list); - list_add_tail(&dev_state->list, &freelist); - } - - spin_unlock_irqrestore(&state_lock, flags); - - /* - * Since free_device_state waits on the count to be zero, - * we need to free dev_state outside the spinlock. - */ - list_for_each_entry_safe(dev_state, next, &freelist, list) { - list_del(&dev_state->list); - free_device_state(dev_state); - } - - destroy_workqueue(iommu_wq); -} - -module_init(amd_iommu_v2_init); -module_exit(amd_iommu_v2_exit); diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 821b4a3465df..ee05f4824bfa 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -504,10 +504,11 @@ static void apple_dart_iotlb_sync(struct iommu_domain *domain, apple_dart_domain_flush_tlb(to_dart_domain(domain)); } -static void apple_dart_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int apple_dart_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { apple_dart_domain_flush_tlb(to_dart_domain(domain)); + return 0; } static phys_addr_t apple_dart_iova_to_phys(struct iommu_domain *domain, diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c index 8a16cd3ef487..353248ab18e7 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c @@ -25,11 +25,9 @@ struct arm_smmu_mmu_notifier { #define mn_to_smmu(mn) container_of(mn, struct arm_smmu_mmu_notifier, mn) struct arm_smmu_bond { - struct iommu_sva sva; struct mm_struct *mm; struct arm_smmu_mmu_notifier *smmu_mn; struct list_head list; - refcount_t refs; }; #define sva_to_bond(handle) \ @@ -38,6 +36,25 @@ struct arm_smmu_bond { static DEFINE_MUTEX(sva_lock); /* + * Write the CD to the CD tables for all masters that this domain is attached + * to. Note that this is only used to update existing CD entries in the target + * CD table, for which it's assumed that arm_smmu_write_ctx_desc can't fail. + */ +static void arm_smmu_update_ctx_desc_devices(struct arm_smmu_domain *smmu_domain, + int ssid, + struct arm_smmu_ctx_desc *cd) +{ + struct arm_smmu_master *master; + unsigned long flags; + + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master, &smmu_domain->devices, domain_head) { + arm_smmu_write_ctx_desc(master, ssid, cd); + } + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); +} + +/* * Check if the CPU ASID is available on the SMMU side. If a private context * descriptor is using it, try to replace it. */ @@ -62,7 +79,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid) return cd; } - smmu_domain = container_of(cd, struct arm_smmu_domain, s1_cfg.cd); + smmu_domain = container_of(cd, struct arm_smmu_domain, cd); smmu = smmu_domain->smmu; ret = xa_alloc(&arm_smmu_asid_xa, &new_asid, cd, @@ -80,7 +97,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid) * be some overlap between use of both ASIDs, until we invalidate the * TLB. */ - arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, cd); + arm_smmu_update_ctx_desc_devices(smmu_domain, IOMMU_NO_PASID, cd); /* Invalidate TLB entries previously associated with that context */ arm_smmu_tlb_inv_asid(smmu, asid); @@ -247,7 +264,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) * DMA may still be running. Keep the cd valid to avoid C_BAD_CD events, * but disable translation. */ - arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, &quiet_cd); + arm_smmu_update_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd); arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid); arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0); @@ -273,8 +290,10 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain, struct mm_struct *mm) { int ret; + unsigned long flags; struct arm_smmu_ctx_desc *cd; struct arm_smmu_mmu_notifier *smmu_mn; + struct arm_smmu_master *master; list_for_each_entry(smmu_mn, &smmu_domain->mmu_notifiers, list) { if (smmu_mn->mn.mm == mm) { @@ -304,7 +323,16 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain, goto err_free_cd; } - ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd); + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_for_each_entry(master, &smmu_domain->devices, domain_head) { + ret = arm_smmu_write_ctx_desc(master, mm->pasid, cd); + if (ret) { + list_for_each_entry_from_reverse(master, &smmu_domain->devices, domain_head) + arm_smmu_write_ctx_desc(master, mm->pasid, NULL); + break; + } + } + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); if (ret) goto err_put_notifier; @@ -329,7 +357,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) return; list_del(&smmu_mn->list); - arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, NULL); + + arm_smmu_update_ctx_desc_devices(smmu_domain, mm->pasid, NULL); /* * If we went through clear(), we've already invalidated, and no @@ -345,8 +374,7 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn) arm_smmu_free_shared_cd(cd); } -static struct iommu_sva * -__arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) +static int __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) { int ret; struct arm_smmu_bond *bond; @@ -355,23 +383,13 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); if (!master || !master->sva_enabled) - return ERR_PTR(-ENODEV); - - /* If bind() was already called for this {dev, mm} pair, reuse it. */ - list_for_each_entry(bond, &master->bonds, list) { - if (bond->mm == mm) { - refcount_inc(&bond->refs); - return &bond->sva; - } - } + return -ENODEV; bond = kzalloc(sizeof(*bond), GFP_KERNEL); if (!bond) - return ERR_PTR(-ENOMEM); + return -ENOMEM; bond->mm = mm; - bond->sva.dev = dev; - refcount_set(&bond->refs, 1); bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm); if (IS_ERR(bond->smmu_mn)) { @@ -380,11 +398,11 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm) } list_add(&bond->list, &master->bonds); - return &bond->sva; + return 0; err_free_bond: kfree(bond); - return ERR_PTR(ret); + return ret; } bool arm_smmu_sva_supported(struct arm_smmu_device *smmu) @@ -550,7 +568,7 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain, } } - if (!WARN_ON(!bond) && refcount_dec_and_test(&bond->refs)) { + if (!WARN_ON(!bond)) { list_del(&bond->list); arm_smmu_mmu_notifier_put(bond->smmu_mn); kfree(bond); @@ -562,13 +580,10 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain, struct device *dev, ioasid_t id) { int ret = 0; - struct iommu_sva *handle; struct mm_struct *mm = domain->mm; mutex_lock(&sva_lock); - handle = __arm_smmu_sva_bind(dev, mm); - if (IS_ERR(handle)) - ret = PTR_ERR(handle); + ret = __arm_smmu_sva_bind(dev, mm); mutex_unlock(&sva_lock); return ret; diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index bd0a596f9863..7445454c2af2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -971,14 +971,12 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid) arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd); } -static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, +static void arm_smmu_sync_cd(struct arm_smmu_master *master, int ssid, bool leaf) { size_t i; - unsigned long flags; - struct arm_smmu_master *master; struct arm_smmu_cmdq_batch cmds; - struct arm_smmu_device *smmu = smmu_domain->smmu; + struct arm_smmu_device *smmu = master->smmu; struct arm_smmu_cmdq_ent cmd = { .opcode = CMDQ_OP_CFGI_CD, .cfgi = { @@ -988,15 +986,10 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, }; cmds.num = 0; - - spin_lock_irqsave(&smmu_domain->devices_lock, flags); - list_for_each_entry(master, &smmu_domain->devices, domain_head) { - for (i = 0; i < master->num_streams; i++) { - cmd.cfgi.sid = master->streams[i].id; - arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); - } + for (i = 0; i < master->num_streams; i++) { + cmd.cfgi.sid = master->streams[i].id; + arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd); } - spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); arm_smmu_cmdq_batch_submit(smmu, &cmds); } @@ -1026,34 +1019,33 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst, WRITE_ONCE(*dst, cpu_to_le64(val)); } -static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain, - u32 ssid) +static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid) { __le64 *l1ptr; unsigned int idx; struct arm_smmu_l1_ctx_desc *l1_desc; - struct arm_smmu_device *smmu = smmu_domain->smmu; - struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg; + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR) - return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS; + if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR) + return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS; idx = ssid >> CTXDESC_SPLIT; - l1_desc = &cdcfg->l1_desc[idx]; + l1_desc = &cd_table->l1_desc[idx]; if (!l1_desc->l2ptr) { if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc)) return NULL; - l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS; + l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS; arm_smmu_write_cd_l1_desc(l1ptr, l1_desc); /* An invalid L1CD can be cached */ - arm_smmu_sync_cd(smmu_domain, ssid, false); + arm_smmu_sync_cd(master, ssid, false); } idx = ssid & (CTXDESC_L2_ENTRIES - 1); return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS; } -int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, +int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid, struct arm_smmu_ctx_desc *cd) { /* @@ -1070,11 +1062,12 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, u64 val; bool cd_live; __le64 *cdptr; + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax))) + if (WARN_ON(ssid >= (1 << cd_table->s1cdmax))) return -E2BIG; - cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid); + cdptr = arm_smmu_get_cd_ptr(master, ssid); if (!cdptr) return -ENOMEM; @@ -1098,11 +1091,11 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, cdptr[3] = cpu_to_le64(cd->mair); /* - * STE is live, and the SMMU might read dwords of this CD in any + * STE may be live, and the SMMU might read dwords of this CD in any * order. Ensure that it observes valid values before reading * V=1. */ - arm_smmu_sync_cd(smmu_domain, ssid, true); + arm_smmu_sync_cd(master, ssid, true); val = cd->tcr | #ifdef __BIG_ENDIAN @@ -1114,7 +1107,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) | CTXDESC_CD_0_V; - if (smmu_domain->stall_enabled) + if (cd_table->stall_enabled) val |= CTXDESC_CD_0_S; } @@ -1128,44 +1121,45 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, * without first making the structure invalid. */ WRITE_ONCE(cdptr[0], cpu_to_le64(val)); - arm_smmu_sync_cd(smmu_domain, ssid, true); + arm_smmu_sync_cd(master, ssid, true); return 0; } -static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) +static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master) { int ret; size_t l1size; size_t max_contexts; - struct arm_smmu_device *smmu = smmu_domain->smmu; - struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; - struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg; + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - max_contexts = 1 << cfg->s1cdmax; + cd_table->stall_enabled = master->stall_enabled; + cd_table->s1cdmax = master->ssid_bits; + max_contexts = 1 << cd_table->s1cdmax; if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) || max_contexts <= CTXDESC_L2_ENTRIES) { - cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; - cdcfg->num_l1_ents = max_contexts; + cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; + cd_table->num_l1_ents = max_contexts; l1size = max_contexts * (CTXDESC_CD_DWORDS << 3); } else { - cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2; - cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts, + cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2; + cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts, CTXDESC_L2_ENTRIES); - cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents, - sizeof(*cdcfg->l1_desc), + cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents, + sizeof(*cd_table->l1_desc), GFP_KERNEL); - if (!cdcfg->l1_desc) + if (!cd_table->l1_desc) return -ENOMEM; - l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); + l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); } - cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma, + cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma, GFP_KERNEL); - if (!cdcfg->cdtab) { + if (!cd_table->cdtab) { dev_warn(smmu->dev, "failed to allocate context descriptor\n"); ret = -ENOMEM; goto err_free_l1; @@ -1174,42 +1168,42 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) return 0; err_free_l1: - if (cdcfg->l1_desc) { - devm_kfree(smmu->dev, cdcfg->l1_desc); - cdcfg->l1_desc = NULL; + if (cd_table->l1_desc) { + devm_kfree(smmu->dev, cd_table->l1_desc); + cd_table->l1_desc = NULL; } return ret; } -static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain) +static void arm_smmu_free_cd_tables(struct arm_smmu_master *master) { int i; size_t size, l1size; - struct arm_smmu_device *smmu = smmu_domain->smmu; - struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg; + struct arm_smmu_device *smmu = master->smmu; + struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table; - if (cdcfg->l1_desc) { + if (cd_table->l1_desc) { size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3); - for (i = 0; i < cdcfg->num_l1_ents; i++) { - if (!cdcfg->l1_desc[i].l2ptr) + for (i = 0; i < cd_table->num_l1_ents; i++) { + if (!cd_table->l1_desc[i].l2ptr) continue; dmam_free_coherent(smmu->dev, size, - cdcfg->l1_desc[i].l2ptr, - cdcfg->l1_desc[i].l2ptr_dma); + cd_table->l1_desc[i].l2ptr, + cd_table->l1_desc[i].l2ptr_dma); } - devm_kfree(smmu->dev, cdcfg->l1_desc); - cdcfg->l1_desc = NULL; + devm_kfree(smmu->dev, cd_table->l1_desc); + cd_table->l1_desc = NULL; - l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); + l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3); } else { - l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3); + l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3); } - dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma); - cdcfg->cdtab_dma = 0; - cdcfg->cdtab = NULL; + dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma); + cd_table->cdtab_dma = 0; + cd_table->cdtab = NULL; } bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd) @@ -1276,7 +1270,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, u64 val = le64_to_cpu(dst[0]); bool ste_live = false; struct arm_smmu_device *smmu = NULL; - struct arm_smmu_s1_cfg *s1_cfg = NULL; + struct arm_smmu_ctx_desc_cfg *cd_table = NULL; struct arm_smmu_s2_cfg *s2_cfg = NULL; struct arm_smmu_domain *smmu_domain = NULL; struct arm_smmu_cmdq_ent prefetch_cmd = { @@ -1294,7 +1288,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, if (smmu_domain) { switch (smmu_domain->stage) { case ARM_SMMU_DOMAIN_S1: - s1_cfg = &smmu_domain->s1_cfg; + cd_table = &master->cd_table; break; case ARM_SMMU_DOMAIN_S2: case ARM_SMMU_DOMAIN_NESTED: @@ -1325,7 +1319,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, val = STRTAB_STE_0_V; /* Bypass/fault */ - if (!smmu_domain || !(s1_cfg || s2_cfg)) { + if (!smmu_domain || !(cd_table || s2_cfg)) { if (!smmu_domain && disable_bypass) val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT); else @@ -1344,7 +1338,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, return; } - if (s1_cfg) { + if (cd_table) { u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ? STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1; @@ -1360,10 +1354,10 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, !master->stall_enabled) dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); - val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | + val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) | FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | - FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) | - FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt); + FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) | + FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt); } if (s2_cfg) { @@ -1869,7 +1863,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) * careful, 007. */ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { - arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid); + arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid); } else { cmd.opcode = CMDQ_OP_TLBI_S12_VMALL; cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; @@ -1962,7 +1956,7 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size, if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ? CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA; - cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid; + cmd.tlbi.asid = smmu_domain->cd.asid; } else { cmd.opcode = CMDQ_OP_TLBI_S2_IPA; cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid; @@ -2067,15 +2061,11 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) free_io_pgtable_ops(smmu_domain->pgtbl_ops); - /* Free the CD and ASID, if we allocated them */ + /* Free the ASID or VMID */ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { - struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; - /* Prevent SVA from touching the CD while we're freeing it */ mutex_lock(&arm_smmu_asid_lock); - if (cfg->cdcfg.cdtab) - arm_smmu_free_cd_tables(smmu_domain); - arm_smmu_free_asid(&cfg->cd); + arm_smmu_free_asid(&smmu_domain->cd); mutex_unlock(&arm_smmu_asid_lock); } else { struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg; @@ -2087,66 +2077,43 @@ static void arm_smmu_domain_free(struct iommu_domain *domain) } static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_master *master, struct io_pgtable_cfg *pgtbl_cfg) { int ret; u32 asid; struct arm_smmu_device *smmu = smmu_domain->smmu; - struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; + struct arm_smmu_ctx_desc *cd = &smmu_domain->cd; typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr; - refcount_set(&cfg->cd.refs, 1); + refcount_set(&cd->refs, 1); /* Prevent SVA from modifying the ASID until it is written to the CD */ mutex_lock(&arm_smmu_asid_lock); - ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd, + ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd, XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL); if (ret) goto out_unlock; - cfg->s1cdmax = master->ssid_bits; - - smmu_domain->stall_enabled = master->stall_enabled; - - ret = arm_smmu_alloc_cd_tables(smmu_domain); - if (ret) - goto out_free_asid; - - cfg->cd.asid = (u16)asid; - cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; - cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) | + cd->asid = (u16)asid; + cd->ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr; + cd->tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) | FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) | FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) | FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) | FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) | FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) | CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64; - cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; - - /* - * Note that this will end up calling arm_smmu_sync_cd() before - * the master has been added to the devices list for this domain. - * This isn't an issue because the STE hasn't been installed yet. - */ - ret = arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, &cfg->cd); - if (ret) - goto out_free_cd_tables; + cd->mair = pgtbl_cfg->arm_lpae_s1_cfg.mair; mutex_unlock(&arm_smmu_asid_lock); return 0; -out_free_cd_tables: - arm_smmu_free_cd_tables(smmu_domain); -out_free_asid: - arm_smmu_free_asid(&cfg->cd); out_unlock: mutex_unlock(&arm_smmu_asid_lock); return ret; } static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, - struct arm_smmu_master *master, struct io_pgtable_cfg *pgtbl_cfg) { int vmid; @@ -2173,8 +2140,7 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain, return 0; } -static int arm_smmu_domain_finalise(struct iommu_domain *domain, - struct arm_smmu_master *master) +static int arm_smmu_domain_finalise(struct iommu_domain *domain) { int ret; unsigned long ias, oas; @@ -2182,7 +2148,6 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain, struct io_pgtable_cfg pgtbl_cfg; struct io_pgtable_ops *pgtbl_ops; int (*finalise_stage_fn)(struct arm_smmu_domain *, - struct arm_smmu_master *, struct io_pgtable_cfg *); struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; @@ -2234,7 +2199,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain, domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1; domain->geometry.force_aperture = true; - ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg); + ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg); if (ret < 0) { free_io_pgtable_ops(pgtbl_ops); return ret; @@ -2403,6 +2368,14 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master) master->domain = NULL; master->ats_enabled = false; arm_smmu_install_ste_for_dev(master); + /* + * Clearing the CD entry isn't strictly required to detach the domain + * since the table is uninstalled anyway, but it helps avoid confusion + * in the call to arm_smmu_write_ctx_desc on the next attach (which + * expects the entry to be empty). + */ + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->cd_table.cdtab) + arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL); } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -2436,23 +2409,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) if (!smmu_domain->smmu) { smmu_domain->smmu = smmu; - ret = arm_smmu_domain_finalise(domain, master); - if (ret) { + ret = arm_smmu_domain_finalise(domain); + if (ret) smmu_domain->smmu = NULL; - goto out_unlock; - } - } else if (smmu_domain->smmu != smmu) { - ret = -EINVAL; - goto out_unlock; - } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && - master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) { + } else if (smmu_domain->smmu != smmu) ret = -EINVAL; - goto out_unlock; - } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && - smmu_domain->stall_enabled != master->stall_enabled) { - ret = -EINVAL; - goto out_unlock; - } + + mutex_unlock(&smmu_domain->init_mutex); + if (ret) + return ret; master->domain = smmu_domain; @@ -2466,16 +2431,42 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS) master->ats_enabled = arm_smmu_ats_supported(master); - arm_smmu_install_ste_for_dev(master); - spin_lock_irqsave(&smmu_domain->devices_lock, flags); list_add(&master->domain_head, &smmu_domain->devices); spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { + if (!master->cd_table.cdtab) { + ret = arm_smmu_alloc_cd_tables(master); + if (ret) { + master->domain = NULL; + goto out_list_del; + } + } + + /* + * Prevent SVA from concurrently modifying the CD or writing to + * the CD entry + */ + mutex_lock(&arm_smmu_asid_lock); + ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd); + mutex_unlock(&arm_smmu_asid_lock); + if (ret) { + master->domain = NULL; + goto out_list_del; + } + } + + arm_smmu_install_ste_for_dev(master); + arm_smmu_enable_ats(master); + return 0; + +out_list_del: + spin_lock_irqsave(&smmu_domain->devices_lock, flags); + list_del(&master->domain_head); + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); -out_unlock: - mutex_unlock(&smmu_domain->init_mutex); return ret; } @@ -2720,6 +2711,8 @@ static void arm_smmu_release_device(struct device *dev) arm_smmu_detach_dev(master); arm_smmu_disable_pasid(master); arm_smmu_remove_master(master); + if (master->cd_table.cdtab) + arm_smmu_free_cd_tables(master); kfree(master); } diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h index 9915850dd4db..961205ba86d2 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h @@ -595,13 +595,11 @@ struct arm_smmu_ctx_desc_cfg { dma_addr_t cdtab_dma; struct arm_smmu_l1_ctx_desc *l1_desc; unsigned int num_l1_ents; -}; - -struct arm_smmu_s1_cfg { - struct arm_smmu_ctx_desc_cfg cdcfg; - struct arm_smmu_ctx_desc cd; u8 s1fmt; + /* log2 of the maximum number of CDs supported by this table */ u8 s1cdmax; + /* Whether CD entries in this table have the stall bit set. */ + u8 stall_enabled:1; }; struct arm_smmu_s2_cfg { @@ -697,6 +695,8 @@ struct arm_smmu_master { struct arm_smmu_domain *domain; struct list_head domain_head; struct arm_smmu_stream *streams; + /* Locked by the iommu core using the group mutex */ + struct arm_smmu_ctx_desc_cfg cd_table; unsigned int num_streams; bool ats_enabled; bool stall_enabled; @@ -719,13 +719,12 @@ struct arm_smmu_domain { struct mutex init_mutex; /* Protects smmu pointer */ struct io_pgtable_ops *pgtbl_ops; - bool stall_enabled; atomic_t nr_ats_masters; enum arm_smmu_domain_stage stage; union { - struct arm_smmu_s1_cfg s1_cfg; - struct arm_smmu_s2_cfg s2_cfg; + struct arm_smmu_ctx_desc cd; + struct arm_smmu_s2_cfg s2_cfg; }; struct iommu_domain domain; @@ -745,7 +744,7 @@ extern struct xarray arm_smmu_asid_xa; extern struct mutex arm_smmu_asid_lock; extern struct arm_smmu_ctx_desc quiet_cd; -int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid, +int arm_smmu_write_ctx_desc(struct arm_smmu_master *smmu_master, int ssid, struct arm_smmu_ctx_desc *cd); void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid); void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid, diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c index 7f52ac67495f..549ae4dba3a6 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c @@ -251,6 +251,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = { { .compatible = "qcom,sc7280-mss-pil" }, { .compatible = "qcom,sc8180x-mdss" }, { .compatible = "qcom,sc8280xp-mdss" }, + { .compatible = "qcom,sdm670-mdss" }, { .compatible = "qcom,sdm845-mdss" }, { .compatible = "qcom,sdm845-mss-pil" }, { .compatible = "qcom,sm6350-mdss" }, @@ -532,6 +533,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = { { .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm6375-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data }, + { .compatible = "qcom,sm7150-smmu-v2", .data = &qcom_smmu_v2_data }, { .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data }, { .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data }, diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 4b1a88f514c9..85163a83df2f 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -43,14 +43,28 @@ enum iommu_dma_cookie_type { IOMMU_DMA_MSI_COOKIE, }; +enum iommu_dma_queue_type { + IOMMU_DMA_OPTS_PER_CPU_QUEUE, + IOMMU_DMA_OPTS_SINGLE_QUEUE, +}; + +struct iommu_dma_options { + enum iommu_dma_queue_type qt; + size_t fq_size; + unsigned int fq_timeout; +}; + struct iommu_dma_cookie { enum iommu_dma_cookie_type type; union { /* Full allocator for IOMMU_DMA_IOVA_COOKIE */ struct { struct iova_domain iovad; - - struct iova_fq __percpu *fq; /* Flush queue */ + /* Flush queue */ + union { + struct iova_fq *single_fq; + struct iova_fq __percpu *percpu_fq; + }; /* Number of TLB flushes that have been started */ atomic64_t fq_flush_start_cnt; /* Number of TLB flushes that have been finished */ @@ -67,6 +81,8 @@ struct iommu_dma_cookie { /* Domain for flush queue callback; NULL if flush queue not in use */ struct iommu_domain *fq_domain; + /* Options for dma-iommu use */ + struct iommu_dma_options options; struct mutex mutex; }; @@ -84,10 +100,12 @@ static int __init iommu_dma_forcedac_setup(char *str) early_param("iommu.forcedac", iommu_dma_forcedac_setup); /* Number of entries per flush queue */ -#define IOVA_FQ_SIZE 256 +#define IOVA_DEFAULT_FQ_SIZE 256 +#define IOVA_SINGLE_FQ_SIZE 32768 /* Timeout (in ms) after which entries are flushed from the queue */ -#define IOVA_FQ_TIMEOUT 10 +#define IOVA_DEFAULT_FQ_TIMEOUT 10 +#define IOVA_SINGLE_FQ_TIMEOUT 1000 /* Flush queue entry for deferred flushing */ struct iova_fq_entry { @@ -99,18 +117,19 @@ struct iova_fq_entry { /* Per-CPU flush queue structure */ struct iova_fq { - struct iova_fq_entry entries[IOVA_FQ_SIZE]; - unsigned int head, tail; spinlock_t lock; + unsigned int head, tail; + unsigned int mod_mask; + struct iova_fq_entry entries[]; }; #define fq_ring_for_each(i, fq) \ - for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE) + for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) & (fq)->mod_mask) static inline bool fq_full(struct iova_fq *fq) { assert_spin_locked(&fq->lock); - return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head); + return (((fq->tail + 1) & fq->mod_mask) == fq->head); } static inline unsigned int fq_ring_add(struct iova_fq *fq) @@ -119,12 +138,12 @@ static inline unsigned int fq_ring_add(struct iova_fq *fq) assert_spin_locked(&fq->lock); - fq->tail = (idx + 1) % IOVA_FQ_SIZE; + fq->tail = (idx + 1) & fq->mod_mask; return idx; } -static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) +static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq *fq) { u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt); unsigned int idx; @@ -141,10 +160,19 @@ static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) fq->entries[idx].iova_pfn, fq->entries[idx].pages); - fq->head = (fq->head + 1) % IOVA_FQ_SIZE; + fq->head = (fq->head + 1) & fq->mod_mask; } } +static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq) +{ + unsigned long flags; + + spin_lock_irqsave(&fq->lock, flags); + fq_ring_free_locked(cookie, fq); + spin_unlock_irqrestore(&fq->lock, flags); +} + static void fq_flush_iotlb(struct iommu_dma_cookie *cookie) { atomic64_inc(&cookie->fq_flush_start_cnt); @@ -160,14 +188,11 @@ static void fq_flush_timeout(struct timer_list *t) atomic_set(&cookie->fq_timer_on, 0); fq_flush_iotlb(cookie); - for_each_possible_cpu(cpu) { - unsigned long flags; - struct iova_fq *fq; - - fq = per_cpu_ptr(cookie->fq, cpu); - spin_lock_irqsave(&fq->lock, flags); - fq_ring_free(cookie, fq); - spin_unlock_irqrestore(&fq->lock, flags); + if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) { + fq_ring_free(cookie, cookie->single_fq); + } else { + for_each_possible_cpu(cpu) + fq_ring_free(cookie, per_cpu_ptr(cookie->percpu_fq, cpu)); } } @@ -188,7 +213,11 @@ static void queue_iova(struct iommu_dma_cookie *cookie, */ smp_mb(); - fq = raw_cpu_ptr(cookie->fq); + if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) + fq = cookie->single_fq; + else + fq = raw_cpu_ptr(cookie->percpu_fq); + spin_lock_irqsave(&fq->lock, flags); /* @@ -196,11 +225,11 @@ static void queue_iova(struct iommu_dma_cookie *cookie, * flushed out on another CPU. This makes the fq_full() check below less * likely to be true. */ - fq_ring_free(cookie, fq); + fq_ring_free_locked(cookie, fq); if (fq_full(fq)) { fq_flush_iotlb(cookie); - fq_ring_free(cookie, fq); + fq_ring_free_locked(cookie, fq); } idx = fq_ring_add(fq); @@ -216,34 +245,95 @@ static void queue_iova(struct iommu_dma_cookie *cookie, if (!atomic_read(&cookie->fq_timer_on) && !atomic_xchg(&cookie->fq_timer_on, 1)) mod_timer(&cookie->fq_timer, - jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT)); + jiffies + msecs_to_jiffies(cookie->options.fq_timeout)); } -static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie) +static void iommu_dma_free_fq_single(struct iova_fq *fq) { - int cpu, idx; + int idx; - if (!cookie->fq) - return; + fq_ring_for_each(idx, fq) + put_pages_list(&fq->entries[idx].freelist); + vfree(fq); +} + +static void iommu_dma_free_fq_percpu(struct iova_fq __percpu *percpu_fq) +{ + int cpu, idx; - del_timer_sync(&cookie->fq_timer); /* The IOVAs will be torn down separately, so just free our queued pages */ for_each_possible_cpu(cpu) { - struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu); + struct iova_fq *fq = per_cpu_ptr(percpu_fq, cpu); fq_ring_for_each(idx, fq) put_pages_list(&fq->entries[idx].freelist); } - free_percpu(cookie->fq); + free_percpu(percpu_fq); +} + +static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie) +{ + if (!cookie->fq_domain) + return; + + del_timer_sync(&cookie->fq_timer); + if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) + iommu_dma_free_fq_single(cookie->single_fq); + else + iommu_dma_free_fq_percpu(cookie->percpu_fq); +} + +static void iommu_dma_init_one_fq(struct iova_fq *fq, size_t fq_size) +{ + int i; + + fq->head = 0; + fq->tail = 0; + fq->mod_mask = fq_size - 1; + + spin_lock_init(&fq->lock); + + for (i = 0; i < fq_size; i++) + INIT_LIST_HEAD(&fq->entries[i].freelist); +} + +static int iommu_dma_init_fq_single(struct iommu_dma_cookie *cookie) +{ + size_t fq_size = cookie->options.fq_size; + struct iova_fq *queue; + + queue = vmalloc(struct_size(queue, entries, fq_size)); + if (!queue) + return -ENOMEM; + iommu_dma_init_one_fq(queue, fq_size); + cookie->single_fq = queue; + + return 0; +} + +static int iommu_dma_init_fq_percpu(struct iommu_dma_cookie *cookie) +{ + size_t fq_size = cookie->options.fq_size; + struct iova_fq __percpu *queue; + int cpu; + + queue = __alloc_percpu(struct_size(queue, entries, fq_size), + __alignof__(*queue)); + if (!queue) + return -ENOMEM; + + for_each_possible_cpu(cpu) + iommu_dma_init_one_fq(per_cpu_ptr(queue, cpu), fq_size); + cookie->percpu_fq = queue; + return 0; } /* sysfs updates are serialised by the mutex of the group owning @domain */ int iommu_dma_init_fq(struct iommu_domain *domain) { struct iommu_dma_cookie *cookie = domain->iova_cookie; - struct iova_fq __percpu *queue; - int i, cpu; + int rc; if (cookie->fq_domain) return 0; @@ -251,26 +341,16 @@ int iommu_dma_init_fq(struct iommu_domain *domain) atomic64_set(&cookie->fq_flush_start_cnt, 0); atomic64_set(&cookie->fq_flush_finish_cnt, 0); - queue = alloc_percpu(struct iova_fq); - if (!queue) { + if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) + rc = iommu_dma_init_fq_single(cookie); + else + rc = iommu_dma_init_fq_percpu(cookie); + + if (rc) { pr_warn("iova flush queue initialization failed\n"); return -ENOMEM; } - for_each_possible_cpu(cpu) { - struct iova_fq *fq = per_cpu_ptr(queue, cpu); - - fq->head = 0; - fq->tail = 0; - - spin_lock_init(&fq->lock); - - for (i = 0; i < IOVA_FQ_SIZE; i++) - INIT_LIST_HEAD(&fq->entries[i].freelist); - } - - cookie->fq = queue; - timer_setup(&cookie->fq_timer, fq_flush_timeout, 0); atomic_set(&cookie->fq_timer_on, 0); /* @@ -555,6 +635,28 @@ static bool dev_use_sg_swiotlb(struct device *dev, struct scatterlist *sg, } /** + * iommu_dma_init_options - Initialize dma-iommu options + * @options: The options to be initialized + * @dev: Device the options are set for + * + * This allows tuning dma-iommu specific to device properties + */ +static void iommu_dma_init_options(struct iommu_dma_options *options, + struct device *dev) +{ + /* Shadowing IOTLB flushes do better with a single large queue */ + if (dev->iommu->shadow_on_flush) { + options->qt = IOMMU_DMA_OPTS_SINGLE_QUEUE; + options->fq_timeout = IOVA_SINGLE_FQ_TIMEOUT; + options->fq_size = IOVA_SINGLE_FQ_SIZE; + } else { + options->qt = IOMMU_DMA_OPTS_PER_CPU_QUEUE; + options->fq_size = IOVA_DEFAULT_FQ_SIZE; + options->fq_timeout = IOVA_DEFAULT_FQ_TIMEOUT; + } +} + +/** * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() * @base: IOVA at which the mappable address space starts @@ -614,6 +716,8 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, if (ret) goto done_unlock; + iommu_dma_init_options(&cookie->options, dev); + /* If the FQ fails we can simply fall back to strict mode */ if (domain->type == IOMMU_DOMAIN_DMA_FQ && (!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain))) diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c index 1f925285104e..dee61e513be6 100644 --- a/drivers/iommu/intel/debugfs.c +++ b/drivers/iommu/intel/debugfs.c @@ -111,6 +111,8 @@ static const struct iommu_regset iommu_regs_64[] = { IOMMU_REGSET_ENTRY(VCRSP), }; +static struct dentry *intel_iommu_debug; + static int iommu_regset_show(struct seq_file *m, void *unused) { struct dmar_drhd_unit *drhd; @@ -311,9 +313,14 @@ static inline unsigned long level_to_directory_size(int level) static inline void dump_page_info(struct seq_file *m, unsigned long iova, u64 *path) { - seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\n", - iova >> VTD_PAGE_SHIFT, path[5], path[4], - path[3], path[2], path[1]); + seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx", + iova >> VTD_PAGE_SHIFT, path[5], path[4], path[3]); + if (path[2]) { + seq_printf(m, "\t0x%016llx", path[2]); + if (path[1]) + seq_printf(m, "\t0x%016llx", path[1]); + } + seq_putc(m, '\n'); } static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde, @@ -340,58 +347,140 @@ static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde, } } -static int __show_device_domain_translation(struct device *dev, void *data) +static int domain_translation_struct_show(struct seq_file *m, + struct device_domain_info *info, + ioasid_t pasid) { - struct dmar_domain *domain; - struct seq_file *m = data; - u64 path[6] = { 0 }; - - domain = to_dmar_domain(iommu_get_domain_for_dev(dev)); - if (!domain) - return 0; + bool scalable, found = false; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + u16 devfn, bus, seg; - seq_printf(m, "Device %s @0x%llx\n", dev_name(dev), - (u64)virt_to_phys(domain->pgd)); - seq_puts(m, "IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n"); + bus = info->bus; + devfn = info->devfn; + seg = info->segment; - pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path); - seq_putc(m, '\n'); + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + struct context_entry *context; + u64 pgd, path[6] = { 0 }; + u32 sts, agaw; - /* Don't iterate */ - return 1; -} + if (seg != iommu->segment) + continue; -static int show_device_domain_translation(struct device *dev, void *data) -{ - struct iommu_group *group; + sts = dmar_readl(iommu->reg + DMAR_GSTS_REG); + if (!(sts & DMA_GSTS_TES)) { + seq_printf(m, "DMA Remapping is not enabled on %s\n", + iommu->name); + continue; + } + if (dmar_readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT) + scalable = true; + else + scalable = false; - group = iommu_group_get(dev); - if (group) { /* - * The group->mutex is held across the callback, which will - * block calls to iommu_attach/detach_group/device. Hence, + * The iommu->lock is held across the callback, which will + * block calls to domain_attach/domain_detach. Hence, * the domain of the device will not change during traversal. * - * All devices in an iommu group share a single domain, hence - * we only dump the domain of the first device. Even though, - * this code still possibly races with the iommu_unmap() + * Traversing page table possibly races with the iommu_unmap() * interface. This could be solved by RCU-freeing the page * table pages in the iommu_unmap() path. */ - iommu_group_for_each_dev(group, data, - __show_device_domain_translation); - iommu_group_put(group); + spin_lock(&iommu->lock); + + context = iommu_context_addr(iommu, bus, devfn, 0); + if (!context || !context_present(context)) + goto iommu_unlock; + + if (scalable) { /* scalable mode */ + struct pasid_entry *pasid_tbl, *pasid_tbl_entry; + struct pasid_dir_entry *dir_tbl, *dir_entry; + u16 dir_idx, tbl_idx, pgtt; + u64 pasid_dir_ptr; + + pasid_dir_ptr = context->lo & VTD_PAGE_MASK; + + /* Dump specified device domain mappings with PASID. */ + dir_idx = pasid >> PASID_PDE_SHIFT; + tbl_idx = pasid & PASID_PTE_MASK; + + dir_tbl = phys_to_virt(pasid_dir_ptr); + dir_entry = &dir_tbl[dir_idx]; + + pasid_tbl = get_pasid_table_from_pde(dir_entry); + if (!pasid_tbl) + goto iommu_unlock; + + pasid_tbl_entry = &pasid_tbl[tbl_idx]; + if (!pasid_pte_is_present(pasid_tbl_entry)) + goto iommu_unlock; + + /* + * According to PASID Granular Translation Type(PGTT), + * get the page table pointer. + */ + pgtt = (u16)(pasid_tbl_entry->val[0] & GENMASK_ULL(8, 6)) >> 6; + agaw = (u8)(pasid_tbl_entry->val[0] & GENMASK_ULL(4, 2)) >> 2; + + switch (pgtt) { + case PASID_ENTRY_PGTT_FL_ONLY: + pgd = pasid_tbl_entry->val[2]; + break; + case PASID_ENTRY_PGTT_SL_ONLY: + case PASID_ENTRY_PGTT_NESTED: + pgd = pasid_tbl_entry->val[0]; + break; + default: + goto iommu_unlock; + } + pgd &= VTD_PAGE_MASK; + } else { /* legacy mode */ + pgd = context->lo & VTD_PAGE_MASK; + agaw = context->hi & 7; + } + + seq_printf(m, "Device %04x:%02x:%02x.%x ", + iommu->segment, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); + + if (scalable) + seq_printf(m, "with pasid %x @0x%llx\n", pasid, pgd); + else + seq_printf(m, "@0x%llx\n", pgd); + + seq_printf(m, "%-17s\t%-18s\t%-18s\t%-18s\t%-18s\t%-s\n", + "IOVA_PFN", "PML5E", "PML4E", "PDPE", "PDE", "PTE"); + pgtable_walk_level(m, phys_to_virt(pgd), agaw + 2, 0, path); + + found = true; +iommu_unlock: + spin_unlock(&iommu->lock); + if (found) + break; } + rcu_read_unlock(); return 0; } -static int domain_translation_struct_show(struct seq_file *m, void *unused) +static int dev_domain_translation_struct_show(struct seq_file *m, void *unused) +{ + struct device_domain_info *info = (struct device_domain_info *)m->private; + + return domain_translation_struct_show(m, info, IOMMU_NO_PASID); +} +DEFINE_SHOW_ATTRIBUTE(dev_domain_translation_struct); + +static int pasid_domain_translation_struct_show(struct seq_file *m, void *unused) { - return bus_for_each_dev(&pci_bus_type, NULL, m, - show_device_domain_translation); + struct dev_pasid_info *dev_pasid = (struct dev_pasid_info *)m->private; + struct device_domain_info *info = dev_iommu_priv_get(dev_pasid->dev); + + return domain_translation_struct_show(m, info, dev_pasid->pasid); } -DEFINE_SHOW_ATTRIBUTE(domain_translation_struct); +DEFINE_SHOW_ATTRIBUTE(pasid_domain_translation_struct); static void invalidation_queue_entry_show(struct seq_file *m, struct intel_iommu *iommu) @@ -666,16 +755,12 @@ static const struct file_operations dmar_perf_latency_fops = { void __init intel_iommu_debugfs_init(void) { - struct dentry *intel_iommu_debug = debugfs_create_dir("intel", - iommu_debugfs_dir); + intel_iommu_debug = debugfs_create_dir("intel", iommu_debugfs_dir); debugfs_create_file("iommu_regset", 0444, intel_iommu_debug, NULL, &iommu_regset_fops); debugfs_create_file("dmar_translation_struct", 0444, intel_iommu_debug, NULL, &dmar_translation_struct_fops); - debugfs_create_file("domain_translation_struct", 0444, - intel_iommu_debug, NULL, - &domain_translation_struct_fops); debugfs_create_file("invalidation_queue", 0444, intel_iommu_debug, NULL, &invalidation_queue_fops); #ifdef CONFIG_IRQ_REMAP @@ -685,3 +770,51 @@ void __init intel_iommu_debugfs_init(void) debugfs_create_file("dmar_perf_latency", 0644, intel_iommu_debug, NULL, &dmar_perf_latency_fops); } + +/* + * Create a debugfs directory for each device, and then create a + * debugfs file in this directory for users to dump the page table + * of the default domain. e.g. + * /sys/kernel/debug/iommu/intel/0000:00:01.0/domain_translation_struct + */ +void intel_iommu_debugfs_create_dev(struct device_domain_info *info) +{ + info->debugfs_dentry = debugfs_create_dir(dev_name(info->dev), intel_iommu_debug); + + debugfs_create_file("domain_translation_struct", 0444, info->debugfs_dentry, + info, &dev_domain_translation_struct_fops); +} + +/* Remove the device debugfs directory. */ +void intel_iommu_debugfs_remove_dev(struct device_domain_info *info) +{ + debugfs_remove_recursive(info->debugfs_dentry); +} + +/* + * Create a debugfs directory per pair of {device, pasid}, then create the + * corresponding debugfs file in this directory for users to dump its page + * table. e.g. + * /sys/kernel/debug/iommu/intel/0000:00:01.0/1/domain_translation_struct + * + * The debugfs only dumps the page tables whose mappings are created and + * destroyed by the iommu_map/unmap() interfaces. Check the mapping type + * of the domain before creating debugfs directory. + */ +void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid) +{ + struct device_domain_info *info = dev_iommu_priv_get(dev_pasid->dev); + char dir_name[10]; + + sprintf(dir_name, "%x", dev_pasid->pasid); + dev_pasid->debugfs_dentry = debugfs_create_dir(dir_name, info->debugfs_dentry); + + debugfs_create_file("domain_translation_struct", 0444, dev_pasid->debugfs_dentry, + dev_pasid, &pasid_domain_translation_struct_fops); +} + +/* Remove the device pasid debugfs directory. */ +void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid) +{ + debugfs_remove_recursive(dev_pasid->debugfs_dentry); +} diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 71c12e15ecd7..d5d191a71fe0 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4407,6 +4407,8 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev) } } + intel_iommu_debugfs_create_dev(info); + return &iommu->iommu; } @@ -4416,6 +4418,7 @@ static void intel_iommu_release_device(struct device *dev) dmar_remove_one_dev_info(dev); intel_pasid_free_table(dev); + intel_iommu_debugfs_remove_dev(info); dev_iommu_priv_set(dev, NULL); kfree(info); set_dma_ops(dev, NULL); @@ -4660,8 +4663,8 @@ static bool risky_device(struct pci_dev *pdev) return false; } -static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); unsigned long pages = aligned_nrpages(iova, size); @@ -4671,6 +4674,7 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain, xa_for_each(&dmar_domain->iommu_array, i, info) __mapping_notify_one(info->iommu, dmar_domain, pfn, pages); + return 0; } static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) @@ -4708,6 +4712,7 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid) spin_unlock_irqrestore(&dmar_domain->lock, flags); domain_detach_iommu(dmar_domain, iommu); + intel_iommu_debugfs_remove_dev_pasid(dev_pasid); kfree(dev_pasid); out_tear_down: intel_pasid_tear_down_entry(iommu, dev, pasid, false); @@ -4760,6 +4765,9 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain, list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids); spin_unlock_irqrestore(&dmar_domain->lock, flags); + if (domain->type & __IOMMU_DOMAIN_PAGING) + intel_iommu_debugfs_create_dev_pasid(dev_pasid); + return 0; out_detach_iommu: domain_detach_iommu(dmar_domain, iommu); diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 7dac94f62b4e..f9c1dd1ccff8 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -716,12 +716,18 @@ struct device_domain_info { struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ struct pasid_table *pasid_table; /* pasid table */ +#ifdef CONFIG_INTEL_IOMMU_DEBUGFS + struct dentry *debugfs_dentry; /* pointer to device directory dentry */ +#endif }; struct dev_pasid_info { struct list_head link_domain; /* link to domain siblings */ struct device *dev; ioasid_t pasid; +#ifdef CONFIG_INTEL_IOMMU_DEBUGFS + struct dentry *debugfs_dentry; /* pointer to pasid directory dentry */ +#endif }; static inline void __iommu_flush_cache( @@ -883,8 +889,16 @@ static inline void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid #ifdef CONFIG_INTEL_IOMMU_DEBUGFS void intel_iommu_debugfs_init(void); +void intel_iommu_debugfs_create_dev(struct device_domain_info *info); +void intel_iommu_debugfs_remove_dev(struct device_domain_info *info); +void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid); +void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid); #else static inline void intel_iommu_debugfs_init(void) {} +static inline void intel_iommu_debugfs_create_dev(struct device_domain_info *info) {} +static inline void intel_iommu_debugfs_remove_dev(struct device_domain_info *info) {} +static inline void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid) {} +static inline void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid) {} #endif /* CONFIG_INTEL_IOMMU_DEBUGFS */ extern const struct attribute_group *intel_iommu_groups[]; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 79a0fdb33404..f17a1113f3d6 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1114,7 +1114,8 @@ map_end: } - iommu_flush_iotlb_all(domain); + if (!list_empty(&mappings) && iommu_is_dma_domain(domain)) + iommu_flush_iotlb_all(domain); out: iommu_put_resv_regions(dev, &mappings); @@ -2599,8 +2600,17 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, return -EINVAL; ret = __iommu_map(domain, iova, paddr, size, prot, gfp); - if (ret == 0 && ops->iotlb_sync_map) - ops->iotlb_sync_map(domain, iova, size); + if (ret == 0 && ops->iotlb_sync_map) { + ret = ops->iotlb_sync_map(domain, iova, size); + if (ret) + goto out_err; + } + + return ret; + +out_err: + /* undo mappings already done */ + iommu_unmap(domain, iova, size); return ret; } @@ -2728,8 +2738,11 @@ next: sg = sg_next(sg); } - if (ops->iotlb_sync_map) - ops->iotlb_sync_map(domain, iova, mapped); + if (ops->iotlb_sync_map) { + ret = ops->iotlb_sync_map(domain, iova, mapped); + if (ret) + goto out_err; + } return mapped; out_err: @@ -3172,24 +3185,6 @@ out_unlock: return ret ?: count; } -static bool iommu_is_default_domain(struct iommu_group *group) -{ - if (group->domain == group->default_domain) - return true; - - /* - * If the default domain was set to identity and it is still an identity - * domain then we consider this a pass. This happens because of - * amd_iommu_init_device() replacing the default idenytity domain with an - * identity domain that has a different configuration for AMDGPU. - */ - if (group->default_domain && - group->default_domain->type == IOMMU_DOMAIN_IDENTITY && - group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY) - return true; - return false; -} - /** * iommu_device_use_default_domain() - Device driver wants to handle device * DMA through the kernel DMA API. @@ -3209,7 +3204,7 @@ int iommu_device_use_default_domain(struct device *dev) mutex_lock(&group->mutex); if (group->owner_cnt) { - if (group->owner || !iommu_is_default_domain(group) || + if (group->domain != group->default_domain || group->owner || !xa_empty(&group->pasid_array)) { ret = -EBUSY; goto unlock_out; diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index a163cee0b724..f86af9815d6f 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -498,12 +498,13 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova, return ret; } -static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, - size_t size) +static int msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size) { struct msm_priv *priv = to_msm_priv(domain); __flush_iotlb_range(iova, size, SZ_4K, false, priv); + return 0; } static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova, diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 78c8f705383f..75279500a4a8 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -836,12 +836,13 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, mtk_iommu_tlb_flush_range_sync(gather->start, length, dom->bank); } -static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, - size_t size) +static int mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); mtk_iommu_tlb_flush_range_sync(iova, size, dom->bank); + return 0; } static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain, diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 5695ad71d60e..9a5196f523de 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -14,16 +14,300 @@ #include <linux/rcupdate.h> #include <asm/pci_dma.h> +#include "dma-iommu.h" + static const struct iommu_ops s390_iommu_ops; +static struct kmem_cache *dma_region_table_cache; +static struct kmem_cache *dma_page_table_cache; + +static u64 s390_iommu_aperture; +static u32 s390_iommu_aperture_factor = 1; + struct s390_domain { struct iommu_domain domain; struct list_head devices; + struct zpci_iommu_ctrs ctrs; unsigned long *dma_table; spinlock_t list_lock; struct rcu_head rcu; }; +static inline unsigned int calc_rtx(dma_addr_t ptr) +{ + return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK; +} + +static inline unsigned int calc_sx(dma_addr_t ptr) +{ + return ((unsigned long)ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK; +} + +static inline unsigned int calc_px(dma_addr_t ptr) +{ + return ((unsigned long)ptr >> PAGE_SHIFT) & ZPCI_PT_MASK; +} + +static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa) +{ + *entry &= ZPCI_PTE_FLAG_MASK; + *entry |= (pfaa & ZPCI_PTE_ADDR_MASK); +} + +static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto) +{ + *entry &= ZPCI_RTE_FLAG_MASK; + *entry |= (sto & ZPCI_RTE_ADDR_MASK); + *entry |= ZPCI_TABLE_TYPE_RTX; +} + +static inline void set_st_pto(unsigned long *entry, phys_addr_t pto) +{ + *entry &= ZPCI_STE_FLAG_MASK; + *entry |= (pto & ZPCI_STE_ADDR_MASK); + *entry |= ZPCI_TABLE_TYPE_SX; +} + +static inline void validate_rt_entry(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_VALID_MASK; + *entry &= ~ZPCI_TABLE_OFFSET_MASK; + *entry |= ZPCI_TABLE_VALID; + *entry |= ZPCI_TABLE_LEN_RTX; +} + +static inline void validate_st_entry(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_VALID_MASK; + *entry |= ZPCI_TABLE_VALID; +} + +static inline void invalidate_pt_entry(unsigned long *entry) +{ + WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID); + *entry &= ~ZPCI_PTE_VALID_MASK; + *entry |= ZPCI_PTE_INVALID; +} + +static inline void validate_pt_entry(unsigned long *entry) +{ + WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID); + *entry &= ~ZPCI_PTE_VALID_MASK; + *entry |= ZPCI_PTE_VALID; +} + +static inline void entry_set_protected(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_PROT_MASK; + *entry |= ZPCI_TABLE_PROTECTED; +} + +static inline void entry_clr_protected(unsigned long *entry) +{ + *entry &= ~ZPCI_TABLE_PROT_MASK; + *entry |= ZPCI_TABLE_UNPROTECTED; +} + +static inline int reg_entry_isvalid(unsigned long entry) +{ + return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID; +} + +static inline int pt_entry_isvalid(unsigned long entry) +{ + return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID; +} + +static inline unsigned long *get_rt_sto(unsigned long entry) +{ + if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX) + return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK); + else + return NULL; +} + +static inline unsigned long *get_st_pto(unsigned long entry) +{ + if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX) + return phys_to_virt(entry & ZPCI_STE_ADDR_MASK); + else + return NULL; +} + +static int __init dma_alloc_cpu_table_caches(void) +{ + dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables", + ZPCI_TABLE_SIZE, + ZPCI_TABLE_ALIGN, + 0, NULL); + if (!dma_region_table_cache) + return -ENOMEM; + + dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables", + ZPCI_PT_SIZE, + ZPCI_PT_ALIGN, + 0, NULL); + if (!dma_page_table_cache) { + kmem_cache_destroy(dma_region_table_cache); + return -ENOMEM; + } + return 0; +} + +static unsigned long *dma_alloc_cpu_table(gfp_t gfp) +{ + unsigned long *table, *entry; + + table = kmem_cache_alloc(dma_region_table_cache, gfp); + if (!table) + return NULL; + + for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++) + *entry = ZPCI_TABLE_INVALID; + return table; +} + +static void dma_free_cpu_table(void *table) +{ + kmem_cache_free(dma_region_table_cache, table); +} + +static void dma_free_page_table(void *table) +{ + kmem_cache_free(dma_page_table_cache, table); +} + +static void dma_free_seg_table(unsigned long entry) +{ + unsigned long *sto = get_rt_sto(entry); + int sx; + + for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++) + if (reg_entry_isvalid(sto[sx])) + dma_free_page_table(get_st_pto(sto[sx])); + + dma_free_cpu_table(sto); +} + +static void dma_cleanup_tables(unsigned long *table) +{ + int rtx; + + if (!table) + return; + + for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++) + if (reg_entry_isvalid(table[rtx])) + dma_free_seg_table(table[rtx]); + + dma_free_cpu_table(table); +} + +static unsigned long *dma_alloc_page_table(gfp_t gfp) +{ + unsigned long *table, *entry; + + table = kmem_cache_alloc(dma_page_table_cache, gfp); + if (!table) + return NULL; + + for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++) + *entry = ZPCI_PTE_INVALID; + return table; +} + +static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp) +{ + unsigned long old_rte, rte; + unsigned long *sto; + + rte = READ_ONCE(*rtep); + if (reg_entry_isvalid(rte)) { + sto = get_rt_sto(rte); + } else { + sto = dma_alloc_cpu_table(gfp); + if (!sto) + return NULL; + + set_rt_sto(&rte, virt_to_phys(sto)); + validate_rt_entry(&rte); + entry_clr_protected(&rte); + + old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte); + if (old_rte != ZPCI_TABLE_INVALID) { + /* Somone else was faster, use theirs */ + dma_free_cpu_table(sto); + sto = get_rt_sto(old_rte); + } + } + return sto; +} + +static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp) +{ + unsigned long old_ste, ste; + unsigned long *pto; + + ste = READ_ONCE(*step); + if (reg_entry_isvalid(ste)) { + pto = get_st_pto(ste); + } else { + pto = dma_alloc_page_table(gfp); + if (!pto) + return NULL; + set_st_pto(&ste, virt_to_phys(pto)); + validate_st_entry(&ste); + entry_clr_protected(&ste); + + old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste); + if (old_ste != ZPCI_TABLE_INVALID) { + /* Somone else was faster, use theirs */ + dma_free_page_table(pto); + pto = get_st_pto(old_ste); + } + } + return pto; +} + +static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp) +{ + unsigned long *sto, *pto; + unsigned int rtx, sx, px; + + rtx = calc_rtx(dma_addr); + sto = dma_get_seg_table_origin(&rto[rtx], gfp); + if (!sto) + return NULL; + + sx = calc_sx(dma_addr); + pto = dma_get_page_table_origin(&sto[sx], gfp); + if (!pto) + return NULL; + + px = calc_px(dma_addr); + return &pto[px]; +} + +static void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags) +{ + unsigned long pte; + + pte = READ_ONCE(*ptep); + if (flags & ZPCI_PTE_INVALID) { + invalidate_pt_entry(&pte); + } else { + set_pt_pfaa(&pte, page_addr); + validate_pt_entry(&pte); + } + + if (flags & ZPCI_TABLE_PROTECTED) + entry_set_protected(&pte); + else + entry_clr_protected(&pte); + + xchg(ptep, pte); +} + static struct s390_domain *to_s390_domain(struct iommu_domain *dom) { return container_of(dom, struct s390_domain, domain); @@ -31,9 +315,13 @@ static struct s390_domain *to_s390_domain(struct iommu_domain *dom) static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap) { + struct zpci_dev *zdev = to_zpci_dev(dev); + switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: return true; + case IOMMU_CAP_DEFERRED_FLUSH: + return zdev->pft != PCI_FUNC_TYPE_ISM; default: return false; } @@ -81,14 +369,13 @@ static void s390_domain_free(struct iommu_domain *domain) call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain); } -static void __s390_iommu_detach_device(struct zpci_dev *zdev) +static void s390_iommu_detach_device(struct iommu_domain *domain, + struct device *dev) { - struct s390_domain *s390_domain = zdev->s390_domain; + struct s390_domain *s390_domain = to_s390_domain(domain); + struct zpci_dev *zdev = to_zpci_dev(dev); unsigned long flags; - if (!s390_domain) - return; - spin_lock_irqsave(&s390_domain->list_lock, flags); list_del_rcu(&zdev->iommu_list); spin_unlock_irqrestore(&s390_domain->list_lock, flags); @@ -115,9 +402,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, return -EINVAL; if (zdev->s390_domain) - __s390_iommu_detach_device(zdev); - else if (zdev->dma_table) - zpci_dma_exit_device(zdev); + s390_iommu_detach_device(&zdev->s390_domain->domain, dev); cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, virt_to_phys(s390_domain->dma_table), &status); @@ -127,7 +412,6 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, */ if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL) return -EIO; - zdev->dma_table = s390_domain->dma_table; zdev->dma_table = s390_domain->dma_table; zdev->s390_domain = s390_domain; @@ -139,31 +423,6 @@ static int s390_iommu_attach_device(struct iommu_domain *domain, return 0; } -/* - * Switch control over the IOMMU to S390's internal dma_api ops - */ -static int s390_iommu_platform_attach(struct iommu_domain *platform_domain, - struct device *dev) -{ - struct zpci_dev *zdev = to_zpci_dev(dev); - - if (!zdev->s390_domain) - return 0; - - __s390_iommu_detach_device(zdev); - zpci_dma_init_device(zdev); - return 0; -} - -static struct iommu_domain_ops s390_iommu_platform_ops = { - .attach_dev = s390_iommu_platform_attach, -}; - -static struct iommu_domain s390_iommu_platform_domain = { - .type = IOMMU_DOMAIN_PLATFORM, - .ops = &s390_iommu_platform_ops, -}; - static void s390_iommu_get_resv_regions(struct device *dev, struct list_head *list) { @@ -204,6 +463,9 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev) if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1) zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1; + if (zdev->tlb_refresh) + dev->iommu->shadow_on_flush = 1; + return &zdev->iommu_dev; } @@ -216,7 +478,13 @@ static void s390_iommu_release_device(struct device *dev) * to the device, but keep it attached to other devices in the group. */ if (zdev) - __s390_iommu_detach_device(zdev); + s390_iommu_detach_device(&zdev->s390_domain->domain, dev); +} + +static int zpci_refresh_all(struct zpci_dev *zdev) +{ + return zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma, + zdev->end_dma - zdev->start_dma + 1); } static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain) @@ -226,8 +494,8 @@ static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain) rcu_read_lock(); list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { - zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma, - zdev->end_dma - zdev->start_dma + 1); + atomic64_inc(&s390_domain->ctrs.global_rpcits); + zpci_refresh_all(zdev); } rcu_read_unlock(); } @@ -245,26 +513,40 @@ static void s390_iommu_iotlb_sync(struct iommu_domain *domain, rcu_read_lock(); list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { + atomic64_inc(&s390_domain->ctrs.sync_rpcits); zpci_refresh_trans((u64)zdev->fh << 32, gather->start, size); } rcu_read_unlock(); } -static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int s390_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct s390_domain *s390_domain = to_s390_domain(domain); struct zpci_dev *zdev; + int ret = 0; rcu_read_lock(); list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) { if (!zdev->tlb_refresh) continue; - zpci_refresh_trans((u64)zdev->fh << 32, - iova, size); + atomic64_inc(&s390_domain->ctrs.sync_map_rpcits); + ret = zpci_refresh_trans((u64)zdev->fh << 32, + iova, size); + /* + * let the hypervisor discover invalidated entries + * allowing it to free IOVAs and unpin pages + */ + if (ret == -ENOMEM) { + ret = zpci_refresh_all(zdev); + if (ret) + break; + } } rcu_read_unlock(); + + return ret; } static int s390_iommu_validate_trans(struct s390_domain *s390_domain, @@ -344,16 +626,15 @@ static int s390_iommu_map_pages(struct iommu_domain *domain, if (!IS_ALIGNED(iova | paddr, pgsize)) return -EINVAL; - if (!(prot & IOMMU_READ)) - return -EINVAL; - if (!(prot & IOMMU_WRITE)) flags |= ZPCI_TABLE_PROTECTED; rc = s390_iommu_validate_trans(s390_domain, paddr, iova, - pgcount, flags, gfp); - if (!rc) + pgcount, flags, gfp); + if (!rc) { *mapped = size; + atomic64_add(pgcount, &s390_domain->ctrs.mapped_pages); + } return rc; } @@ -409,12 +690,26 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain, return 0; iommu_iotlb_gather_add_range(gather, iova, size); + atomic64_add(pgcount, &s390_domain->ctrs.unmapped_pages); return size; } +static void s390_iommu_probe_finalize(struct device *dev) +{ + iommu_setup_dma_ops(dev, 0, U64_MAX); +} + +struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev) +{ + if (!zdev || !zdev->s390_domain) + return NULL; + return &zdev->s390_domain->ctrs; +} + int zpci_init_iommu(struct zpci_dev *zdev) { + u64 aperture_size; int rc = 0; rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL, @@ -426,6 +721,12 @@ int zpci_init_iommu(struct zpci_dev *zdev) if (rc) goto out_sysfs; + zdev->start_dma = PAGE_ALIGN(zdev->start_dma); + aperture_size = min3(s390_iommu_aperture, + ZPCI_TABLE_SIZE_RT - zdev->start_dma, + zdev->end_dma - zdev->start_dma + 1); + zdev->end_dma = zdev->start_dma + aperture_size - 1; + return 0; out_sysfs: @@ -441,11 +742,50 @@ void zpci_destroy_iommu(struct zpci_dev *zdev) iommu_device_sysfs_remove(&zdev->iommu_dev); } +static int __init s390_iommu_setup(char *str) +{ + if (!strcmp(str, "strict")) { + pr_warn("s390_iommu=strict deprecated; use iommu.strict=1 instead\n"); + iommu_set_dma_strict(); + } + return 1; +} + +__setup("s390_iommu=", s390_iommu_setup); + +static int __init s390_iommu_aperture_setup(char *str) +{ + if (kstrtou32(str, 10, &s390_iommu_aperture_factor)) + s390_iommu_aperture_factor = 1; + return 1; +} + +__setup("s390_iommu_aperture=", s390_iommu_aperture_setup); + +static int __init s390_iommu_init(void) +{ + int rc; + + iommu_dma_forcedac = true; + s390_iommu_aperture = (u64)virt_to_phys(high_memory); + if (!s390_iommu_aperture_factor) + s390_iommu_aperture = ULONG_MAX; + else + s390_iommu_aperture *= s390_iommu_aperture_factor; + + rc = dma_alloc_cpu_table_caches(); + if (rc) + return rc; + + return rc; +} +subsys_initcall(s390_iommu_init); + static const struct iommu_ops s390_iommu_ops = { - .default_domain = &s390_iommu_platform_domain, .capable = s390_iommu_capable, .domain_alloc_paging = s390_domain_alloc_paging, .probe_device = s390_iommu_probe_device, + .probe_finalize = s390_iommu_probe_finalize, .release_device = s390_iommu_release_device, .device_group = generic_device_group, .pgsize_bitmap = SZ_4K, diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 9c33ea6903f6..2eb9fb46703b 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -341,8 +341,8 @@ static size_t sprd_iommu_unmap(struct iommu_domain *domain, unsigned long iova, return size; } -static void sprd_iommu_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int sprd_iommu_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct sprd_iommu_domain *dom = to_sprd_domain(domain); unsigned int reg; @@ -354,6 +354,7 @@ static void sprd_iommu_sync_map(struct iommu_domain *domain, /* clear IOMMU TLB buffer after page table updated */ sprd_iommu_write(dom->sdev, reg, 0xffffffff); + return 0; } static void sprd_iommu_sync(struct iommu_domain *domain, diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index 3c834854eda1..41484a5a399b 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -401,8 +401,8 @@ static void sun50i_iommu_flush_iotlb_all(struct iommu_domain *domain) spin_unlock_irqrestore(&iommu->iommu_lock, flags); } -static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain, - unsigned long iova, size_t size) +static int sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain, + unsigned long iova, size_t size) { struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain); struct sun50i_iommu *iommu = sun50i_domain->iommu; @@ -411,6 +411,8 @@ static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain, spin_lock_irqsave(&iommu->iommu_lock, flags); sun50i_iommu_zap_range(iommu, iova, size); spin_unlock_irqrestore(&iommu->iommu_lock, flags); + + return 0; } static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain, diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index d3cb966abfc3..310871728ab4 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -1084,8 +1084,6 @@ DEFINE_SHOW_ATTRIBUTE(tegra_smmu_clients); static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu) { smmu->debugfs = debugfs_create_dir("smmu", NULL); - if (!smmu->debugfs) - return; debugfs_create_file("swgroups", S_IRUGO, smmu->debugfs, smmu, &tegra_smmu_swgroups_fops); diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 17dcd826f5c2..379ebe03efb6 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -85,7 +85,7 @@ struct viommu_request { void *writeback; unsigned int write_offset; unsigned int len; - char buf[]; + char buf[] __counted_by(len); }; #define VIOMMU_FAULT_RESV_MASK 0xffffff00 @@ -230,7 +230,7 @@ static int __viommu_add_req(struct viommu_dev *viommu, void *buf, size_t len, if (write_offset <= 0) return -EINVAL; - req = kzalloc(sizeof(*req) + len, GFP_ATOMIC); + req = kzalloc(struct_size(req, buf, len), GFP_ATOMIC); if (!req) return -ENOMEM; diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h index 99a5201d9e62..dc7ed2f46886 100644 --- a/include/linux/amd-iommu.h +++ b/include/linux/amd-iommu.h @@ -33,126 +33,6 @@ struct pci_dev; extern int amd_iommu_detect(void); -/** - * amd_iommu_init_device() - Init device for use with IOMMUv2 driver - * @pdev: The PCI device to initialize - * @pasids: Number of PASIDs to support for this device - * - * This function does all setup for the device pdev so that it can be - * used with IOMMUv2. - * Returns 0 on success or negative value on error. - */ -extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids); - -/** - * amd_iommu_free_device() - Free all IOMMUv2 related device resources - * and disable IOMMUv2 usage for this device - * @pdev: The PCI device to disable IOMMUv2 usage for' - */ -extern void amd_iommu_free_device(struct pci_dev *pdev); - -/** - * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device - * @pdev: The PCI device to bind the task to - * @pasid: The PASID on the device the task should be bound to - * @task: the task to bind - * - * The function returns 0 on success or a negative value on error. - */ -extern int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid, - struct task_struct *task); - -/** - * amd_iommu_unbind_pasid() - Unbind a PASID from its task on - * a device - * @pdev: The device of the PASID - * @pasid: The PASID to unbind - * - * When this function returns the device is no longer using the PASID - * and the PASID is no longer bound to its task. - */ -extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid); - -/** - * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed - * PRI requests - * @pdev: The PCI device the call-back should be registered for - * @cb: The call-back function - * - * The IOMMUv2 driver invokes this call-back when it is unable to - * successfully handle a PRI request. The device driver can then decide - * which PRI response the device should see. Possible return values for - * the call-back are: - * - * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device - * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device - * - AMD_IOMMU_INV_PRI_RSP_FAIL - Send Failure back to the device, - * the device is required to disable - * PRI when it receives this response - * - * The function returns 0 on success or negative value on error. - */ -#define AMD_IOMMU_INV_PRI_RSP_SUCCESS 0 -#define AMD_IOMMU_INV_PRI_RSP_INVALID 1 -#define AMD_IOMMU_INV_PRI_RSP_FAIL 2 - -typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev, - u32 pasid, - unsigned long address, - u16); - -extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev, - amd_iommu_invalid_ppr_cb cb); - -#define PPR_FAULT_EXEC (1 << 1) -#define PPR_FAULT_READ (1 << 2) -#define PPR_FAULT_WRITE (1 << 5) -#define PPR_FAULT_USER (1 << 6) -#define PPR_FAULT_RSVD (1 << 7) -#define PPR_FAULT_GN (1 << 8) - -/** - * amd_iommu_device_info() - Get information about IOMMUv2 support of a - * PCI device - * @pdev: PCI device to query information from - * @info: A pointer to an amd_iommu_device_info structure which will contain - * the information about the PCI device - * - * Returns 0 on success, negative value on error - */ - -#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */ -#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */ -#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */ -#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 /* Device may request execution - on memory pages */ -#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 /* Device may request - super-user privileges */ - -struct amd_iommu_device_info { - int max_pasids; - u32 flags; -}; - -extern int amd_iommu_device_info(struct pci_dev *pdev, - struct amd_iommu_device_info *info); - -/** - * amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating - * a pasid context. This call-back is - * invoked when the IOMMUv2 driver needs to - * invalidate a PASID context, for example - * because the task that is bound to that - * context is about to exit. - * - * @pdev: The PCI device the call-back should be registered for - * @cb: The call-back function - */ - -typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, u32 pasid); - -extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev, - amd_iommu_invalidate_ctx cb); #else /* CONFIG_AMD_IOMMU */ static inline int amd_iommu_detect(void) { return -ENODEV; } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 73daa4fb168b..ddc25d239106 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -353,8 +353,8 @@ struct iommu_domain_ops { struct iommu_iotlb_gather *iotlb_gather); void (*flush_iotlb_all)(struct iommu_domain *domain); - void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, - size_t size); + int (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova, + size_t size); void (*iotlb_sync)(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather); @@ -427,6 +427,7 @@ struct iommu_fault_param { * @attach_deferred: the dma domain attachment is deferred * @pci_32bit_workaround: Limit DMA allocations to 32-bit IOVAs * @require_direct: device requires IOMMU_RESV_DIRECT regions + * @shadow_on_flush: IOTLB flushes are used to sync shadow tables * * TODO: migrate other per device data pointers under iommu_dev_data, e.g. * struct iommu_group *iommu_group; @@ -442,6 +443,7 @@ struct dev_iommu { u32 attach_deferred:1; u32 pci_32bit_workaround:1; u32 require_direct:1; + u32 shadow_on_flush:1; }; int iommu_device_register(struct iommu_device *iommu, |