summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt9
-rw-r--r--Documentation/devicetree/bindings/iommu/arm,smmu.yaml2
-rw-r--r--arch/s390/include/asm/pci.h11
-rw-r--r--arch/s390/include/asm/pci_clp.h3
-rw-r--r--arch/s390/include/asm/pci_dma.h121
-rw-r--r--arch/s390/pci/Makefile2
-rw-r--r--arch/s390/pci/pci.c35
-rw-r--r--arch/s390/pci/pci_bus.c5
-rw-r--r--arch/s390/pci/pci_debug.c12
-rw-r--r--arch/s390/pci/pci_dma.c746
-rw-r--r--arch/s390/pci/pci_event.c17
-rw-r--r--arch/s390/pci/pci_sysfs.c19
-rw-r--r--drivers/iommu/Kconfig4
-rw-r--r--drivers/iommu/amd/Kconfig9
-rw-r--r--drivers/iommu/amd/Makefile1
-rw-r--r--drivers/iommu/amd/amd_iommu.h35
-rw-r--r--drivers/iommu/amd/amd_iommu_types.h52
-rw-r--r--drivers/iommu/amd/init.c117
-rw-r--r--drivers/iommu/amd/io_pgtable_v2.c8
-rw-r--r--drivers/iommu/amd/iommu.c577
-rw-r--r--drivers/iommu/amd/iommu_v2.c996
-rw-r--r--drivers/iommu/apple-dart.c5
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c71
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c251
-rw-r--r--drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h17
-rw-r--r--drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c2
-rw-r--r--drivers/iommu/dma-iommu.c200
-rw-r--r--drivers/iommu/intel/debugfs.c215
-rw-r--r--drivers/iommu/intel/iommu.c12
-rw-r--r--drivers/iommu/intel/iommu.h14
-rw-r--r--drivers/iommu/iommu.c43
-rw-r--r--drivers/iommu/msm_iommu.c5
-rw-r--r--drivers/iommu/mtk_iommu.c5
-rw-r--r--drivers/iommu/s390-iommu.c434
-rw-r--r--drivers/iommu/sprd-iommu.c5
-rw-r--r--drivers/iommu/sun50i-iommu.c6
-rw-r--r--drivers/iommu/tegra-smmu.c2
-rw-r--r--drivers/iommu/virtio-iommu.c4
-rw-r--r--include/linux/amd-iommu.h120
-rw-r--r--include/linux/iommu.h6
40 files changed, 1351 insertions, 2847 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 0a1731a0f0ef..14f56c448edc 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2220,7 +2220,7 @@
forcing Dual Address Cycle for PCI cards supporting
greater than 32-bit addressing.
- iommu.strict= [ARM64, X86] Configure TLB invalidation behaviour
+ iommu.strict= [ARM64, X86, S390] Configure TLB invalidation behaviour
Format: { "0" | "1" }
0 - Lazy mode.
Request that DMA unmap operations use deferred
@@ -5611,9 +5611,10 @@
s390_iommu= [HW,S390]
Set s390 IOTLB flushing mode
strict
- With strict flushing every unmap operation will result in
- an IOTLB flush. Default is lazy flushing before reuse,
- which is faster.
+ With strict flushing every unmap operation will result
+ in an IOTLB flush. Default is lazy flushing before
+ reuse, which is faster. Deprecated, equivalent to
+ iommu.strict=1.
s390_iommu_aperture= [KNL,S390]
Specifies the size of the per device DMA address space
diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
index b1b2cf81b42f..aa9e1c0895a5 100644
--- a/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
+++ b/Documentation/devicetree/bindings/iommu/arm,smmu.yaml
@@ -110,6 +110,7 @@ properties:
- qcom,sdm630-smmu-v2
- qcom,sdm845-smmu-v2
- qcom,sm6350-smmu-v2
+ - qcom,sm7150-smmu-v2
- const: qcom,adreno-smmu
- const: qcom,smmu-v2
- description: Qcom Adreno GPUs on Google Cheza platform
@@ -409,6 +410,7 @@ allOf:
contains:
enum:
- qcom,sm6350-smmu-v2
+ - qcom,sm7150-smmu-v2
- qcom,sm8150-smmu-500
- qcom,sm8250-smmu-500
then:
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index b248694e0024..e91cd6bbc330 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -159,13 +159,6 @@ struct zpci_dev {
unsigned long *dma_table;
int tlb_refresh;
- spinlock_t iommu_bitmap_lock;
- unsigned long *iommu_bitmap;
- unsigned long *lazy_bitmap;
- unsigned long iommu_size;
- unsigned long iommu_pages;
- unsigned int next_bit;
-
struct iommu_device iommu_dev; /* IOMMU core handle */
char res_name[16];
@@ -180,10 +173,6 @@ struct zpci_dev {
struct zpci_fmb *fmb;
u16 fmb_update; /* update interval */
u16 fmb_length;
- /* software counters */
- atomic64_t allocated_pages;
- atomic64_t mapped_pages;
- atomic64_t unmapped_pages;
u8 version;
enum pci_bus_speed max_bus_speed;
diff --git a/arch/s390/include/asm/pci_clp.h b/arch/s390/include/asm/pci_clp.h
index d6189ed14f84..f0c677ddd270 100644
--- a/arch/s390/include/asm/pci_clp.h
+++ b/arch/s390/include/asm/pci_clp.h
@@ -50,6 +50,9 @@ struct clp_fh_list_entry {
#define CLP_UTIL_STR_LEN 64
#define CLP_PFIP_NR_SEGMENTS 4
+/* PCI function type numbers */
+#define PCI_FUNC_TYPE_ISM 0x5 /* ISM device */
+
extern bool zpci_unique_uid;
struct clp_rsp_slpc_pci {
diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
index 7119c04c51c5..42d7cc4262ca 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -82,117 +82,16 @@ enum zpci_ioat_dtype {
#define ZPCI_TABLE_VALID_MASK 0x20
#define ZPCI_TABLE_PROT_MASK 0x200
-static inline unsigned int calc_rtx(dma_addr_t ptr)
-{
- return ((unsigned long) ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
-}
-
-static inline unsigned int calc_sx(dma_addr_t ptr)
-{
- return ((unsigned long) ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
-}
-
-static inline unsigned int calc_px(dma_addr_t ptr)
-{
- return ((unsigned long) ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
-}
-
-static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
-{
- *entry &= ZPCI_PTE_FLAG_MASK;
- *entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
-}
-
-static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
-{
- *entry &= ZPCI_RTE_FLAG_MASK;
- *entry |= (sto & ZPCI_RTE_ADDR_MASK);
- *entry |= ZPCI_TABLE_TYPE_RTX;
-}
-
-static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
-{
- *entry &= ZPCI_STE_FLAG_MASK;
- *entry |= (pto & ZPCI_STE_ADDR_MASK);
- *entry |= ZPCI_TABLE_TYPE_SX;
-}
-
-static inline void validate_rt_entry(unsigned long *entry)
-{
- *entry &= ~ZPCI_TABLE_VALID_MASK;
- *entry &= ~ZPCI_TABLE_OFFSET_MASK;
- *entry |= ZPCI_TABLE_VALID;
- *entry |= ZPCI_TABLE_LEN_RTX;
-}
-
-static inline void validate_st_entry(unsigned long *entry)
-{
- *entry &= ~ZPCI_TABLE_VALID_MASK;
- *entry |= ZPCI_TABLE_VALID;
-}
-
-static inline void invalidate_pt_entry(unsigned long *entry)
-{
- WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
- *entry &= ~ZPCI_PTE_VALID_MASK;
- *entry |= ZPCI_PTE_INVALID;
-}
-
-static inline void validate_pt_entry(unsigned long *entry)
-{
- WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
- *entry &= ~ZPCI_PTE_VALID_MASK;
- *entry |= ZPCI_PTE_VALID;
-}
-
-static inline void entry_set_protected(unsigned long *entry)
-{
- *entry &= ~ZPCI_TABLE_PROT_MASK;
- *entry |= ZPCI_TABLE_PROTECTED;
-}
-
-static inline void entry_clr_protected(unsigned long *entry)
-{
- *entry &= ~ZPCI_TABLE_PROT_MASK;
- *entry |= ZPCI_TABLE_UNPROTECTED;
-}
-
-static inline int reg_entry_isvalid(unsigned long entry)
-{
- return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
-}
-
-static inline int pt_entry_isvalid(unsigned long entry)
-{
- return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
-}
-
-static inline unsigned long *get_rt_sto(unsigned long entry)
-{
- if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
- return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
- else
- return NULL;
-
-}
-
-static inline unsigned long *get_st_pto(unsigned long entry)
-{
- if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
- return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
- else
- return NULL;
-}
-
-/* Prototypes */
-void dma_free_seg_table(unsigned long);
-unsigned long *dma_alloc_cpu_table(gfp_t gfp);
-void dma_cleanup_tables(unsigned long *);
-unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
- gfp_t gfp);
-void dma_update_cpu_trans(unsigned long *entry, phys_addr_t page_addr, int flags);
-
-extern const struct dma_map_ops s390_pci_dma_ops;
+struct zpci_iommu_ctrs {
+ atomic64_t mapped_pages;
+ atomic64_t unmapped_pages;
+ atomic64_t global_rpcits;
+ atomic64_t sync_map_rpcits;
+ atomic64_t sync_rpcits;
+};
+
+struct zpci_dev;
+struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev);
#endif
diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile
index 5ae31ca9dd44..0547a10406e7 100644
--- a/arch/s390/pci/Makefile
+++ b/arch/s390/pci/Makefile
@@ -3,7 +3,7 @@
# Makefile for the s390 PCI subsystem.
#
-obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_dma.o pci_clp.o pci_sysfs.o \
+obj-$(CONFIG_PCI) += pci.o pci_irq.o pci_clp.o pci_sysfs.o \
pci_event.o pci_debug.o pci_insn.o pci_mmio.o \
pci_bus.o pci_kvm_hook.o
obj-$(CONFIG_PCI_IOV) += pci_iov.o
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index d34d5813d006..63fd9e1d9f22 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -124,7 +124,11 @@ int zpci_register_ioat(struct zpci_dev *zdev, u8 dmaas,
WARN_ON_ONCE(iota & 0x3fff);
fib.pba = base;
- fib.pal = limit;
+ /* Work around off by one in ISM virt device */
+ if (zdev->pft == PCI_FUNC_TYPE_ISM && limit > base)
+ fib.pal = limit + (1 << 12);
+ else
+ fib.pal = limit;
fib.iota = iota | ZPCI_IOTA_RTTO_FLAG;
fib.gd = zdev->gisa;
cc = zpci_mod_fc(req, &fib, status);
@@ -153,6 +157,7 @@ int zpci_unregister_ioat(struct zpci_dev *zdev, u8 dmaas)
int zpci_fmb_enable_device(struct zpci_dev *zdev)
{
u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_SET_MEASURE);
+ struct zpci_iommu_ctrs *ctrs;
struct zpci_fib fib = {0};
u8 cc, status;
@@ -165,9 +170,15 @@ int zpci_fmb_enable_device(struct zpci_dev *zdev)
WARN_ON((u64) zdev->fmb & 0xf);
/* reset software counters */
- atomic64_set(&zdev->allocated_pages, 0);
- atomic64_set(&zdev->mapped_pages, 0);
- atomic64_set(&zdev->unmapped_pages, 0);
+ ctrs = zpci_get_iommu_ctrs(zdev);
+ if (ctrs) {
+ atomic64_set(&ctrs->mapped_pages, 0);
+ atomic64_set(&ctrs->unmapped_pages, 0);
+ atomic64_set(&ctrs->global_rpcits, 0);
+ atomic64_set(&ctrs->sync_map_rpcits, 0);
+ atomic64_set(&ctrs->sync_rpcits, 0);
+ }
+
fib.fmb_addr = virt_to_phys(zdev->fmb);
fib.gd = zdev->gisa;
@@ -582,7 +593,6 @@ int pcibios_device_add(struct pci_dev *pdev)
pdev->no_vf_scan = 1;
pdev->dev.groups = zpci_attr_groups;
- pdev->dev.dma_ops = &s390_pci_dma_ops;
zpci_map_resources(pdev);
for (i = 0; i < PCI_STD_NUM_BARS; i++) {
@@ -756,8 +766,6 @@ int zpci_hot_reset_device(struct zpci_dev *zdev)
if (zdev->dma_table)
rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
virt_to_phys(zdev->dma_table), &status);
- else
- rc = zpci_dma_init_device(zdev);
if (rc) {
zpci_disable_device(zdev);
return rc;
@@ -865,11 +873,6 @@ int zpci_deconfigure_device(struct zpci_dev *zdev)
if (zdev->zbus->bus)
zpci_bus_remove_device(zdev, false);
- if (zdev->dma_table) {
- rc = zpci_dma_exit_device(zdev);
- if (rc)
- return rc;
- }
if (zdev_enabled(zdev)) {
rc = zpci_disable_device(zdev);
if (rc)
@@ -918,8 +921,6 @@ void zpci_release_device(struct kref *kref)
if (zdev->zbus->bus)
zpci_bus_remove_device(zdev, false);
- if (zdev->dma_table)
- zpci_dma_exit_device(zdev);
if (zdev_enabled(zdev))
zpci_disable_device(zdev);
@@ -1109,10 +1110,6 @@ static int __init pci_base_init(void)
if (rc)
goto out_irq;
- rc = zpci_dma_init();
- if (rc)
- goto out_dma;
-
rc = clp_scan_pci_devices();
if (rc)
goto out_find;
@@ -1122,8 +1119,6 @@ static int __init pci_base_init(void)
return 0;
out_find:
- zpci_dma_exit();
-out_dma:
zpci_irq_exit();
out_irq:
zpci_mem_exit();
diff --git a/arch/s390/pci/pci_bus.c b/arch/s390/pci/pci_bus.c
index 32245b970a0c..daa5d7450c7d 100644
--- a/arch/s390/pci/pci_bus.c
+++ b/arch/s390/pci/pci_bus.c
@@ -47,11 +47,6 @@ static int zpci_bus_prepare_device(struct zpci_dev *zdev)
rc = zpci_enable_device(zdev);
if (rc)
return rc;
- rc = zpci_dma_init_device(zdev);
- if (rc) {
- zpci_disable_device(zdev);
- return rc;
- }
}
if (!zdev->has_resources) {
diff --git a/arch/s390/pci/pci_debug.c b/arch/s390/pci/pci_debug.c
index ca6bd98eec13..6dde2263c79d 100644
--- a/arch/s390/pci/pci_debug.c
+++ b/arch/s390/pci/pci_debug.c
@@ -53,9 +53,11 @@ static char *pci_fmt3_names[] = {
};
static char *pci_sw_names[] = {
- "Allocated pages",
"Mapped pages",
"Unmapped pages",
+ "Global RPCITs",
+ "Sync Map RPCITs",
+ "Sync RPCITs",
};
static void pci_fmb_show(struct seq_file *m, char *name[], int length,
@@ -69,10 +71,14 @@ static void pci_fmb_show(struct seq_file *m, char *name[], int length,
static void pci_sw_counter_show(struct seq_file *m)
{
- struct zpci_dev *zdev = m->private;
- atomic64_t *counter = &zdev->allocated_pages;
+ struct zpci_iommu_ctrs *ctrs = zpci_get_iommu_ctrs(m->private);
+ atomic64_t *counter;
int i;
+ if (!ctrs)
+ return;
+
+ counter = &ctrs->mapped_pages;
for (i = 0; i < ARRAY_SIZE(pci_sw_names); i++, counter++)
seq_printf(m, "%26s:\t%llu\n", pci_sw_names[i],
atomic64_read(counter));
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
deleted file mode 100644
index 99209085c75b..000000000000
--- a/arch/s390/pci/pci_dma.c
+++ /dev/null
@@ -1,746 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright IBM Corp. 2012
- *
- * Author(s):
- * Jan Glauber <jang@linux.vnet.ibm.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <linux/iommu-helper.h>
-#include <linux/dma-map-ops.h>
-#include <linux/vmalloc.h>
-#include <linux/pci.h>
-#include <asm/pci_dma.h>
-
-static struct kmem_cache *dma_region_table_cache;
-static struct kmem_cache *dma_page_table_cache;
-static int s390_iommu_strict;
-static u64 s390_iommu_aperture;
-static u32 s390_iommu_aperture_factor = 1;
-
-static int zpci_refresh_global(struct zpci_dev *zdev)
-{
- return zpci_refresh_trans((u64) zdev->fh << 32, zdev->start_dma,
- zdev->iommu_pages * PAGE_SIZE);
-}
-
-unsigned long *dma_alloc_cpu_table(gfp_t gfp)
-{
- unsigned long *table, *entry;
-
- table = kmem_cache_alloc(dma_region_table_cache, gfp);
- if (!table)
- return NULL;
-
- for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
- *entry = ZPCI_TABLE_INVALID;
- return table;
-}
-
-static void dma_free_cpu_table(void *table)
-{
- kmem_cache_free(dma_region_table_cache, table);
-}
-
-static unsigned long *dma_alloc_page_table(gfp_t gfp)
-{
- unsigned long *table, *entry;
-
- table = kmem_cache_alloc(dma_page_table_cache, gfp);
- if (!table)
- return NULL;
-
- for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
- *entry = ZPCI_PTE_INVALID;
- return table;
-}
-
-static void dma_free_page_table(void *table)
-{
- kmem_cache_free(dma_page_table_cache, table);
-}
-
-static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
-{
- unsigned long old_rte, rte;
- unsigned long *sto;
-
- rte = READ_ONCE(*rtep);
- if (reg_entry_isvalid(rte)) {
- sto = get_rt_sto(rte);
- } else {
- sto = dma_alloc_cpu_table(gfp);
- if (!sto)
- return NULL;
-
- set_rt_sto(&rte, virt_to_phys(sto));
- validate_rt_entry(&rte);
- entry_clr_protected(&rte);
-
- old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
- if (old_rte != ZPCI_TABLE_INVALID) {
- /* Somone else was faster, use theirs */
- dma_free_cpu_table(sto);
- sto = get_rt_sto(old_rte);
- }
- }
- return sto;
-}
-
-static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
-{
- unsigned long old_ste, ste;
- unsigned long *pto;
-
- ste = READ_ONCE(*step);
- if (reg_entry_isvalid(ste)) {
- pto = get_st_pto(ste);
- } else {
- pto = dma_alloc_page_table(gfp);
- if (!pto)
- return NULL;
- set_st_pto(&ste, virt_to_phys(pto));
- validate_st_entry(&ste);
- entry_clr_protected(&ste);
-
- old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
- if (old_ste != ZPCI_TABLE_INVALID) {
- /* Somone else was faster, use theirs */
- dma_free_page_table(pto);
- pto = get_st_pto(old_ste);
- }
- }
- return pto;
-}
-
-unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr,
- gfp_t gfp)
-{
- unsigned long *sto, *pto;
- unsigned int rtx, sx, px;
-
- rtx = calc_rtx(dma_addr);
- sto = dma_get_seg_table_origin(&rto[rtx], gfp);
- if (!sto)
- return NULL;
-
- sx = calc_sx(dma_addr);
- pto = dma_get_page_table_origin(&sto[sx], gfp);
- if (!pto)
- return NULL;
-
- px = calc_px(dma_addr);
- return &pto[px];
-}
-
-void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
-{
- unsigned long pte;
-
- pte = READ_ONCE(*ptep);
- if (flags & ZPCI_PTE_INVALID) {
- invalidate_pt_entry(&pte);
- } else {
- set_pt_pfaa(&pte, page_addr);
- validate_pt_entry(&pte);
- }
-
- if (flags & ZPCI_TABLE_PROTECTED)
- entry_set_protected(&pte);
- else
- entry_clr_protected(&pte);
-
- xchg(ptep, pte);
-}
-
-static int __dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
- dma_addr_t dma_addr, size_t size, int flags)
-{
- unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
- phys_addr_t page_addr = (pa & PAGE_MASK);
- unsigned long *entry;
- int i, rc = 0;
-
- if (!nr_pages)
- return -EINVAL;
-
- if (!zdev->dma_table)
- return -EINVAL;
-
- for (i = 0; i < nr_pages; i++) {
- entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr,
- GFP_ATOMIC);
- if (!entry) {
- rc = -ENOMEM;
- goto undo_cpu_trans;
- }
- dma_update_cpu_trans(entry, page_addr, flags);
- page_addr += PAGE_SIZE;
- dma_addr += PAGE_SIZE;
- }
-
-undo_cpu_trans:
- if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
- flags = ZPCI_PTE_INVALID;
- while (i-- > 0) {
- page_addr -= PAGE_SIZE;
- dma_addr -= PAGE_SIZE;
- entry = dma_walk_cpu_trans(zdev->dma_table, dma_addr,
- GFP_ATOMIC);
- if (!entry)
- break;
- dma_update_cpu_trans(entry, page_addr, flags);
- }
- }
- return rc;
-}
-
-static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
- size_t size, int flags)
-{
- unsigned long irqflags;
- int ret;
-
- /*
- * With zdev->tlb_refresh == 0, rpcit is not required to establish new
- * translations when previously invalid translation-table entries are
- * validated. With lazy unmap, rpcit is skipped for previously valid
- * entries, but a global rpcit is then required before any address can
- * be re-used, i.e. after each iommu bitmap wrap-around.
- */
- if ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID) {
- if (!zdev->tlb_refresh)
- return 0;
- } else {
- if (!s390_iommu_strict)
- return 0;
- }
-
- ret = zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
- PAGE_ALIGN(size));
- if (ret == -ENOMEM && !s390_iommu_strict) {
- /* enable the hypervisor to free some resources */
- if (zpci_refresh_global(zdev))
- goto out;
-
- spin_lock_irqsave(&zdev->iommu_bitmap_lock, irqflags);
- bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
- zdev->lazy_bitmap, zdev->iommu_pages);
- bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, irqflags);
- ret = 0;
- }
-out:
- return ret;
-}
-
-static int dma_update_trans(struct zpci_dev *zdev, phys_addr_t pa,
- dma_addr_t dma_addr, size_t size, int flags)
-{
- int rc;
-
- rc = __dma_update_trans(zdev, pa, dma_addr, size, flags);
- if (rc)
- return rc;
-
- rc = __dma_purge_tlb(zdev, dma_addr, size, flags);
- if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
- __dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID);
-
- return rc;
-}
-
-void dma_free_seg_table(unsigned long entry)
-{
- unsigned long *sto = get_rt_sto(entry);
- int sx;
-
- for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
- if (reg_entry_isvalid(sto[sx]))
- dma_free_page_table(get_st_pto(sto[sx]));
-
- dma_free_cpu_table(sto);
-}
-
-void dma_cleanup_tables(unsigned long *table)
-{
- int rtx;
-
- if (!table)
- return;
-
- for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
- if (reg_entry_isvalid(table[rtx]))
- dma_free_seg_table(table[rtx]);
-
- dma_free_cpu_table(table);
-}
-
-static unsigned long __dma_alloc_iommu(struct device *dev,
- unsigned long start, int size)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-
- return iommu_area_alloc(zdev->iommu_bitmap, zdev->iommu_pages,
- start, size, zdev->start_dma >> PAGE_SHIFT,
- dma_get_seg_boundary_nr_pages(dev, PAGE_SHIFT),
- 0);
-}
-
-static dma_addr_t dma_alloc_address(struct device *dev, int size)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long offset, flags;
-
- spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
- offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
- if (offset == -1) {
- if (!s390_iommu_strict) {
- /* global flush before DMA addresses are reused */
- if (zpci_refresh_global(zdev))
- goto out_error;
-
- bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
- zdev->lazy_bitmap, zdev->iommu_pages);
- bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
- }
- /* wrap-around */
- offset = __dma_alloc_iommu(dev, 0, size);
- if (offset == -1)
- goto out_error;
- }
- zdev->next_bit = offset + size;
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-
- return zdev->start_dma + offset * PAGE_SIZE;
-
-out_error:
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
- return DMA_MAPPING_ERROR;
-}
-
-static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long flags, offset;
-
- offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
-
- spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
- if (!zdev->iommu_bitmap)
- goto out;
-
- if (s390_iommu_strict)
- bitmap_clear(zdev->iommu_bitmap, offset, size);
- else
- bitmap_set(zdev->lazy_bitmap, offset, size);
-
-out:
- spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-}
-
-static inline void zpci_err_dma(unsigned long rc, unsigned long addr)
-{
- struct {
- unsigned long rc;
- unsigned long addr;
- } __packed data = {rc, addr};
-
- zpci_err_hex(&data, sizeof(data));
-}
-
-static dma_addr_t s390_dma_map_pages(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- unsigned long pa = page_to_phys(page) + offset;
- int flags = ZPCI_PTE_VALID;
- unsigned long nr_pages;
- dma_addr_t dma_addr;
- int ret;
-
- /* This rounds up number of pages based on size and offset */
- nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
- dma_addr = dma_alloc_address(dev, nr_pages);
- if (dma_addr == DMA_MAPPING_ERROR) {
- ret = -ENOSPC;
- goto out_err;
- }
-
- /* Use rounded up size */
- size = nr_pages * PAGE_SIZE;
-
- if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
- flags |= ZPCI_TABLE_PROTECTED;
-
- ret = dma_update_trans(zdev, pa, dma_addr, size, flags);
- if (ret)
- goto out_free;
-
- atomic64_add(nr_pages, &zdev->mapped_pages);
- return dma_addr + (offset & ~PAGE_MASK);
-
-out_free:
- dma_free_address(dev, dma_addr, nr_pages);
-out_err:
- zpci_err("map error:\n");
- zpci_err_dma(ret, pa);
- return DMA_MAPPING_ERROR;
-}
-
-static void s390_dma_unmap_pages(struct device *dev, dma_addr_t dma_addr,
- size_t size, enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- int npages, ret;
-
- npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
- dma_addr = dma_addr & PAGE_MASK;
- ret = dma_update_trans(zdev, 0, dma_addr, npages * PAGE_SIZE,
- ZPCI_PTE_INVALID);
- if (ret) {
- zpci_err("unmap error:\n");
- zpci_err_dma(ret, dma_addr);
- return;
- }
-
- atomic64_add(npages, &zdev->unmapped_pages);
- dma_free_address(dev, dma_addr, npages);
-}
-
-static void *s390_dma_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- struct page *page;
- phys_addr_t pa;
- dma_addr_t map;
-
- size = PAGE_ALIGN(size);
- page = alloc_pages(flag | __GFP_ZERO, get_order(size));
- if (!page)
- return NULL;
-
- pa = page_to_phys(page);
- map = s390_dma_map_pages(dev, page, 0, size, DMA_BIDIRECTIONAL, 0);
- if (dma_mapping_error(dev, map)) {
- __free_pages(page, get_order(size));
- return NULL;
- }
-
- atomic64_add(size / PAGE_SIZE, &zdev->allocated_pages);
- if (dma_handle)
- *dma_handle = map;
- return phys_to_virt(pa);
-}
-
-static void s390_dma_free(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-
- size = PAGE_ALIGN(size);
- atomic64_sub(size / PAGE_SIZE, &zdev->allocated_pages);
- s390_dma_unmap_pages(dev, dma_handle, size, DMA_BIDIRECTIONAL, 0);
- free_pages((unsigned long)vaddr, get_order(size));
-}
-
-/* Map a segment into a contiguous dma address area */
-static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
- size_t size, dma_addr_t *handle,
- enum dma_data_direction dir)
-{
- unsigned long nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
- struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
- dma_addr_t dma_addr_base, dma_addr;
- int flags = ZPCI_PTE_VALID;
- struct scatterlist *s;
- phys_addr_t pa = 0;
- int ret;
-
- dma_addr_base = dma_alloc_address(dev, nr_pages);
- if (dma_addr_base == DMA_MAPPING_ERROR)
- return -ENOMEM;
-
- dma_addr = dma_addr_base;
- if (dir == DMA_NONE || dir == DMA_TO_DEVICE)
- flags |= ZPCI_TABLE_PROTECTED;
-
- for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
- pa = page_to_phys(sg_page(s));
- ret = __dma_update_trans(zdev, pa, dma_addr,
- s->offset + s->length, flags);
- if (ret)
- goto unmap;
-
- dma_addr += s->offset + s->length;
- }
- ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
- if (ret)
- goto unmap;
-
- *handle = dma_addr_base;
- atomic64_add(nr_pages, &zdev->mapped_pages);
-
- return ret;
-
-unmap:
- dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
- ZPCI_PTE_INVALID);
- dma_free_address(dev, dma_addr_base, nr_pages);
- zpci_err("map error:\n");
- zpci_err_dma(ret, pa);
- return ret;
-}
-
-static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
- int nr_elements, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *s = sg, *start = sg, *dma = sg;
- unsigned int max = dma_get_max_seg_size(dev);
- unsigned int size = s->offset + s->length;
- unsigned int offset = s->offset;
- int count = 0, i, ret;
-
- for (i = 1; i < nr_elements; i++) {
- s = sg_next(s);
-
- s->dma_length = 0;
-
- if (s->offset || (size & ~PAGE_MASK) ||
- size + s->length > max) {
- ret = __s390_dma_map_sg(dev, start, size,
- &dma->dma_address, dir);
- if (ret)
- goto unmap;
-
- dma->dma_address += offset;
- dma->dma_length = size - offset;
-
- size = offset = s->offset;
- start = s;
- dma = sg_next(dma);
- count++;
- }
- size += s->length;
- }
- ret = __s390_dma_map_sg(dev, start, size, &dma->dma_address, dir);
- if (ret)
- goto unmap;
-
- dma->dma_address += offset;
- dma->dma_length = size - offset;
-
- return count + 1;
-unmap:
- for_each_sg(sg, s, count, i)
- s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
- dir, attrs);
-
- return ret;
-}
-
-static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
- int nr_elements, enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *s;
- int i;
-
- for_each_sg(sg, s, nr_elements, i) {
- if (s->dma_length)
- s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
- dir, attrs);
- s->dma_address = 0;
- s->dma_length = 0;
- }
-}
-
-static unsigned long *bitmap_vzalloc(size_t bits, gfp_t flags)
-{
- size_t n = BITS_TO_LONGS(bits);
- size_t bytes;
-
- if (unlikely(check_mul_overflow(n, sizeof(unsigned long), &bytes)))
- return NULL;
-
- return vzalloc(bytes);
-}
-
-int zpci_dma_init_device(struct zpci_dev *zdev)
-{
- u8 status;
- int rc;
-
- /*
- * At this point, if the device is part of an IOMMU domain, this would
- * be a strong hint towards a bug in the IOMMU API (common) code and/or
- * simultaneous access via IOMMU and DMA API. So let's issue a warning.
- */
- WARN_ON(zdev->s390_domain);
-
- spin_lock_init(&zdev->iommu_bitmap_lock);
-
- zdev->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
- if (!zdev->dma_table) {
- rc = -ENOMEM;
- goto out;
- }
-
- /*
- * Restrict the iommu bitmap size to the minimum of the following:
- * - s390_iommu_aperture which defaults to high_memory
- * - 3-level pagetable address limit minus start_dma offset
- * - DMA address range allowed by the hardware (clp query pci fn)
- *
- * Also set zdev->end_dma to the actual end address of the usable
- * range, instead of the theoretical maximum as reported by hardware.
- *
- * This limits the number of concurrently usable DMA mappings since
- * for each DMA mapped memory address we need a DMA address including
- * extra DMA addresses for multiple mappings of the same memory address.
- */
- zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
- zdev->iommu_size = min3(s390_iommu_aperture,
- ZPCI_TABLE_SIZE_RT - zdev->start_dma,
- zdev->end_dma - zdev->start_dma + 1);
- zdev->end_dma = zdev->start_dma + zdev->iommu_size - 1;
- zdev->iommu_pages = zdev->iommu_size >> PAGE_SHIFT;
- zdev->iommu_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL);
- if (!zdev->iommu_bitmap) {
- rc = -ENOMEM;
- goto free_dma_table;
- }
- if (!s390_iommu_strict) {
- zdev->lazy_bitmap = bitmap_vzalloc(zdev->iommu_pages, GFP_KERNEL);
- if (!zdev->lazy_bitmap) {
- rc = -ENOMEM;
- goto free_bitmap;
- }
-
- }
- if (zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
- virt_to_phys(zdev->dma_table), &status)) {
- rc = -EIO;
- goto free_bitmap;
- }
-
- return 0;
-free_bitmap:
- vfree(zdev->iommu_bitmap);
- zdev->iommu_bitmap = NULL;
- vfree(zdev->lazy_bitmap);
- zdev->lazy_bitmap = NULL;
-free_dma_table:
- dma_free_cpu_table(zdev->dma_table);
- zdev->dma_table = NULL;
-out:
- return rc;
-}
-
-int zpci_dma_exit_device(struct zpci_dev *zdev)
-{
- int cc = 0;
-
- /*
- * At this point, if the device is part of an IOMMU domain, this would
- * be a strong hint towards a bug in the IOMMU API (common) code and/or
- * simultaneous access via IOMMU and DMA API. So let's issue a warning.
- */
- WARN_ON(zdev->s390_domain);
- if (zdev_enabled(zdev))
- cc = zpci_unregister_ioat(zdev, 0);
- /*
- * cc == 3 indicates the function is gone already. This can happen
- * if the function was deconfigured/disabled suddenly and we have not
- * received a new handle yet.
- */
- if (cc && cc != 3)
- return -EIO;
-
- dma_cleanup_tables(zdev->dma_table);
- zdev->dma_table = NULL;
- vfree(zdev->iommu_bitmap);
- zdev->iommu_bitmap = NULL;
- vfree(zdev->lazy_bitmap);
- zdev->lazy_bitmap = NULL;
- zdev->next_bit = 0;
- return 0;
-}
-
-static int __init dma_alloc_cpu_table_caches(void)
-{
- dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
- ZPCI_TABLE_SIZE, ZPCI_TABLE_ALIGN,
- 0, NULL);
- if (!dma_region_table_cache)
- return -ENOMEM;
-
- dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
- ZPCI_PT_SIZE, ZPCI_PT_ALIGN,
- 0, NULL);
- if (!dma_page_table_cache) {
- kmem_cache_destroy(dma_region_table_cache);
- return -ENOMEM;
- }
- return 0;
-}
-
-int __init zpci_dma_init(void)
-{
- s390_iommu_aperture = (u64)virt_to_phys(high_memory);
- if (!s390_iommu_aperture_factor)
- s390_iommu_aperture = ULONG_MAX;
- else
- s390_iommu_aperture *= s390_iommu_aperture_factor;
-
- return dma_alloc_cpu_table_caches();
-}
-
-void zpci_dma_exit(void)
-{
- kmem_cache_destroy(dma_page_table_cache);
- kmem_cache_destroy(dma_region_table_cache);
-}
-
-const struct dma_map_ops s390_pci_dma_ops = {
- .alloc = s390_dma_alloc,
- .free = s390_dma_free,
- .map_sg = s390_dma_map_sg,
- .unmap_sg = s390_dma_unmap_sg,
- .map_page = s390_dma_map_pages,
- .unmap_page = s390_dma_unmap_pages,
- .mmap = dma_common_mmap,
- .get_sgtable = dma_common_get_sgtable,
- .alloc_pages = dma_common_alloc_pages,
- .free_pages = dma_common_free_pages,
- /* dma_supported is unconditionally true without a callback */
-};
-EXPORT_SYMBOL_GPL(s390_pci_dma_ops);
-
-static int __init s390_iommu_setup(char *str)
-{
- if (!strcmp(str, "strict"))
- s390_iommu_strict = 1;
- return 1;
-}
-
-__setup("s390_iommu=", s390_iommu_setup);
-
-static int __init s390_iommu_aperture_setup(char *str)
-{
- if (kstrtou32(str, 10, &s390_iommu_aperture_factor))
- s390_iommu_aperture_factor = 1;
- return 1;
-}
-
-__setup("s390_iommu_aperture=", s390_iommu_aperture_setup);
diff --git a/arch/s390/pci/pci_event.c b/arch/s390/pci/pci_event.c
index b9324ca2eb94..4d9773ef9e0a 100644
--- a/arch/s390/pci/pci_event.c
+++ b/arch/s390/pci/pci_event.c
@@ -59,9 +59,16 @@ static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
}
}
-static bool is_passed_through(struct zpci_dev *zdev)
+static bool is_passed_through(struct pci_dev *pdev)
{
- return zdev->s390_domain;
+ struct zpci_dev *zdev = to_zpci(pdev);
+ bool ret;
+
+ mutex_lock(&zdev->kzdev_lock);
+ ret = !!zdev->kzdev;
+ mutex_unlock(&zdev->kzdev_lock);
+
+ return ret;
}
static bool is_driver_supported(struct pci_driver *driver)
@@ -176,7 +183,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
}
pdev->error_state = pci_channel_io_frozen;
- if (is_passed_through(to_zpci(pdev))) {
+ if (is_passed_through(pdev)) {
pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
pci_name(pdev));
goto out_unlock;
@@ -239,7 +246,7 @@ static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
* we will inject the error event and let the guest recover the device
* itself.
*/
- if (is_passed_through(to_zpci(pdev)))
+ if (is_passed_through(pdev))
goto out;
driver = to_pci_driver(pdev->dev.driver);
if (driver && driver->err_handler && driver->err_handler->error_detected)
@@ -306,8 +313,6 @@ static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
/* Even though the device is already gone we still
* need to free zPCI resources as part of the disable.
*/
- if (zdev->dma_table)
- zpci_dma_exit_device(zdev);
if (zdev_enabled(zdev))
zpci_disable_device(zdev);
zdev->state = ZPCI_FN_STATE_STANDBY;
diff --git a/arch/s390/pci/pci_sysfs.c b/arch/s390/pci/pci_sysfs.c
index cae280e5c047..8a7abac51816 100644
--- a/arch/s390/pci/pci_sysfs.c
+++ b/arch/s390/pci/pci_sysfs.c
@@ -56,6 +56,7 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
struct pci_dev *pdev = to_pci_dev(dev);
struct zpci_dev *zdev = to_zpci(pdev);
int ret = 0;
+ u8 status;
/* Can't use device_remove_self() here as that would lead us to lock
* the pci_rescan_remove_lock while holding the device' kernfs lock.
@@ -82,12 +83,6 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
pci_lock_rescan_remove();
if (pci_dev_is_added(pdev)) {
pci_stop_and_remove_bus_device(pdev);
- if (zdev->dma_table) {
- ret = zpci_dma_exit_device(zdev);
- if (ret)
- goto out;
- }
-
if (zdev_enabled(zdev)) {
ret = zpci_disable_device(zdev);
/*
@@ -105,14 +100,16 @@ static ssize_t recover_store(struct device *dev, struct device_attribute *attr,
ret = zpci_enable_device(zdev);
if (ret)
goto out;
- ret = zpci_dma_init_device(zdev);
- if (ret) {
- zpci_disable_device(zdev);
- goto out;
+
+ if (zdev->dma_table) {
+ ret = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
+ virt_to_phys(zdev->dma_table), &status);
+ if (ret)
+ zpci_disable_device(zdev);
}
- pci_rescan_bus(zdev->zbus->bus);
}
out:
+ pci_rescan_bus(zdev->zbus->bus);
pci_unlock_rescan_remove();
if (kn)
sysfs_unbreak_active_protection(kn);
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index cd6727898b11..3199fd54b462 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -91,7 +91,7 @@ config IOMMU_DEBUGFS
choice
prompt "IOMMU default domain type"
depends on IOMMU_API
- default IOMMU_DEFAULT_DMA_LAZY if X86 || IA64
+ default IOMMU_DEFAULT_DMA_LAZY if X86 || IA64 || S390
default IOMMU_DEFAULT_DMA_STRICT
help
Choose the type of IOMMU domain used to manage DMA API usage by
@@ -146,7 +146,7 @@ config OF_IOMMU
# IOMMU-agnostic DMA-mapping layer
config IOMMU_DMA
- def_bool ARM64 || IA64 || X86
+ def_bool ARM64 || IA64 || X86 || S390
select DMA_OPS
select IOMMU_API
select IOMMU_IOVA
diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
index 9b5fc3356bf2..75132ae861a2 100644
--- a/drivers/iommu/amd/Kconfig
+++ b/drivers/iommu/amd/Kconfig
@@ -22,15 +22,6 @@ config AMD_IOMMU
your BIOS for an option to enable it or if you have an IVRS ACPI
table.
-config AMD_IOMMU_V2
- tristate "AMD IOMMU Version 2 driver"
- depends on AMD_IOMMU
- select MMU_NOTIFIER
- help
- This option enables support for the AMD IOMMUv2 features of the IOMMU
- hardware. Select this option if you want to use devices that support
- the PCI PRI and PASID interface.
-
config AMD_IOMMU_DEBUGFS
bool "Enable AMD IOMMU internals in DebugFS"
depends on AMD_IOMMU && IOMMU_DEBUGFS
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index 773d8aa00283..f454fbb1569e 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
-obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index e2857109e966..86be1edd50ee 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -38,9 +38,6 @@ extern int amd_iommu_guest_ir;
extern enum io_pgtable_fmt amd_iommu_pgtable;
extern int amd_iommu_gpt_level;
-/* IOMMUv2 specific functions */
-struct iommu_domain;
-
bool amd_iommu_v2_supported(void);
struct amd_iommu *get_amd_iommu(unsigned int idx);
u8 amd_iommu_pc_get_max_banks(unsigned int idx);
@@ -51,10 +48,10 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
u8 fxn, u64 *value);
-int amd_iommu_register_ppr_notifier(struct notifier_block *nb);
-int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb);
-void amd_iommu_domain_direct_map(struct iommu_domain *dom);
-int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
+/* Device capabilities */
+int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev);
+void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev);
+
int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address);
void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
void amd_iommu_domain_update(struct protection_domain *domain);
@@ -87,9 +84,25 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev)
(pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
}
-static inline bool iommu_feature(struct amd_iommu *iommu, u64 mask)
+static inline bool check_feature(u64 mask)
+{
+ return (amd_iommu_efr & mask);
+}
+
+static inline bool check_feature2(u64 mask)
+{
+ return (amd_iommu_efr2 & mask);
+}
+
+static inline int check_feature_gpt_level(void)
+{
+ return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK);
+}
+
+static inline bool amd_iommu_gt_ppr_supported(void)
{
- return !!(iommu->features & mask);
+ return (check_feature(FEATURE_GT) &&
+ check_feature(FEATURE_PPR));
}
static inline u64 iommu_virt_to_phys(void *vaddr)
@@ -105,7 +118,6 @@ static inline void *iommu_phys_to_virt(unsigned long paddr)
static inline
void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
{
- atomic64_set(&domain->iop.pt_root, root);
domain->iop.root = (u64 *)(root & PAGE_MASK);
domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */
}
@@ -146,8 +158,5 @@ void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
u64 *root, int mode);
struct dev_table_entry *get_dev_table(struct amd_iommu *iommu);
-extern u64 amd_iommu_efr;
-extern u64 amd_iommu_efr2;
-
extern bool amd_iommu_snp_en;
#endif
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index 7dc30c2b56b3..e742006f2885 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -451,6 +451,10 @@
#define PD_IOMMUV2_MASK BIT(3) /* domain has gcr3 table */
#define PD_GIOV_MASK BIT(4) /* domain enable GIOV support */
+/* Timeout stuff */
+#define LOOP_TIMEOUT 100000
+#define MMIO_STATUS_TIMEOUT 2000000
+
extern bool amd_iommu_dump;
#define DUMP_printk(format, arg...) \
do { \
@@ -505,19 +509,6 @@ extern struct kmem_cache *amd_iommu_irq_cache;
#define APERTURE_RANGE_INDEX(a) ((a) >> APERTURE_RANGE_SHIFT)
#define APERTURE_PAGE_INDEX(a) (((a) >> 21) & 0x3fULL)
-/*
- * This struct is used to pass information about
- * incoming PPR faults around.
- */
-struct amd_iommu_fault {
- u64 address; /* IO virtual address of the fault*/
- u32 pasid; /* Address space identifier */
- u32 sbdf; /* Originating PCI device id */
- u16 tag; /* PPR tag */
- u16 flags; /* Fault flags */
-
-};
-
struct amd_iommu;
struct iommu_domain;
@@ -544,7 +535,6 @@ struct amd_io_pgtable {
struct io_pgtable iop;
int mode;
u64 *root;
- atomic64_t pt_root; /* pgtable root and pgtable mode */
u64 *pgd; /* v2 pgtable pgd pointer */
};
@@ -676,9 +666,6 @@ struct amd_iommu {
/* Extended features 2 */
u64 features2;
- /* IOMMUv2 */
- bool is_iommu_v2;
-
/* PCI device id of the IOMMU device */
u16 devid;
@@ -799,6 +786,14 @@ struct devid_map {
bool cmd_line;
};
+#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */
+#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */
+#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */
+/* Device may request execution on memory pages */
+#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8
+/* Device may request super-user privileges */
+#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10
+
/*
* This struct contains device specific data for the IOMMU
*/
@@ -811,13 +806,15 @@ struct iommu_dev_data {
struct protection_domain *domain; /* Domain the device is bound to */
struct device *dev;
u16 devid; /* PCI Device ID */
- bool iommu_v2; /* Device can make use of IOMMUv2 */
- struct {
- bool enabled;
- int qdep;
- } ats; /* ATS state */
- bool pri_tlp; /* PASID TLB required for
+
+ u32 flags; /* Holds AMD_IOMMU_DEVICE_FLAG_<*> */
+ int ats_qdep;
+ u8 ats_enabled :1; /* ATS state */
+ u8 pri_enabled :1; /* PRI state */
+ u8 pasid_enabled:1; /* PASID state */
+ u8 pri_tlp :1; /* PASID TLB required for
PPR completions */
+ u8 ppr :1; /* Enable device PPR support */
bool use_vapic; /* Enable device to use vapic mode */
bool defer_attach;
@@ -884,16 +881,15 @@ extern unsigned amd_iommu_aperture_order;
/* allocation bitmap for domain ids */
extern unsigned long *amd_iommu_pd_alloc_bitmap;
-/* Smallest max PASID supported by any IOMMU in the system */
-extern u32 amd_iommu_max_pasid;
-
-extern bool amd_iommu_v2_present;
-
extern bool amd_iommu_force_isolation;
/* Max levels of glxval supported */
extern int amd_iommu_max_glx_val;
+/* Global EFR and EFR2 registers */
+extern u64 amd_iommu_efr;
+extern u64 amd_iommu_efr2;
+
/*
* This function flushes all internal caches of
* the IOMMU used by this driver.
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 45efb7e5d725..64bcf3df37ee 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -83,8 +83,6 @@
#define ACPI_DEVFLAG_LINT1 0x80
#define ACPI_DEVFLAG_ATSDIS 0x10000000
-#define LOOP_TIMEOUT 2000000
-
#define IVRS_GET_SBDF_ID(seg, bus, dev, fn) (((seg & 0xffff) << 16) | ((bus & 0xff) << 8) \
| ((dev & 0x1f) << 3) | (fn & 0x7))
@@ -187,9 +185,6 @@ static int amd_iommus_present;
bool amd_iommu_np_cache __read_mostly;
bool amd_iommu_iotlb_sup __read_mostly = true;
-u32 amd_iommu_max_pasid __read_mostly = ~0;
-
-bool amd_iommu_v2_present __read_mostly;
static bool amd_iommu_pc_present __read_mostly;
bool amdr_ivrs_remap_support __read_mostly;
@@ -272,7 +267,7 @@ int amd_iommu_get_num_iommus(void)
* Iterate through all the IOMMUs to get common EFR
* masks among all IOMMUs and warn if found inconsistency.
*/
-static void get_global_efr(void)
+static __init void get_global_efr(void)
{
struct amd_iommu *iommu;
@@ -304,16 +299,6 @@ static void get_global_efr(void)
pr_info("Using global IVHD EFR:%#llx, EFR2:%#llx\n", amd_iommu_efr, amd_iommu_efr2);
}
-static bool check_feature_on_all_iommus(u64 mask)
-{
- return !!(amd_iommu_efr & mask);
-}
-
-static inline int check_feature_gpt_level(void)
-{
- return ((amd_iommu_efr >> FEATURE_GATS_SHIFT) & FEATURE_GATS_MASK);
-}
-
/*
* For IVHD type 0x11/0x40, EFR is also available via IVHD.
* Default to IVHD EFR since it is available sooner
@@ -399,7 +384,7 @@ static void iommu_set_cwwb_range(struct amd_iommu *iommu)
u64 start = iommu_virt_to_phys((void *)iommu->cmd_sem);
u64 entry = start & PM_ADDR_MASK;
- if (!check_feature_on_all_iommus(FEATURE_SNP))
+ if (!check_feature(FEATURE_SNP))
return;
/* Note:
@@ -869,7 +854,7 @@ static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
void *buf = (void *)__get_free_pages(gfp, order);
if (buf &&
- check_feature_on_all_iommus(FEATURE_SNP) &&
+ check_feature(FEATURE_SNP) &&
set_memory_4k((unsigned long)buf, (1 << order))) {
free_pages((unsigned long)buf, order);
buf = NULL;
@@ -985,14 +970,14 @@ static int iommu_ga_log_enable(struct amd_iommu *iommu)
iommu_feature_enable(iommu, CONTROL_GAINT_EN);
iommu_feature_enable(iommu, CONTROL_GALOG_EN);
- for (i = 0; i < LOOP_TIMEOUT; ++i) {
+ for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
if (status & (MMIO_STATUS_GALOG_RUN_MASK))
break;
udelay(10);
}
- if (WARN_ON(i >= LOOP_TIMEOUT))
+ if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
return -EINVAL;
return 0;
@@ -1048,7 +1033,7 @@ static void iommu_enable_xt(struct amd_iommu *iommu)
static void iommu_enable_gt(struct amd_iommu *iommu)
{
- if (!iommu_feature(iommu, FEATURE_GT))
+ if (!check_feature(FEATURE_GT))
return;
iommu_feature_enable(iommu, CONTROL_GT_EN);
@@ -1987,7 +1972,7 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
u64 val;
struct pci_dev *pdev = iommu->dev;
- if (!iommu_feature(iommu, FEATURE_PC))
+ if (!check_feature(FEATURE_PC))
return;
amd_iommu_pc_present = true;
@@ -2014,8 +1999,7 @@ static ssize_t amd_iommu_show_features(struct device *dev,
struct device_attribute *attr,
char *buf)
{
- struct amd_iommu *iommu = dev_to_amd_iommu(dev);
- return sysfs_emit(buf, "%llx:%llx\n", iommu->features2, iommu->features);
+ return sysfs_emit(buf, "%llx:%llx\n", amd_iommu_efr, amd_iommu_efr2);
}
static DEVICE_ATTR(features, S_IRUGO, amd_iommu_show_features, NULL);
@@ -2051,9 +2035,9 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu)
features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
features2 = readq(iommu->mmio_base + MMIO_EXT_FEATURES2);
- if (!iommu->features) {
- iommu->features = features;
- iommu->features2 = features2;
+ if (!amd_iommu_efr) {
+ amd_iommu_efr = features;
+ amd_iommu_efr2 = features2;
return;
}
@@ -2061,12 +2045,12 @@ static void __init late_iommu_features_init(struct amd_iommu *iommu)
* Sanity check and warn if EFR values from
* IVHD and MMIO conflict.
*/
- if (features != iommu->features ||
- features2 != iommu->features2) {
+ if (features != amd_iommu_efr ||
+ features2 != amd_iommu_efr2) {
pr_warn(FW_WARN
"EFR mismatch. Use IVHD EFR (%#llx : %#llx), EFR2 (%#llx : %#llx).\n",
- features, iommu->features,
- features2, iommu->features2);
+ features, amd_iommu_efr,
+ features2, amd_iommu_efr2);
}
}
@@ -2092,20 +2076,17 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
late_iommu_features_init(iommu);
- if (iommu_feature(iommu, FEATURE_GT)) {
+ if (check_feature(FEATURE_GT)) {
int glxval;
- u32 max_pasid;
u64 pasmax;
- pasmax = iommu->features & FEATURE_PASID_MASK;
+ pasmax = amd_iommu_efr & FEATURE_PASID_MASK;
pasmax >>= FEATURE_PASID_SHIFT;
- max_pasid = (1 << (pasmax + 1)) - 1;
+ iommu->iommu.max_pasids = (1 << (pasmax + 1)) - 1;
- amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid);
+ BUG_ON(iommu->iommu.max_pasids & ~PASID_MASK);
- BUG_ON(amd_iommu_max_pasid & ~PASID_MASK);
-
- glxval = iommu->features & FEATURE_GLXVAL_MASK;
+ glxval = amd_iommu_efr & FEATURE_GLXVAL_MASK;
glxval >>= FEATURE_GLXVAL_SHIFT;
if (amd_iommu_max_glx_val == -1)
@@ -2114,13 +2095,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
amd_iommu_max_glx_val = min(amd_iommu_max_glx_val, glxval);
}
- if (iommu_feature(iommu, FEATURE_GT) &&
- iommu_feature(iommu, FEATURE_PPR)) {
- iommu->is_iommu_v2 = true;
- amd_iommu_v2_present = true;
- }
-
- if (iommu_feature(iommu, FEATURE_PPR) && alloc_ppr_log(iommu))
+ if (check_feature(FEATURE_PPR) && alloc_ppr_log(iommu))
return -ENOMEM;
if (iommu->cap & (1UL << IOMMU_CAP_NPCACHE)) {
@@ -2132,13 +2107,10 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
init_iommu_perf_ctr(iommu);
if (amd_iommu_pgtable == AMD_IOMMU_V2) {
- if (!iommu_feature(iommu, FEATURE_GIOSUP) ||
- !iommu_feature(iommu, FEATURE_GT)) {
+ if (!check_feature(FEATURE_GIOSUP) ||
+ !check_feature(FEATURE_GT)) {
pr_warn("Cannot enable v2 page table for DMA-API. Fallback to v1.\n");
amd_iommu_pgtable = AMD_IOMMU_V1;
- } else if (iommu_default_passthrough()) {
- pr_warn("V2 page table doesn't support passthrough mode. Fallback to v1.\n");
- amd_iommu_pgtable = AMD_IOMMU_V1;
}
}
@@ -2186,35 +2158,29 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
static void print_iommu_info(void)
{
+ int i;
static const char * const feat_str[] = {
"PreF", "PPR", "X2APIC", "NX", "GT", "[5]",
"IA", "GA", "HE", "PC"
};
- struct amd_iommu *iommu;
-
- for_each_iommu(iommu) {
- struct pci_dev *pdev = iommu->dev;
- int i;
- pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
+ if (amd_iommu_efr) {
+ pr_info("Extended features (%#llx, %#llx):", amd_iommu_efr, amd_iommu_efr2);
- if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
- pr_info("Extended features (%#llx, %#llx):", iommu->features, iommu->features2);
-
- for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
- if (iommu_feature(iommu, (1ULL << i)))
- pr_cont(" %s", feat_str[i]);
- }
+ for (i = 0; i < ARRAY_SIZE(feat_str); ++i) {
+ if (check_feature(1ULL << i))
+ pr_cont(" %s", feat_str[i]);
+ }
- if (iommu->features & FEATURE_GAM_VAPIC)
- pr_cont(" GA_vAPIC");
+ if (check_feature(FEATURE_GAM_VAPIC))
+ pr_cont(" GA_vAPIC");
- if (iommu->features & FEATURE_SNP)
- pr_cont(" SNP");
+ if (check_feature(FEATURE_SNP))
+ pr_cont(" SNP");
- pr_cont("\n");
- }
+ pr_cont("\n");
}
+
if (irq_remapping_enabled) {
pr_info("Interrupt remapping enabled\n");
if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
@@ -2900,19 +2866,19 @@ static void enable_iommus_vapic(void)
* Need to set and poll check the GALOGRun bit to zero before
* we can set/ modify GA Log registers safely.
*/
- for (i = 0; i < LOOP_TIMEOUT; ++i) {
+ for (i = 0; i < MMIO_STATUS_TIMEOUT; ++i) {
status = readl(iommu->mmio_base + MMIO_STATUS_OFFSET);
if (!(status & MMIO_STATUS_GALOG_RUN_MASK))
break;
udelay(10);
}
- if (WARN_ON(i >= LOOP_TIMEOUT))
+ if (WARN_ON(i >= MMIO_STATUS_TIMEOUT))
return;
}
if (AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir) &&
- !check_feature_on_all_iommus(FEATURE_GAM_VAPIC)) {
+ !check_feature(FEATURE_GAM_VAPIC)) {
amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_LEGACY_GA;
return;
}
@@ -3698,9 +3664,8 @@ bool amd_iommu_v2_supported(void)
* (i.e. EFR[SNPSup]=1), IOMMUv2 page table cannot be used without
* setting up IOMMUv1 page table.
*/
- return amd_iommu_v2_present && !amd_iommu_snp_en;
+ return amd_iommu_gt_ppr_supported() && !amd_iommu_snp_en;
}
-EXPORT_SYMBOL(amd_iommu_v2_supported);
struct amd_iommu *get_amd_iommu(unsigned int idx)
{
@@ -3824,7 +3789,7 @@ int amd_iommu_snp_enable(void)
return -EINVAL;
}
- amd_iommu_snp_en = check_feature_on_all_iommus(FEATURE_SNP);
+ amd_iommu_snp_en = check_feature(FEATURE_SNP);
if (!amd_iommu_snp_en)
return -EINVAL;
diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c
index e9ef2e0a62f6..f818a7e254d4 100644
--- a/drivers/iommu/amd/io_pgtable_v2.c
+++ b/drivers/iommu/amd/io_pgtable_v2.c
@@ -363,10 +363,10 @@ static void v2_free_pgtable(struct io_pgtable *iop)
if (!(pdom->flags & PD_IOMMUV2_MASK))
return;
- /*
- * Make changes visible to IOMMUs. No need to clear gcr3 entry
- * as gcr3 table is already freed.
- */
+ /* Clear gcr3 entry */
+ amd_iommu_domain_clear_gcr3(&pdom->domain, 0);
+
+ /* Make changes visible to IOMMUs */
amd_iommu_domain_update(pdom);
/* Free page table */
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 95bd7c25ba6f..089886485895 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -44,8 +44,6 @@
#define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28))
-#define LOOP_TIMEOUT 100000
-
/* IO virtual address start page frame number */
#define IOVA_START_PFN (1)
#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
@@ -66,7 +64,6 @@ LIST_HEAD(acpihid_map);
const struct iommu_ops amd_iommu_ops;
-static ATOMIC_NOTIFIER_HEAD(ppr_notifier);
int amd_iommu_max_glx_val = -1;
/*
@@ -79,7 +76,6 @@ struct iommu_cmd {
struct kmem_cache *amd_iommu_irq_cache;
static void detach_device(struct device *dev);
-static int domain_enable_v2(struct protection_domain *domain, int pasids);
/****************************************************************************
*
@@ -322,24 +318,141 @@ static struct iommu_group *acpihid_device_group(struct device *dev)
return entry->group;
}
-static bool pci_iommuv2_capable(struct pci_dev *pdev)
+static inline bool pdev_pasid_supported(struct iommu_dev_data *dev_data)
{
- static const int caps[] = {
- PCI_EXT_CAP_ID_PRI,
- PCI_EXT_CAP_ID_PASID,
- };
- int i, pos;
+ return (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP);
+}
- if (!pci_ats_supported(pdev))
- return false;
+static u32 pdev_get_caps(struct pci_dev *pdev)
+{
+ int features;
+ u32 flags = 0;
+
+ if (pci_ats_supported(pdev))
+ flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
+
+ if (pci_pri_supported(pdev))
+ flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP;
+
+ features = pci_pasid_features(pdev);
+ if (features >= 0) {
+ flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
+
+ if (features & PCI_PASID_CAP_EXEC)
+ flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP;
- for (i = 0; i < 2; ++i) {
- pos = pci_find_ext_capability(pdev, caps[i]);
- if (pos == 0)
- return false;
+ if (features & PCI_PASID_CAP_PRIV)
+ flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP;
}
- return true;
+ return flags;
+}
+
+static inline int pdev_enable_cap_ats(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+ int ret = -EINVAL;
+
+ if (dev_data->ats_enabled)
+ return 0;
+
+ if (amd_iommu_iotlb_sup &&
+ (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP)) {
+ ret = pci_enable_ats(pdev, PAGE_SHIFT);
+ if (!ret) {
+ dev_data->ats_enabled = 1;
+ dev_data->ats_qdep = pci_ats_queue_depth(pdev);
+ }
+ }
+
+ return ret;
+}
+
+static inline void pdev_disable_cap_ats(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+
+ if (dev_data->ats_enabled) {
+ pci_disable_ats(pdev);
+ dev_data->ats_enabled = 0;
+ }
+}
+
+int amd_iommu_pdev_enable_cap_pri(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+ int ret = -EINVAL;
+
+ if (dev_data->pri_enabled)
+ return 0;
+
+ if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) {
+ /*
+ * First reset the PRI state of the device.
+ * FIXME: Hardcode number of outstanding requests for now
+ */
+ if (!pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) {
+ dev_data->pri_enabled = 1;
+ dev_data->pri_tlp = pci_prg_resp_pasid_required(pdev);
+
+ ret = 0;
+ }
+ }
+
+ return ret;
+}
+
+void amd_iommu_pdev_disable_cap_pri(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+
+ if (dev_data->pri_enabled) {
+ pci_disable_pri(pdev);
+ dev_data->pri_enabled = 0;
+ }
+}
+
+static inline int pdev_enable_cap_pasid(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+ int ret = -EINVAL;
+
+ if (dev_data->pasid_enabled)
+ return 0;
+
+ if (dev_data->flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) {
+ /* Only allow access to user-accessible pages */
+ ret = pci_enable_pasid(pdev, 0);
+ if (!ret)
+ dev_data->pasid_enabled = 1;
+ }
+
+ return ret;
+}
+
+static inline void pdev_disable_cap_pasid(struct pci_dev *pdev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(&pdev->dev);
+
+ if (dev_data->pasid_enabled) {
+ pci_disable_pasid(pdev);
+ dev_data->pasid_enabled = 0;
+ }
+}
+
+static void pdev_enable_caps(struct pci_dev *pdev)
+{
+ pdev_enable_cap_ats(pdev);
+ pdev_enable_cap_pasid(pdev);
+ amd_iommu_pdev_enable_cap_pri(pdev);
+
+}
+
+static void pdev_disable_caps(struct pci_dev *pdev)
+{
+ pdev_disable_cap_ats(pdev);
+ pdev_disable_cap_pasid(pdev);
+ amd_iommu_pdev_disable_cap_pri(pdev);
}
/*
@@ -399,8 +512,8 @@ static int iommu_init_device(struct amd_iommu *iommu, struct device *dev)
* it'll be forced to go into translation mode.
*/
if ((iommu_default_passthrough() || !amd_iommu_force_isolation) &&
- dev_is_pci(dev) && pci_iommuv2_capable(to_pci_dev(dev))) {
- dev_data->iommu_v2 = iommu->is_iommu_v2;
+ dev_is_pci(dev) && amd_iommu_gt_ppr_supported()) {
+ dev_data->flags = pdev_get_caps(to_pci_dev(dev));
}
dev_iommu_priv_set(dev, dev_data);
@@ -701,24 +814,6 @@ static void iommu_poll_events(struct amd_iommu *iommu)
writel(head, iommu->mmio_base + MMIO_EVT_HEAD_OFFSET);
}
-static void iommu_handle_ppr_entry(struct amd_iommu *iommu, u64 *raw)
-{
- struct amd_iommu_fault fault;
-
- if (PPR_REQ_TYPE(raw[0]) != PPR_REQ_FAULT) {
- pr_err_ratelimited("Unknown PPR request received\n");
- return;
- }
-
- fault.address = raw[1];
- fault.pasid = PPR_PASID(raw[0]);
- fault.sbdf = PCI_SEG_DEVID_TO_SBDF(iommu->pci_seg->id, PPR_DEVID(raw[0]));
- fault.tag = PPR_TAG(raw[0]);
- fault.flags = PPR_FLAGS(raw[0]);
-
- atomic_notifier_call_chain(&ppr_notifier, 0, &fault);
-}
-
static void iommu_poll_ppr_log(struct amd_iommu *iommu)
{
u32 head, tail;
@@ -764,8 +859,7 @@ static void iommu_poll_ppr_log(struct amd_iommu *iommu)
head = (head + PPR_ENTRY_SIZE) % PPR_LOG_SIZE;
writel(head, iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
- /* Handle PPR entry */
- iommu_handle_ppr_entry(iommu, entry);
+ /* TODO: PPR Handler will be added when we add IOPF support */
/* Refresh ring-buffer information */
head = readl(iommu->mmio_base + MMIO_PPR_HEAD_OFFSET);
@@ -1094,7 +1188,7 @@ static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, u32 pasid,
}
static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, u32 pasid,
- int status, int tag, bool gn)
+ int status, int tag, u8 gn)
{
memset(cmd, 0, sizeof(*cmd));
@@ -1298,7 +1392,7 @@ static void amd_iommu_flush_irt_all(struct amd_iommu *iommu)
void iommu_flush_all_caches(struct amd_iommu *iommu)
{
- if (iommu_feature(iommu, FEATURE_IA)) {
+ if (check_feature(FEATURE_IA)) {
amd_iommu_flush_all(iommu);
} else {
amd_iommu_flush_dte_all(iommu);
@@ -1317,7 +1411,7 @@ static int device_flush_iotlb(struct iommu_dev_data *dev_data,
struct iommu_cmd cmd;
int qdep;
- qdep = dev_data->ats.qdep;
+ qdep = dev_data->ats_qdep;
iommu = rlookup_amd_iommu(dev_data->dev);
if (!iommu)
return -EINVAL;
@@ -1368,7 +1462,7 @@ static int device_flush_dte(struct iommu_dev_data *dev_data)
return ret;
}
- if (dev_data->ats.enabled)
+ if (dev_data->ats_enabled)
ret = device_flush_iotlb(dev_data, 0, ~0UL);
return ret;
@@ -1401,7 +1495,7 @@ static void __domain_flush_pages(struct protection_domain *domain,
list_for_each_entry(dev_data, &domain->dev_list, list) {
- if (!dev_data->ats.enabled)
+ if (!dev_data->ats_enabled)
continue;
ret |= device_flush_iotlb(dev_data, address, size);
@@ -1577,6 +1671,42 @@ static void free_gcr3_table(struct protection_domain *domain)
free_page((unsigned long)domain->gcr3_tbl);
}
+/*
+ * Number of GCR3 table levels required. Level must be 4-Kbyte
+ * page and can contain up to 512 entries.
+ */
+static int get_gcr3_levels(int pasids)
+{
+ int levels;
+
+ if (pasids == -1)
+ return amd_iommu_max_glx_val;
+
+ levels = get_count_order(pasids);
+
+ return levels ? (DIV_ROUND_UP(levels, 9) - 1) : levels;
+}
+
+/* Note: This function expects iommu_domain->lock to be held prior calling the function. */
+static int setup_gcr3_table(struct protection_domain *domain, int pasids)
+{
+ int levels = get_gcr3_levels(pasids);
+
+ if (levels > amd_iommu_max_glx_val)
+ return -EINVAL;
+
+ domain->gcr3_tbl = alloc_pgtable_page(domain->nid, GFP_ATOMIC);
+ if (domain->gcr3_tbl == NULL)
+ return -ENOMEM;
+
+ domain->glx = levels;
+ domain->flags |= PD_IOMMUV2_MASK;
+
+ amd_iommu_domain_update(domain);
+
+ return 0;
+}
+
static void set_dte_entry(struct amd_iommu *iommu, u16 devid,
struct protection_domain *domain, bool ats, bool ppr)
{
@@ -1605,10 +1735,8 @@ static void set_dte_entry(struct amd_iommu *iommu, u16 devid,
if (ats)
flags |= DTE_FLAG_IOTLB;
- if (ppr) {
- if (iommu_feature(iommu, FEATURE_EPHSUP))
- pte_root |= 1ULL << DEV_ENTRY_PPR;
- }
+ if (ppr)
+ pte_root |= 1ULL << DEV_ENTRY_PPR;
if (domain->flags & PD_IOMMUV2_MASK) {
u64 gcr3 = iommu_virt_to_phys(domain->gcr3_tbl);
@@ -1685,7 +1813,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
iommu = rlookup_amd_iommu(dev_data->dev);
if (!iommu)
return;
- ats = dev_data->ats.enabled;
+ ats = dev_data->ats_enabled;
/* Update data structures */
dev_data->domain = domain;
@@ -1701,7 +1829,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
/* Update device table */
set_dte_entry(iommu, dev_data->devid, domain,
- ats, dev_data->iommu_v2);
+ ats, dev_data->ppr);
clone_aliases(iommu, dev_data->dev);
device_flush_dte(dev_data);
@@ -1736,48 +1864,6 @@ static void do_detach(struct iommu_dev_data *dev_data)
domain->dev_cnt -= 1;
}
-static void pdev_iommuv2_disable(struct pci_dev *pdev)
-{
- pci_disable_ats(pdev);
- pci_disable_pri(pdev);
- pci_disable_pasid(pdev);
-}
-
-static int pdev_pri_ats_enable(struct pci_dev *pdev)
-{
- int ret;
-
- /* Only allow access to user-accessible pages */
- ret = pci_enable_pasid(pdev, 0);
- if (ret)
- return ret;
-
- /* First reset the PRI state of the device */
- ret = pci_reset_pri(pdev);
- if (ret)
- goto out_err_pasid;
-
- /* Enable PRI */
- /* FIXME: Hardcode number of outstanding requests for now */
- ret = pci_enable_pri(pdev, 32);
- if (ret)
- goto out_err_pasid;
-
- ret = pci_enable_ats(pdev, PAGE_SHIFT);
- if (ret)
- goto out_err_pri;
-
- return 0;
-
-out_err_pri:
- pci_disable_pri(pdev);
-
-out_err_pasid:
- pci_disable_pasid(pdev);
-
- return ret;
-}
-
/*
* If a device is not yet associated with a domain, this function makes the
* device visible in the domain
@@ -1786,9 +1872,8 @@ static int attach_device(struct device *dev,
struct protection_domain *domain)
{
struct iommu_dev_data *dev_data;
- struct pci_dev *pdev;
unsigned long flags;
- int ret;
+ int ret = 0;
spin_lock_irqsave(&domain->lock, flags);
@@ -1796,45 +1881,13 @@ static int attach_device(struct device *dev,
spin_lock(&dev_data->lock);
- ret = -EBUSY;
- if (dev_data->domain != NULL)
+ if (dev_data->domain != NULL) {
+ ret = -EBUSY;
goto out;
-
- if (!dev_is_pci(dev))
- goto skip_ats_check;
-
- pdev = to_pci_dev(dev);
- if (domain->flags & PD_IOMMUV2_MASK) {
- struct iommu_domain *def_domain = iommu_get_dma_domain(dev);
-
- ret = -EINVAL;
-
- /*
- * In case of using AMD_IOMMU_V1 page table mode and the device
- * is enabling for PPR/ATS support (using v2 table),
- * we need to make sure that the domain type is identity map.
- */
- if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
- def_domain->type != IOMMU_DOMAIN_IDENTITY) {
- goto out;
- }
-
- if (dev_data->iommu_v2) {
- if (pdev_pri_ats_enable(pdev) != 0)
- goto out;
-
- dev_data->ats.enabled = true;
- dev_data->ats.qdep = pci_ats_queue_depth(pdev);
- dev_data->pri_tlp = pci_prg_resp_pasid_required(pdev);
- }
- } else if (amd_iommu_iotlb_sup &&
- pci_enable_ats(pdev, PAGE_SHIFT) == 0) {
- dev_data->ats.enabled = true;
- dev_data->ats.qdep = pci_ats_queue_depth(pdev);
}
-skip_ats_check:
- ret = 0;
+ if (dev_is_pci(dev))
+ pdev_enable_caps(to_pci_dev(dev));
do_attach(dev_data, domain);
@@ -1882,15 +1935,8 @@ static void detach_device(struct device *dev)
do_detach(dev_data);
- if (!dev_is_pci(dev))
- goto out;
-
- if (domain->flags & PD_IOMMUV2_MASK && dev_data->iommu_v2)
- pdev_iommuv2_disable(to_pci_dev(dev));
- else if (dev_data->ats.enabled)
- pci_disable_ats(to_pci_dev(dev));
-
- dev_data->ats.enabled = false;
+ if (dev_is_pci(dev))
+ pdev_disable_caps(to_pci_dev(dev));
out:
spin_unlock(&dev_data->lock);
@@ -1980,7 +2026,7 @@ static void update_device_table(struct protection_domain *domain)
if (!iommu)
continue;
set_dte_entry(iommu, dev_data->devid, domain,
- dev_data->ats.enabled, dev_data->iommu_v2);
+ dev_data->ats_enabled, dev_data->ppr);
clone_aliases(iommu, dev_data->dev);
}
}
@@ -2014,9 +2060,11 @@ void amd_iommu_domain_update(struct protection_domain *domain)
static void cleanup_domain(struct protection_domain *domain)
{
struct iommu_dev_data *entry;
- unsigned long flags;
- spin_lock_irqsave(&domain->lock, flags);
+ lockdep_assert_held(&domain->lock);
+
+ if (!domain->dev_cnt)
+ return;
while (!list_empty(&domain->dev_list)) {
entry = list_first_entry(&domain->dev_list,
@@ -2024,8 +2072,7 @@ static void cleanup_domain(struct protection_domain *domain)
BUG_ON(!entry->domain);
do_detach(entry);
}
-
- spin_unlock_irqrestore(&domain->lock, flags);
+ WARN_ON(domain->dev_cnt != 0);
}
static void protection_domain_free(struct protection_domain *domain)
@@ -2036,6 +2083,12 @@ static void protection_domain_free(struct protection_domain *domain)
if (domain->iop.pgtbl_cfg.tlb)
free_io_pgtable_ops(&domain->iop.iop.ops);
+ if (domain->flags & PD_IOMMUV2_MASK)
+ free_gcr3_table(domain);
+
+ if (domain->iop.root)
+ free_page((unsigned long)domain->iop.root);
+
if (domain->id)
domain_id_free(domain->id);
@@ -2048,18 +2101,10 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode)
BUG_ON(mode < PAGE_MODE_NONE || mode > PAGE_MODE_6_LEVEL);
- spin_lock_init(&domain->lock);
- domain->id = domain_id_alloc();
- if (!domain->id)
- return -ENOMEM;
- INIT_LIST_HEAD(&domain->dev_list);
-
if (mode != PAGE_MODE_NONE) {
pt_root = (void *)get_zeroed_page(GFP_KERNEL);
- if (!pt_root) {
- domain_id_free(domain->id);
+ if (!pt_root)
return -ENOMEM;
- }
}
amd_iommu_domain_set_pgtable(domain, pt_root, mode);
@@ -2069,20 +2114,12 @@ static int protection_domain_init_v1(struct protection_domain *domain, int mode)
static int protection_domain_init_v2(struct protection_domain *domain)
{
- spin_lock_init(&domain->lock);
- domain->id = domain_id_alloc();
- if (!domain->id)
- return -ENOMEM;
- INIT_LIST_HEAD(&domain->dev_list);
-
domain->flags |= PD_GIOV_MASK;
domain->domain.pgsize_bitmap = AMD_IOMMU_PGSIZES_V2;
- if (domain_enable_v2(domain, 1)) {
- domain_id_free(domain->id);
+ if (setup_gcr3_table(domain, 1))
return -ENOMEM;
- }
return 0;
}
@@ -2092,57 +2129,60 @@ static struct protection_domain *protection_domain_alloc(unsigned int type)
struct io_pgtable_ops *pgtbl_ops;
struct protection_domain *domain;
int pgtable;
- int mode = DEFAULT_PGTABLE_LEVEL;
int ret;
+ domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+ if (!domain)
+ return NULL;
+
+ domain->id = domain_id_alloc();
+ if (!domain->id)
+ goto out_err;
+
+ spin_lock_init(&domain->lock);
+ INIT_LIST_HEAD(&domain->dev_list);
+ domain->nid = NUMA_NO_NODE;
+
+ switch (type) {
+ /* No need to allocate io pgtable ops in passthrough mode */
+ case IOMMU_DOMAIN_IDENTITY:
+ return domain;
+ case IOMMU_DOMAIN_DMA:
+ pgtable = amd_iommu_pgtable;
+ break;
/*
- * Force IOMMU v1 page table when iommu=pt and
- * when allocating domain for pass-through devices.
+ * Force IOMMU v1 page table when allocating
+ * domain for pass-through devices.
*/
- if (type == IOMMU_DOMAIN_IDENTITY) {
- pgtable = AMD_IOMMU_V1;
- mode = PAGE_MODE_NONE;
- } else if (type == IOMMU_DOMAIN_UNMANAGED) {
+ case IOMMU_DOMAIN_UNMANAGED:
pgtable = AMD_IOMMU_V1;
- } else if (type == IOMMU_DOMAIN_DMA || type == IOMMU_DOMAIN_DMA_FQ) {
- pgtable = amd_iommu_pgtable;
- } else {
- return NULL;
+ break;
+ default:
+ goto out_err;
}
- domain = kzalloc(sizeof(*domain), GFP_KERNEL);
- if (!domain)
- return NULL;
-
switch (pgtable) {
case AMD_IOMMU_V1:
- ret = protection_domain_init_v1(domain, mode);
+ ret = protection_domain_init_v1(domain, DEFAULT_PGTABLE_LEVEL);
break;
case AMD_IOMMU_V2:
ret = protection_domain_init_v2(domain);
break;
default:
ret = -EINVAL;
+ break;
}
if (ret)
goto out_err;
- /* No need to allocate io pgtable ops in passthrough mode */
- if (type == IOMMU_DOMAIN_IDENTITY)
- return domain;
-
- domain->nid = NUMA_NO_NODE;
-
pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain);
- if (!pgtbl_ops) {
- domain_id_free(domain->id);
+ if (!pgtbl_ops)
goto out_err;
- }
return domain;
out_err:
- kfree(domain);
+ protection_domain_free(domain);
return NULL;
}
@@ -2180,19 +2220,18 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
static void amd_iommu_domain_free(struct iommu_domain *dom)
{
struct protection_domain *domain;
+ unsigned long flags;
- domain = to_pdomain(dom);
+ if (!dom)
+ return;
- if (domain->dev_cnt > 0)
- cleanup_domain(domain);
+ domain = to_pdomain(dom);
- BUG_ON(domain->dev_cnt != 0);
+ spin_lock_irqsave(&domain->lock, flags);
- if (!dom)
- return;
+ cleanup_domain(domain);
- if (domain->flags & PD_IOMMUV2_MASK)
- free_gcr3_table(domain);
+ spin_unlock_irqrestore(&domain->lock, flags);
protection_domain_free(domain);
}
@@ -2233,14 +2272,15 @@ static int amd_iommu_attach_device(struct iommu_domain *dom,
return ret;
}
-static void amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
- unsigned long iova, size_t size)
+static int amd_iommu_iotlb_sync_map(struct iommu_domain *dom,
+ unsigned long iova, size_t size)
{
struct protection_domain *domain = to_pdomain(dom);
struct io_pgtable_ops *ops = &domain->iop.iop.ops;
if (ops->map_pages)
domain_flush_np_cache(domain, iova, size);
+ return 0;
}
static int amd_iommu_map_pages(struct iommu_domain *dom, unsigned long iova,
@@ -2406,7 +2446,6 @@ bool amd_iommu_is_attach_deferred(struct device *dev)
return dev_data->defer_attach;
}
-EXPORT_SYMBOL_GPL(amd_iommu_is_attach_deferred);
static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
{
@@ -2446,7 +2485,7 @@ static int amd_iommu_def_domain_type(struct device *dev)
* and require remapping.
* - SNP is enabled, because it prohibits DTE[Mode]=0.
*/
- if (dev_data->iommu_v2 &&
+ if (pdev_pasid_supported(dev_data) &&
!cc_platform_has(CC_ATTR_MEM_ENCRYPT) &&
!amd_iommu_snp_en) {
return IOMMU_DOMAIN_IDENTITY;
@@ -2485,93 +2524,6 @@ const struct iommu_ops amd_iommu_ops = {
}
};
-/*****************************************************************************
- *
- * The next functions do a basic initialization of IOMMU for pass through
- * mode
- *
- * In passthrough mode the IOMMU is initialized and enabled but not used for
- * DMA-API translation.
- *
- *****************************************************************************/
-
-/* IOMMUv2 specific functions */
-int amd_iommu_register_ppr_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_register(&ppr_notifier, nb);
-}
-EXPORT_SYMBOL(amd_iommu_register_ppr_notifier);
-
-int amd_iommu_unregister_ppr_notifier(struct notifier_block *nb)
-{
- return atomic_notifier_chain_unregister(&ppr_notifier, nb);
-}
-EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
-
-void amd_iommu_domain_direct_map(struct iommu_domain *dom)
-{
- struct protection_domain *domain = to_pdomain(dom);
- unsigned long flags;
-
- spin_lock_irqsave(&domain->lock, flags);
-
- if (domain->iop.pgtbl_cfg.tlb)
- free_io_pgtable_ops(&domain->iop.iop.ops);
-
- spin_unlock_irqrestore(&domain->lock, flags);
-}
-EXPORT_SYMBOL(amd_iommu_domain_direct_map);
-
-/* Note: This function expects iommu_domain->lock to be held prior calling the function. */
-static int domain_enable_v2(struct protection_domain *domain, int pasids)
-{
- int levels;
-
- /* Number of GCR3 table levels required */
- for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9)
- levels += 1;
-
- if (levels > amd_iommu_max_glx_val)
- return -EINVAL;
-
- domain->gcr3_tbl = (void *)get_zeroed_page(GFP_ATOMIC);
- if (domain->gcr3_tbl == NULL)
- return -ENOMEM;
-
- domain->glx = levels;
- domain->flags |= PD_IOMMUV2_MASK;
-
- amd_iommu_domain_update(domain);
-
- return 0;
-}
-
-int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
-{
- struct protection_domain *pdom = to_pdomain(dom);
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&pdom->lock, flags);
-
- /*
- * Save us all sanity checks whether devices already in the
- * domain support IOMMUv2. Just force that the domain has no
- * devices attached when it is switched into IOMMUv2 mode.
- */
- ret = -EBUSY;
- if (pdom->dev_cnt > 0 || pdom->flags & PD_IOMMUV2_MASK)
- goto out;
-
- if (!pdom->gcr3_tbl)
- ret = domain_enable_v2(pdom, pasids);
-
-out:
- spin_unlock_irqrestore(&pdom->lock, flags);
- return ret;
-}
-EXPORT_SYMBOL(amd_iommu_domain_enable_v2);
-
static int __flush_pasid(struct protection_domain *domain, u32 pasid,
u64 address, bool size)
{
@@ -2609,10 +2561,10 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid,
There might be non-IOMMUv2 capable devices in an IOMMUv2
* domain.
*/
- if (!dev_data->ats.enabled)
+ if (!dev_data->ats_enabled)
continue;
- qdep = dev_data->ats.qdep;
+ qdep = dev_data->ats_qdep;
iommu = rlookup_amd_iommu(dev_data->dev);
if (!iommu)
continue;
@@ -2653,7 +2605,6 @@ int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
return ret;
}
-EXPORT_SYMBOL(amd_iommu_flush_page);
static int __amd_iommu_flush_tlb(struct protection_domain *domain, u32 pasid)
{
@@ -2673,7 +2624,6 @@ int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid)
return ret;
}
-EXPORT_SYMBOL(amd_iommu_flush_tlb);
static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc)
{
@@ -2753,7 +2703,6 @@ int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
return ret;
}
-EXPORT_SYMBOL(amd_iommu_domain_set_gcr3);
int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid)
{
@@ -2767,7 +2716,6 @@ int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid)
return ret;
}
-EXPORT_SYMBOL(amd_iommu_domain_clear_gcr3);
int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
int status, int tag)
@@ -2786,49 +2734,6 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
return iommu_queue_command(iommu, &cmd);
}
-EXPORT_SYMBOL(amd_iommu_complete_ppr);
-
-int amd_iommu_device_info(struct pci_dev *pdev,
- struct amd_iommu_device_info *info)
-{
- int max_pasids;
- int pos;
-
- if (pdev == NULL || info == NULL)
- return -EINVAL;
-
- if (!amd_iommu_v2_supported())
- return -EINVAL;
-
- memset(info, 0, sizeof(*info));
-
- if (pci_ats_supported(pdev))
- info->flags |= AMD_IOMMU_DEVICE_FLAG_ATS_SUP;
-
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
- if (pos)
- info->flags |= AMD_IOMMU_DEVICE_FLAG_PRI_SUP;
-
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID);
- if (pos) {
- int features;
-
- max_pasids = 1 << (9 * (amd_iommu_max_glx_val + 1));
- max_pasids = min(max_pasids, (1 << 20));
-
- info->flags |= AMD_IOMMU_DEVICE_FLAG_PASID_SUP;
- info->max_pasids = min(pci_max_pasids(pdev), max_pasids);
-
- features = pci_pasid_features(pdev);
- if (features & PCI_PASID_CAP_EXEC)
- info->flags |= AMD_IOMMU_DEVICE_FLAG_EXEC_SUP;
- if (features & PCI_PASID_CAP_PRIV)
- info->flags |= AMD_IOMMU_DEVICE_FLAG_PRIV_SUP;
- }
-
- return 0;
-}
-EXPORT_SYMBOL(amd_iommu_device_info);
#ifdef CONFIG_IRQ_REMAP
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
deleted file mode 100644
index 57c2fb1146e2..000000000000
--- a/drivers/iommu/amd/iommu_v2.c
+++ /dev/null
@@ -1,996 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2010-2012 Advanced Micro Devices, Inc.
- * Author: Joerg Roedel <jroedel@suse.de>
- */
-
-#define pr_fmt(fmt) "AMD-Vi: " fmt
-
-#include <linux/refcount.h>
-#include <linux/mmu_notifier.h>
-#include <linux/amd-iommu.h>
-#include <linux/mm_types.h>
-#include <linux/profile.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/sched/mm.h>
-#include <linux/wait.h>
-#include <linux/pci.h>
-#include <linux/gfp.h>
-#include <linux/cc_platform.h>
-
-#include "amd_iommu.h"
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Joerg Roedel <jroedel@suse.de>");
-
-#define PRI_QUEUE_SIZE 512
-
-struct pri_queue {
- atomic_t inflight;
- bool finish;
- int status;
-};
-
-struct pasid_state {
- struct list_head list; /* For global state-list */
- refcount_t count; /* Reference count */
- unsigned mmu_notifier_count; /* Counting nested mmu_notifier
- calls */
- struct mm_struct *mm; /* mm_struct for the faults */
- struct mmu_notifier mn; /* mmu_notifier handle */
- struct pri_queue pri[PRI_QUEUE_SIZE]; /* PRI tag states */
- struct device_state *device_state; /* Link to our device_state */
- u32 pasid; /* PASID index */
- bool invalid; /* Used during setup and
- teardown of the pasid */
- spinlock_t lock; /* Protect pri_queues and
- mmu_notifer_count */
- wait_queue_head_t wq; /* To wait for count == 0 */
-};
-
-struct device_state {
- struct list_head list;
- u32 sbdf;
- atomic_t count;
- struct pci_dev *pdev;
- struct pasid_state **states;
- struct iommu_domain *domain;
- int pasid_levels;
- int max_pasids;
- amd_iommu_invalid_ppr_cb inv_ppr_cb;
- amd_iommu_invalidate_ctx inv_ctx_cb;
- spinlock_t lock;
- wait_queue_head_t wq;
-};
-
-struct fault {
- struct work_struct work;
- struct device_state *dev_state;
- struct pasid_state *state;
- struct mm_struct *mm;
- u64 address;
- u32 pasid;
- u16 tag;
- u16 finish;
- u16 flags;
-};
-
-static LIST_HEAD(state_list);
-static DEFINE_SPINLOCK(state_lock);
-
-static struct workqueue_struct *iommu_wq;
-
-static void free_pasid_states(struct device_state *dev_state);
-
-static struct device_state *__get_device_state(u32 sbdf)
-{
- struct device_state *dev_state;
-
- list_for_each_entry(dev_state, &state_list, list) {
- if (dev_state->sbdf == sbdf)
- return dev_state;
- }
-
- return NULL;
-}
-
-static struct device_state *get_device_state(u32 sbdf)
-{
- struct device_state *dev_state;
- unsigned long flags;
-
- spin_lock_irqsave(&state_lock, flags);
- dev_state = __get_device_state(sbdf);
- if (dev_state != NULL)
- atomic_inc(&dev_state->count);
- spin_unlock_irqrestore(&state_lock, flags);
-
- return dev_state;
-}
-
-static void free_device_state(struct device_state *dev_state)
-{
- struct iommu_group *group;
-
- /* Get rid of any remaining pasid states */
- free_pasid_states(dev_state);
-
- /*
- * Wait until the last reference is dropped before freeing
- * the device state.
- */
- wait_event(dev_state->wq, !atomic_read(&dev_state->count));
-
- /*
- * First detach device from domain - No more PRI requests will arrive
- * from that device after it is unbound from the IOMMUv2 domain.
- */
- group = iommu_group_get(&dev_state->pdev->dev);
- if (WARN_ON(!group))
- return;
-
- iommu_detach_group(dev_state->domain, group);
-
- iommu_group_put(group);
-
- /* Everything is down now, free the IOMMUv2 domain */
- iommu_domain_free(dev_state->domain);
-
- /* Finally get rid of the device-state */
- kfree(dev_state);
-}
-
-static void put_device_state(struct device_state *dev_state)
-{
- if (atomic_dec_and_test(&dev_state->count))
- wake_up(&dev_state->wq);
-}
-
-/* Must be called under dev_state->lock */
-static struct pasid_state **__get_pasid_state_ptr(struct device_state *dev_state,
- u32 pasid, bool alloc)
-{
- struct pasid_state **root, **ptr;
- int level, index;
-
- level = dev_state->pasid_levels;
- root = dev_state->states;
-
- while (true) {
-
- index = (pasid >> (9 * level)) & 0x1ff;
- ptr = &root[index];
-
- if (level == 0)
- break;
-
- if (*ptr == NULL) {
- if (!alloc)
- return NULL;
-
- *ptr = (void *)get_zeroed_page(GFP_ATOMIC);
- if (*ptr == NULL)
- return NULL;
- }
-
- root = (struct pasid_state **)*ptr;
- level -= 1;
- }
-
- return ptr;
-}
-
-static int set_pasid_state(struct device_state *dev_state,
- struct pasid_state *pasid_state,
- u32 pasid)
-{
- struct pasid_state **ptr;
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&dev_state->lock, flags);
- ptr = __get_pasid_state_ptr(dev_state, pasid, true);
-
- ret = -ENOMEM;
- if (ptr == NULL)
- goto out_unlock;
-
- ret = -ENOMEM;
- if (*ptr != NULL)
- goto out_unlock;
-
- *ptr = pasid_state;
-
- ret = 0;
-
-out_unlock:
- spin_unlock_irqrestore(&dev_state->lock, flags);
-
- return ret;
-}
-
-static void clear_pasid_state(struct device_state *dev_state, u32 pasid)
-{
- struct pasid_state **ptr;
- unsigned long flags;
-
- spin_lock_irqsave(&dev_state->lock, flags);
- ptr = __get_pasid_state_ptr(dev_state, pasid, true);
-
- if (ptr == NULL)
- goto out_unlock;
-
- *ptr = NULL;
-
-out_unlock:
- spin_unlock_irqrestore(&dev_state->lock, flags);
-}
-
-static struct pasid_state *get_pasid_state(struct device_state *dev_state,
- u32 pasid)
-{
- struct pasid_state **ptr, *ret = NULL;
- unsigned long flags;
-
- spin_lock_irqsave(&dev_state->lock, flags);
- ptr = __get_pasid_state_ptr(dev_state, pasid, false);
-
- if (ptr == NULL)
- goto out_unlock;
-
- ret = *ptr;
- if (ret)
- refcount_inc(&ret->count);
-
-out_unlock:
- spin_unlock_irqrestore(&dev_state->lock, flags);
-
- return ret;
-}
-
-static void free_pasid_state(struct pasid_state *pasid_state)
-{
- kfree(pasid_state);
-}
-
-static void put_pasid_state(struct pasid_state *pasid_state)
-{
- if (refcount_dec_and_test(&pasid_state->count))
- wake_up(&pasid_state->wq);
-}
-
-static void put_pasid_state_wait(struct pasid_state *pasid_state)
-{
- if (!refcount_dec_and_test(&pasid_state->count))
- wait_event(pasid_state->wq, !refcount_read(&pasid_state->count));
- free_pasid_state(pasid_state);
-}
-
-static void unbind_pasid(struct pasid_state *pasid_state)
-{
- struct iommu_domain *domain;
-
- domain = pasid_state->device_state->domain;
-
- /*
- * Mark pasid_state as invalid, no more faults will we added to the
- * work queue after this is visible everywhere.
- */
- pasid_state->invalid = true;
-
- /* Make sure this is visible */
- smp_wmb();
-
- /* After this the device/pasid can't access the mm anymore */
- amd_iommu_domain_clear_gcr3(domain, pasid_state->pasid);
-
- /* Make sure no more pending faults are in the queue */
- flush_workqueue(iommu_wq);
-}
-
-static void free_pasid_states_level1(struct pasid_state **tbl)
-{
- int i;
-
- for (i = 0; i < 512; ++i) {
- if (tbl[i] == NULL)
- continue;
-
- free_page((unsigned long)tbl[i]);
- }
-}
-
-static void free_pasid_states_level2(struct pasid_state **tbl)
-{
- struct pasid_state **ptr;
- int i;
-
- for (i = 0; i < 512; ++i) {
- if (tbl[i] == NULL)
- continue;
-
- ptr = (struct pasid_state **)tbl[i];
- free_pasid_states_level1(ptr);
- }
-}
-
-static void free_pasid_states(struct device_state *dev_state)
-{
- struct pasid_state *pasid_state;
- int i;
-
- for (i = 0; i < dev_state->max_pasids; ++i) {
- pasid_state = get_pasid_state(dev_state, i);
- if (pasid_state == NULL)
- continue;
-
- put_pasid_state(pasid_state);
-
- /* Clear the pasid state so that the pasid can be re-used */
- clear_pasid_state(dev_state, pasid_state->pasid);
-
- /*
- * This will call the mn_release function and
- * unbind the PASID
- */
- mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
-
- put_pasid_state_wait(pasid_state); /* Reference taken in
- amd_iommu_bind_pasid */
-
- /* Drop reference taken in amd_iommu_bind_pasid */
- put_device_state(dev_state);
- }
-
- if (dev_state->pasid_levels == 2)
- free_pasid_states_level2(dev_state->states);
- else if (dev_state->pasid_levels == 1)
- free_pasid_states_level1(dev_state->states);
- else
- BUG_ON(dev_state->pasid_levels != 0);
-
- free_page((unsigned long)dev_state->states);
-}
-
-static struct pasid_state *mn_to_state(struct mmu_notifier *mn)
-{
- return container_of(mn, struct pasid_state, mn);
-}
-
-static void mn_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn,
- struct mm_struct *mm,
- unsigned long start, unsigned long end)
-{
- struct pasid_state *pasid_state;
- struct device_state *dev_state;
-
- pasid_state = mn_to_state(mn);
- dev_state = pasid_state->device_state;
-
- if ((start ^ (end - 1)) < PAGE_SIZE)
- amd_iommu_flush_page(dev_state->domain, pasid_state->pasid,
- start);
- else
- amd_iommu_flush_tlb(dev_state->domain, pasid_state->pasid);
-}
-
-static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm)
-{
- struct pasid_state *pasid_state;
- struct device_state *dev_state;
- bool run_inv_ctx_cb;
-
- might_sleep();
-
- pasid_state = mn_to_state(mn);
- dev_state = pasid_state->device_state;
- run_inv_ctx_cb = !pasid_state->invalid;
-
- if (run_inv_ctx_cb && dev_state->inv_ctx_cb)
- dev_state->inv_ctx_cb(dev_state->pdev, pasid_state->pasid);
-
- unbind_pasid(pasid_state);
-}
-
-static const struct mmu_notifier_ops iommu_mn = {
- .release = mn_release,
- .arch_invalidate_secondary_tlbs = mn_arch_invalidate_secondary_tlbs,
-};
-
-static void set_pri_tag_status(struct pasid_state *pasid_state,
- u16 tag, int status)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&pasid_state->lock, flags);
- pasid_state->pri[tag].status = status;
- spin_unlock_irqrestore(&pasid_state->lock, flags);
-}
-
-static void finish_pri_tag(struct device_state *dev_state,
- struct pasid_state *pasid_state,
- u16 tag)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&pasid_state->lock, flags);
- if (atomic_dec_and_test(&pasid_state->pri[tag].inflight) &&
- pasid_state->pri[tag].finish) {
- amd_iommu_complete_ppr(dev_state->pdev, pasid_state->pasid,
- pasid_state->pri[tag].status, tag);
- pasid_state->pri[tag].finish = false;
- pasid_state->pri[tag].status = PPR_SUCCESS;
- }
- spin_unlock_irqrestore(&pasid_state->lock, flags);
-}
-
-static void handle_fault_error(struct fault *fault)
-{
- int status;
-
- if (!fault->dev_state->inv_ppr_cb) {
- set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
- return;
- }
-
- status = fault->dev_state->inv_ppr_cb(fault->dev_state->pdev,
- fault->pasid,
- fault->address,
- fault->flags);
- switch (status) {
- case AMD_IOMMU_INV_PRI_RSP_SUCCESS:
- set_pri_tag_status(fault->state, fault->tag, PPR_SUCCESS);
- break;
- case AMD_IOMMU_INV_PRI_RSP_INVALID:
- set_pri_tag_status(fault->state, fault->tag, PPR_INVALID);
- break;
- case AMD_IOMMU_INV_PRI_RSP_FAIL:
- set_pri_tag_status(fault->state, fault->tag, PPR_FAILURE);
- break;
- default:
- BUG();
- }
-}
-
-static bool access_error(struct vm_area_struct *vma, struct fault *fault)
-{
- unsigned long requested = 0;
-
- if (fault->flags & PPR_FAULT_EXEC)
- requested |= VM_EXEC;
-
- if (fault->flags & PPR_FAULT_READ)
- requested |= VM_READ;
-
- if (fault->flags & PPR_FAULT_WRITE)
- requested |= VM_WRITE;
-
- return (requested & ~vma->vm_flags) != 0;
-}
-
-static void do_fault(struct work_struct *work)
-{
- struct fault *fault = container_of(work, struct fault, work);
- struct vm_area_struct *vma;
- vm_fault_t ret = VM_FAULT_ERROR;
- unsigned int flags = 0;
- struct mm_struct *mm;
- u64 address;
-
- mm = fault->state->mm;
- address = fault->address;
-
- if (fault->flags & PPR_FAULT_USER)
- flags |= FAULT_FLAG_USER;
- if (fault->flags & PPR_FAULT_WRITE)
- flags |= FAULT_FLAG_WRITE;
- flags |= FAULT_FLAG_REMOTE;
-
- mmap_read_lock(mm);
- vma = vma_lookup(mm, address);
- if (!vma)
- /* failed to get a vma in the right range */
- goto out;
-
- /* Check if we have the right permissions on the vma */
- if (access_error(vma, fault))
- goto out;
-
- ret = handle_mm_fault(vma, address, flags, NULL);
-out:
- mmap_read_unlock(mm);
-
- if (ret & VM_FAULT_ERROR)
- /* failed to service fault */
- handle_fault_error(fault);
-
- finish_pri_tag(fault->dev_state, fault->state, fault->tag);
-
- put_pasid_state(fault->state);
-
- kfree(fault);
-}
-
-static int ppr_notifier(struct notifier_block *nb, unsigned long e, void *data)
-{
- struct amd_iommu_fault *iommu_fault;
- struct pasid_state *pasid_state;
- struct device_state *dev_state;
- struct pci_dev *pdev = NULL;
- unsigned long flags;
- struct fault *fault;
- bool finish;
- u16 tag, devid, seg_id;
- int ret;
-
- iommu_fault = data;
- tag = iommu_fault->tag & 0x1ff;
- finish = (iommu_fault->tag >> 9) & 1;
-
- seg_id = PCI_SBDF_TO_SEGID(iommu_fault->sbdf);
- devid = PCI_SBDF_TO_DEVID(iommu_fault->sbdf);
- pdev = pci_get_domain_bus_and_slot(seg_id, PCI_BUS_NUM(devid),
- devid & 0xff);
- if (!pdev)
- return -ENODEV;
-
- ret = NOTIFY_DONE;
-
- /* In kdump kernel pci dev is not initialized yet -> send INVALID */
- if (amd_iommu_is_attach_deferred(&pdev->dev)) {
- amd_iommu_complete_ppr(pdev, iommu_fault->pasid,
- PPR_INVALID, tag);
- goto out;
- }
-
- dev_state = get_device_state(iommu_fault->sbdf);
- if (dev_state == NULL)
- goto out;
-
- pasid_state = get_pasid_state(dev_state, iommu_fault->pasid);
- if (pasid_state == NULL || pasid_state->invalid) {
- /* We know the device but not the PASID -> send INVALID */
- amd_iommu_complete_ppr(dev_state->pdev, iommu_fault->pasid,
- PPR_INVALID, tag);
- goto out_drop_state;
- }
-
- spin_lock_irqsave(&pasid_state->lock, flags);
- atomic_inc(&pasid_state->pri[tag].inflight);
- if (finish)
- pasid_state->pri[tag].finish = true;
- spin_unlock_irqrestore(&pasid_state->lock, flags);
-
- fault = kzalloc(sizeof(*fault), GFP_ATOMIC);
- if (fault == NULL) {
- /* We are OOM - send success and let the device re-fault */
- finish_pri_tag(dev_state, pasid_state, tag);
- goto out_drop_state;
- }
-
- fault->dev_state = dev_state;
- fault->address = iommu_fault->address;
- fault->state = pasid_state;
- fault->tag = tag;
- fault->finish = finish;
- fault->pasid = iommu_fault->pasid;
- fault->flags = iommu_fault->flags;
- INIT_WORK(&fault->work, do_fault);
-
- queue_work(iommu_wq, &fault->work);
-
- ret = NOTIFY_OK;
-
-out_drop_state:
-
- if (ret != NOTIFY_OK && pasid_state)
- put_pasid_state(pasid_state);
-
- put_device_state(dev_state);
-
-out:
- pci_dev_put(pdev);
- return ret;
-}
-
-static struct notifier_block ppr_nb = {
- .notifier_call = ppr_notifier,
-};
-
-int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid,
- struct task_struct *task)
-{
- struct pasid_state *pasid_state;
- struct device_state *dev_state;
- struct mm_struct *mm;
- u32 sbdf;
- int ret;
-
- might_sleep();
-
- if (!amd_iommu_v2_supported())
- return -ENODEV;
-
- sbdf = get_pci_sbdf_id(pdev);
- dev_state = get_device_state(sbdf);
-
- if (dev_state == NULL)
- return -EINVAL;
-
- ret = -EINVAL;
- if (pasid >= dev_state->max_pasids)
- goto out;
-
- ret = -ENOMEM;
- pasid_state = kzalloc(sizeof(*pasid_state), GFP_KERNEL);
- if (pasid_state == NULL)
- goto out;
-
-
- refcount_set(&pasid_state->count, 1);
- init_waitqueue_head(&pasid_state->wq);
- spin_lock_init(&pasid_state->lock);
-
- mm = get_task_mm(task);
- pasid_state->mm = mm;
- pasid_state->device_state = dev_state;
- pasid_state->pasid = pasid;
- pasid_state->invalid = true; /* Mark as valid only if we are
- done with setting up the pasid */
- pasid_state->mn.ops = &iommu_mn;
-
- if (pasid_state->mm == NULL)
- goto out_free;
-
- ret = mmu_notifier_register(&pasid_state->mn, mm);
- if (ret)
- goto out_free;
-
- ret = set_pasid_state(dev_state, pasid_state, pasid);
- if (ret)
- goto out_unregister;
-
- ret = amd_iommu_domain_set_gcr3(dev_state->domain, pasid,
- __pa(pasid_state->mm->pgd));
- if (ret)
- goto out_clear_state;
-
- /* Now we are ready to handle faults */
- pasid_state->invalid = false;
-
- /*
- * Drop the reference to the mm_struct here. We rely on the
- * mmu_notifier release call-back to inform us when the mm
- * is going away.
- */
- mmput(mm);
-
- return 0;
-
-out_clear_state:
- clear_pasid_state(dev_state, pasid);
-
-out_unregister:
- mmu_notifier_unregister(&pasid_state->mn, mm);
- mmput(mm);
-
-out_free:
- free_pasid_state(pasid_state);
-
-out:
- put_device_state(dev_state);
-
- return ret;
-}
-EXPORT_SYMBOL(amd_iommu_bind_pasid);
-
-void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid)
-{
- struct pasid_state *pasid_state;
- struct device_state *dev_state;
- u32 sbdf;
-
- might_sleep();
-
- if (!amd_iommu_v2_supported())
- return;
-
- sbdf = get_pci_sbdf_id(pdev);
- dev_state = get_device_state(sbdf);
- if (dev_state == NULL)
- return;
-
- if (pasid >= dev_state->max_pasids)
- goto out;
-
- pasid_state = get_pasid_state(dev_state, pasid);
- if (pasid_state == NULL)
- goto out;
- /*
- * Drop reference taken here. We are safe because we still hold
- * the reference taken in the amd_iommu_bind_pasid function.
- */
- put_pasid_state(pasid_state);
-
- /* Clear the pasid state so that the pasid can be re-used */
- clear_pasid_state(dev_state, pasid_state->pasid);
-
- /*
- * Call mmu_notifier_unregister to drop our reference
- * to pasid_state->mm
- */
- mmu_notifier_unregister(&pasid_state->mn, pasid_state->mm);
-
- put_pasid_state_wait(pasid_state); /* Reference taken in
- amd_iommu_bind_pasid */
-out:
- /* Drop reference taken in this function */
- put_device_state(dev_state);
-
- /* Drop reference taken in amd_iommu_bind_pasid */
- put_device_state(dev_state);
-}
-EXPORT_SYMBOL(amd_iommu_unbind_pasid);
-
-int amd_iommu_init_device(struct pci_dev *pdev, int pasids)
-{
- struct device_state *dev_state;
- struct iommu_group *group;
- unsigned long flags;
- int ret, tmp;
- u32 sbdf;
-
- might_sleep();
-
- /*
- * When memory encryption is active the device is likely not in a
- * direct-mapped domain. Forbid using IOMMUv2 functionality for now.
- */
- if (cc_platform_has(CC_ATTR_MEM_ENCRYPT))
- return -ENODEV;
-
- if (!amd_iommu_v2_supported())
- return -ENODEV;
-
- if (pasids <= 0 || pasids > (PASID_MASK + 1))
- return -EINVAL;
-
- sbdf = get_pci_sbdf_id(pdev);
-
- dev_state = kzalloc(sizeof(*dev_state), GFP_KERNEL);
- if (dev_state == NULL)
- return -ENOMEM;
-
- spin_lock_init(&dev_state->lock);
- init_waitqueue_head(&dev_state->wq);
- dev_state->pdev = pdev;
- dev_state->sbdf = sbdf;
-
- tmp = pasids;
- for (dev_state->pasid_levels = 0; (tmp - 1) & ~0x1ff; tmp >>= 9)
- dev_state->pasid_levels += 1;
-
- atomic_set(&dev_state->count, 1);
- dev_state->max_pasids = pasids;
-
- ret = -ENOMEM;
- dev_state->states = (void *)get_zeroed_page(GFP_KERNEL);
- if (dev_state->states == NULL)
- goto out_free_dev_state;
-
- dev_state->domain = iommu_domain_alloc(&pci_bus_type);
- if (dev_state->domain == NULL)
- goto out_free_states;
-
- /* See iommu_is_default_domain() */
- dev_state->domain->type = IOMMU_DOMAIN_IDENTITY;
- amd_iommu_domain_direct_map(dev_state->domain);
-
- ret = amd_iommu_domain_enable_v2(dev_state->domain, pasids);
- if (ret)
- goto out_free_domain;
-
- group = iommu_group_get(&pdev->dev);
- if (!group) {
- ret = -EINVAL;
- goto out_free_domain;
- }
-
- ret = iommu_attach_group(dev_state->domain, group);
- if (ret != 0)
- goto out_drop_group;
-
- iommu_group_put(group);
-
- spin_lock_irqsave(&state_lock, flags);
-
- if (__get_device_state(sbdf) != NULL) {
- spin_unlock_irqrestore(&state_lock, flags);
- ret = -EBUSY;
- goto out_free_domain;
- }
-
- list_add_tail(&dev_state->list, &state_list);
-
- spin_unlock_irqrestore(&state_lock, flags);
-
- return 0;
-
-out_drop_group:
- iommu_group_put(group);
-
-out_free_domain:
- iommu_domain_free(dev_state->domain);
-
-out_free_states:
- free_page((unsigned long)dev_state->states);
-
-out_free_dev_state:
- kfree(dev_state);
-
- return ret;
-}
-EXPORT_SYMBOL(amd_iommu_init_device);
-
-void amd_iommu_free_device(struct pci_dev *pdev)
-{
- struct device_state *dev_state;
- unsigned long flags;
- u32 sbdf;
-
- if (!amd_iommu_v2_supported())
- return;
-
- sbdf = get_pci_sbdf_id(pdev);
-
- spin_lock_irqsave(&state_lock, flags);
-
- dev_state = __get_device_state(sbdf);
- if (dev_state == NULL) {
- spin_unlock_irqrestore(&state_lock, flags);
- return;
- }
-
- list_del(&dev_state->list);
-
- spin_unlock_irqrestore(&state_lock, flags);
-
- put_device_state(dev_state);
- free_device_state(dev_state);
-}
-EXPORT_SYMBOL(amd_iommu_free_device);
-
-int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
- amd_iommu_invalid_ppr_cb cb)
-{
- struct device_state *dev_state;
- unsigned long flags;
- u32 sbdf;
- int ret;
-
- if (!amd_iommu_v2_supported())
- return -ENODEV;
-
- sbdf = get_pci_sbdf_id(pdev);
-
- spin_lock_irqsave(&state_lock, flags);
-
- ret = -EINVAL;
- dev_state = __get_device_state(sbdf);
- if (dev_state == NULL)
- goto out_unlock;
-
- dev_state->inv_ppr_cb = cb;
-
- ret = 0;
-
-out_unlock:
- spin_unlock_irqrestore(&state_lock, flags);
-
- return ret;
-}
-EXPORT_SYMBOL(amd_iommu_set_invalid_ppr_cb);
-
-int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
- amd_iommu_invalidate_ctx cb)
-{
- struct device_state *dev_state;
- unsigned long flags;
- u32 sbdf;
- int ret;
-
- if (!amd_iommu_v2_supported())
- return -ENODEV;
-
- sbdf = get_pci_sbdf_id(pdev);
-
- spin_lock_irqsave(&state_lock, flags);
-
- ret = -EINVAL;
- dev_state = __get_device_state(sbdf);
- if (dev_state == NULL)
- goto out_unlock;
-
- dev_state->inv_ctx_cb = cb;
-
- ret = 0;
-
-out_unlock:
- spin_unlock_irqrestore(&state_lock, flags);
-
- return ret;
-}
-EXPORT_SYMBOL(amd_iommu_set_invalidate_ctx_cb);
-
-static int __init amd_iommu_v2_init(void)
-{
- int ret;
-
- if (!amd_iommu_v2_supported()) {
- pr_info("AMD IOMMUv2 functionality not available on this system - This is not a bug.\n");
- /*
- * Load anyway to provide the symbols to other modules
- * which may use AMD IOMMUv2 optionally.
- */
- return 0;
- }
-
- ret = -ENOMEM;
- iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0);
- if (iommu_wq == NULL)
- goto out;
-
- amd_iommu_register_ppr_notifier(&ppr_nb);
-
- pr_info("AMD IOMMUv2 loaded and initialized\n");
-
- return 0;
-
-out:
- return ret;
-}
-
-static void __exit amd_iommu_v2_exit(void)
-{
- struct device_state *dev_state, *next;
- unsigned long flags;
- LIST_HEAD(freelist);
-
- if (!amd_iommu_v2_supported())
- return;
-
- amd_iommu_unregister_ppr_notifier(&ppr_nb);
-
- flush_workqueue(iommu_wq);
-
- /*
- * The loop below might call flush_workqueue(), so call
- * destroy_workqueue() after it
- */
- spin_lock_irqsave(&state_lock, flags);
-
- list_for_each_entry_safe(dev_state, next, &state_list, list) {
- WARN_ON_ONCE(1);
-
- put_device_state(dev_state);
- list_del(&dev_state->list);
- list_add_tail(&dev_state->list, &freelist);
- }
-
- spin_unlock_irqrestore(&state_lock, flags);
-
- /*
- * Since free_device_state waits on the count to be zero,
- * we need to free dev_state outside the spinlock.
- */
- list_for_each_entry_safe(dev_state, next, &freelist, list) {
- list_del(&dev_state->list);
- free_device_state(dev_state);
- }
-
- destroy_workqueue(iommu_wq);
-}
-
-module_init(amd_iommu_v2_init);
-module_exit(amd_iommu_v2_exit);
diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c
index 821b4a3465df..ee05f4824bfa 100644
--- a/drivers/iommu/apple-dart.c
+++ b/drivers/iommu/apple-dart.c
@@ -504,10 +504,11 @@ static void apple_dart_iotlb_sync(struct iommu_domain *domain,
apple_dart_domain_flush_tlb(to_dart_domain(domain));
}
-static void apple_dart_iotlb_sync_map(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static int apple_dart_iotlb_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
apple_dart_domain_flush_tlb(to_dart_domain(domain));
+ return 0;
}
static phys_addr_t apple_dart_iova_to_phys(struct iommu_domain *domain,
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
index 8a16cd3ef487..353248ab18e7 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3-sva.c
@@ -25,11 +25,9 @@ struct arm_smmu_mmu_notifier {
#define mn_to_smmu(mn) container_of(mn, struct arm_smmu_mmu_notifier, mn)
struct arm_smmu_bond {
- struct iommu_sva sva;
struct mm_struct *mm;
struct arm_smmu_mmu_notifier *smmu_mn;
struct list_head list;
- refcount_t refs;
};
#define sva_to_bond(handle) \
@@ -38,6 +36,25 @@ struct arm_smmu_bond {
static DEFINE_MUTEX(sva_lock);
/*
+ * Write the CD to the CD tables for all masters that this domain is attached
+ * to. Note that this is only used to update existing CD entries in the target
+ * CD table, for which it's assumed that arm_smmu_write_ctx_desc can't fail.
+ */
+static void arm_smmu_update_ctx_desc_devices(struct arm_smmu_domain *smmu_domain,
+ int ssid,
+ struct arm_smmu_ctx_desc *cd)
+{
+ struct arm_smmu_master *master;
+ unsigned long flags;
+
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_for_each_entry(master, &smmu_domain->devices, domain_head) {
+ arm_smmu_write_ctx_desc(master, ssid, cd);
+ }
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+}
+
+/*
* Check if the CPU ASID is available on the SMMU side. If a private context
* descriptor is using it, try to replace it.
*/
@@ -62,7 +79,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
return cd;
}
- smmu_domain = container_of(cd, struct arm_smmu_domain, s1_cfg.cd);
+ smmu_domain = container_of(cd, struct arm_smmu_domain, cd);
smmu = smmu_domain->smmu;
ret = xa_alloc(&arm_smmu_asid_xa, &new_asid, cd,
@@ -80,7 +97,7 @@ arm_smmu_share_asid(struct mm_struct *mm, u16 asid)
* be some overlap between use of both ASIDs, until we invalidate the
* TLB.
*/
- arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, cd);
+ arm_smmu_update_ctx_desc_devices(smmu_domain, IOMMU_NO_PASID, cd);
/* Invalidate TLB entries previously associated with that context */
arm_smmu_tlb_inv_asid(smmu, asid);
@@ -247,7 +264,7 @@ static void arm_smmu_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
* DMA may still be running. Keep the cd valid to avoid C_BAD_CD events,
* but disable translation.
*/
- arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, &quiet_cd);
+ arm_smmu_update_ctx_desc_devices(smmu_domain, mm->pasid, &quiet_cd);
arm_smmu_tlb_inv_asid(smmu_domain->smmu, smmu_mn->cd->asid);
arm_smmu_atc_inv_domain(smmu_domain, mm->pasid, 0, 0);
@@ -273,8 +290,10 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
struct mm_struct *mm)
{
int ret;
+ unsigned long flags;
struct arm_smmu_ctx_desc *cd;
struct arm_smmu_mmu_notifier *smmu_mn;
+ struct arm_smmu_master *master;
list_for_each_entry(smmu_mn, &smmu_domain->mmu_notifiers, list) {
if (smmu_mn->mn.mm == mm) {
@@ -304,7 +323,16 @@ arm_smmu_mmu_notifier_get(struct arm_smmu_domain *smmu_domain,
goto err_free_cd;
}
- ret = arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, cd);
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_for_each_entry(master, &smmu_domain->devices, domain_head) {
+ ret = arm_smmu_write_ctx_desc(master, mm->pasid, cd);
+ if (ret) {
+ list_for_each_entry_from_reverse(master, &smmu_domain->devices, domain_head)
+ arm_smmu_write_ctx_desc(master, mm->pasid, NULL);
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
if (ret)
goto err_put_notifier;
@@ -329,7 +357,8 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
return;
list_del(&smmu_mn->list);
- arm_smmu_write_ctx_desc(smmu_domain, mm->pasid, NULL);
+
+ arm_smmu_update_ctx_desc_devices(smmu_domain, mm->pasid, NULL);
/*
* If we went through clear(), we've already invalidated, and no
@@ -345,8 +374,7 @@ static void arm_smmu_mmu_notifier_put(struct arm_smmu_mmu_notifier *smmu_mn)
arm_smmu_free_shared_cd(cd);
}
-static struct iommu_sva *
-__arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
+static int __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
{
int ret;
struct arm_smmu_bond *bond;
@@ -355,23 +383,13 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
if (!master || !master->sva_enabled)
- return ERR_PTR(-ENODEV);
-
- /* If bind() was already called for this {dev, mm} pair, reuse it. */
- list_for_each_entry(bond, &master->bonds, list) {
- if (bond->mm == mm) {
- refcount_inc(&bond->refs);
- return &bond->sva;
- }
- }
+ return -ENODEV;
bond = kzalloc(sizeof(*bond), GFP_KERNEL);
if (!bond)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
bond->mm = mm;
- bond->sva.dev = dev;
- refcount_set(&bond->refs, 1);
bond->smmu_mn = arm_smmu_mmu_notifier_get(smmu_domain, mm);
if (IS_ERR(bond->smmu_mn)) {
@@ -380,11 +398,11 @@ __arm_smmu_sva_bind(struct device *dev, struct mm_struct *mm)
}
list_add(&bond->list, &master->bonds);
- return &bond->sva;
+ return 0;
err_free_bond:
kfree(bond);
- return ERR_PTR(ret);
+ return ret;
}
bool arm_smmu_sva_supported(struct arm_smmu_device *smmu)
@@ -550,7 +568,7 @@ void arm_smmu_sva_remove_dev_pasid(struct iommu_domain *domain,
}
}
- if (!WARN_ON(!bond) && refcount_dec_and_test(&bond->refs)) {
+ if (!WARN_ON(!bond)) {
list_del(&bond->list);
arm_smmu_mmu_notifier_put(bond->smmu_mn);
kfree(bond);
@@ -562,13 +580,10 @@ static int arm_smmu_sva_set_dev_pasid(struct iommu_domain *domain,
struct device *dev, ioasid_t id)
{
int ret = 0;
- struct iommu_sva *handle;
struct mm_struct *mm = domain->mm;
mutex_lock(&sva_lock);
- handle = __arm_smmu_sva_bind(dev, mm);
- if (IS_ERR(handle))
- ret = PTR_ERR(handle);
+ ret = __arm_smmu_sva_bind(dev, mm);
mutex_unlock(&sva_lock);
return ret;
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
index bd0a596f9863..7445454c2af2 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
@@ -971,14 +971,12 @@ void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid)
arm_smmu_cmdq_issue_cmd_with_sync(smmu, &cmd);
}
-static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
+static void arm_smmu_sync_cd(struct arm_smmu_master *master,
int ssid, bool leaf)
{
size_t i;
- unsigned long flags;
- struct arm_smmu_master *master;
struct arm_smmu_cmdq_batch cmds;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
+ struct arm_smmu_device *smmu = master->smmu;
struct arm_smmu_cmdq_ent cmd = {
.opcode = CMDQ_OP_CFGI_CD,
.cfgi = {
@@ -988,15 +986,10 @@ static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain,
};
cmds.num = 0;
-
- spin_lock_irqsave(&smmu_domain->devices_lock, flags);
- list_for_each_entry(master, &smmu_domain->devices, domain_head) {
- for (i = 0; i < master->num_streams; i++) {
- cmd.cfgi.sid = master->streams[i].id;
- arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
- }
+ for (i = 0; i < master->num_streams; i++) {
+ cmd.cfgi.sid = master->streams[i].id;
+ arm_smmu_cmdq_batch_add(smmu, &cmds, &cmd);
}
- spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
arm_smmu_cmdq_batch_submit(smmu, &cmds);
}
@@ -1026,34 +1019,33 @@ static void arm_smmu_write_cd_l1_desc(__le64 *dst,
WRITE_ONCE(*dst, cpu_to_le64(val));
}
-static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_domain *smmu_domain,
- u32 ssid)
+static __le64 *arm_smmu_get_cd_ptr(struct arm_smmu_master *master, u32 ssid)
{
__le64 *l1ptr;
unsigned int idx;
struct arm_smmu_l1_ctx_desc *l1_desc;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- if (smmu_domain->s1_cfg.s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
- return cdcfg->cdtab + ssid * CTXDESC_CD_DWORDS;
+ if (cd_table->s1fmt == STRTAB_STE_0_S1FMT_LINEAR)
+ return cd_table->cdtab + ssid * CTXDESC_CD_DWORDS;
idx = ssid >> CTXDESC_SPLIT;
- l1_desc = &cdcfg->l1_desc[idx];
+ l1_desc = &cd_table->l1_desc[idx];
if (!l1_desc->l2ptr) {
if (arm_smmu_alloc_cd_leaf_table(smmu, l1_desc))
return NULL;
- l1ptr = cdcfg->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
+ l1ptr = cd_table->cdtab + idx * CTXDESC_L1_DESC_DWORDS;
arm_smmu_write_cd_l1_desc(l1ptr, l1_desc);
/* An invalid L1CD can be cached */
- arm_smmu_sync_cd(smmu_domain, ssid, false);
+ arm_smmu_sync_cd(master, ssid, false);
}
idx = ssid & (CTXDESC_L2_ENTRIES - 1);
return l1_desc->l2ptr + idx * CTXDESC_CD_DWORDS;
}
-int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
+int arm_smmu_write_ctx_desc(struct arm_smmu_master *master, int ssid,
struct arm_smmu_ctx_desc *cd)
{
/*
@@ -1070,11 +1062,12 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
u64 val;
bool cd_live;
__le64 *cdptr;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- if (WARN_ON(ssid >= (1 << smmu_domain->s1_cfg.s1cdmax)))
+ if (WARN_ON(ssid >= (1 << cd_table->s1cdmax)))
return -E2BIG;
- cdptr = arm_smmu_get_cd_ptr(smmu_domain, ssid);
+ cdptr = arm_smmu_get_cd_ptr(master, ssid);
if (!cdptr)
return -ENOMEM;
@@ -1098,11 +1091,11 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
cdptr[3] = cpu_to_le64(cd->mair);
/*
- * STE is live, and the SMMU might read dwords of this CD in any
+ * STE may be live, and the SMMU might read dwords of this CD in any
* order. Ensure that it observes valid values before reading
* V=1.
*/
- arm_smmu_sync_cd(smmu_domain, ssid, true);
+ arm_smmu_sync_cd(master, ssid, true);
val = cd->tcr |
#ifdef __BIG_ENDIAN
@@ -1114,7 +1107,7 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) |
CTXDESC_CD_0_V;
- if (smmu_domain->stall_enabled)
+ if (cd_table->stall_enabled)
val |= CTXDESC_CD_0_S;
}
@@ -1128,44 +1121,45 @@ int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
* without first making the structure invalid.
*/
WRITE_ONCE(cdptr[0], cpu_to_le64(val));
- arm_smmu_sync_cd(smmu_domain, ssid, true);
+ arm_smmu_sync_cd(master, ssid, true);
return 0;
}
-static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
+static int arm_smmu_alloc_cd_tables(struct arm_smmu_master *master)
{
int ret;
size_t l1size;
size_t max_contexts;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
- struct arm_smmu_ctx_desc_cfg *cdcfg = &cfg->cdcfg;
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- max_contexts = 1 << cfg->s1cdmax;
+ cd_table->stall_enabled = master->stall_enabled;
+ cd_table->s1cdmax = master->ssid_bits;
+ max_contexts = 1 << cd_table->s1cdmax;
if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB) ||
max_contexts <= CTXDESC_L2_ENTRIES) {
- cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
- cdcfg->num_l1_ents = max_contexts;
+ cd_table->s1fmt = STRTAB_STE_0_S1FMT_LINEAR;
+ cd_table->num_l1_ents = max_contexts;
l1size = max_contexts * (CTXDESC_CD_DWORDS << 3);
} else {
- cfg->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
- cdcfg->num_l1_ents = DIV_ROUND_UP(max_contexts,
+ cd_table->s1fmt = STRTAB_STE_0_S1FMT_64K_L2;
+ cd_table->num_l1_ents = DIV_ROUND_UP(max_contexts,
CTXDESC_L2_ENTRIES);
- cdcfg->l1_desc = devm_kcalloc(smmu->dev, cdcfg->num_l1_ents,
- sizeof(*cdcfg->l1_desc),
+ cd_table->l1_desc = devm_kcalloc(smmu->dev, cd_table->num_l1_ents,
+ sizeof(*cd_table->l1_desc),
GFP_KERNEL);
- if (!cdcfg->l1_desc)
+ if (!cd_table->l1_desc)
return -ENOMEM;
- l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
+ l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
}
- cdcfg->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cdcfg->cdtab_dma,
+ cd_table->cdtab = dmam_alloc_coherent(smmu->dev, l1size, &cd_table->cdtab_dma,
GFP_KERNEL);
- if (!cdcfg->cdtab) {
+ if (!cd_table->cdtab) {
dev_warn(smmu->dev, "failed to allocate context descriptor\n");
ret = -ENOMEM;
goto err_free_l1;
@@ -1174,42 +1168,42 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain)
return 0;
err_free_l1:
- if (cdcfg->l1_desc) {
- devm_kfree(smmu->dev, cdcfg->l1_desc);
- cdcfg->l1_desc = NULL;
+ if (cd_table->l1_desc) {
+ devm_kfree(smmu->dev, cd_table->l1_desc);
+ cd_table->l1_desc = NULL;
}
return ret;
}
-static void arm_smmu_free_cd_tables(struct arm_smmu_domain *smmu_domain)
+static void arm_smmu_free_cd_tables(struct arm_smmu_master *master)
{
int i;
size_t size, l1size;
- struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_ctx_desc_cfg *cdcfg = &smmu_domain->s1_cfg.cdcfg;
+ struct arm_smmu_device *smmu = master->smmu;
+ struct arm_smmu_ctx_desc_cfg *cd_table = &master->cd_table;
- if (cdcfg->l1_desc) {
+ if (cd_table->l1_desc) {
size = CTXDESC_L2_ENTRIES * (CTXDESC_CD_DWORDS << 3);
- for (i = 0; i < cdcfg->num_l1_ents; i++) {
- if (!cdcfg->l1_desc[i].l2ptr)
+ for (i = 0; i < cd_table->num_l1_ents; i++) {
+ if (!cd_table->l1_desc[i].l2ptr)
continue;
dmam_free_coherent(smmu->dev, size,
- cdcfg->l1_desc[i].l2ptr,
- cdcfg->l1_desc[i].l2ptr_dma);
+ cd_table->l1_desc[i].l2ptr,
+ cd_table->l1_desc[i].l2ptr_dma);
}
- devm_kfree(smmu->dev, cdcfg->l1_desc);
- cdcfg->l1_desc = NULL;
+ devm_kfree(smmu->dev, cd_table->l1_desc);
+ cd_table->l1_desc = NULL;
- l1size = cdcfg->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
+ l1size = cd_table->num_l1_ents * (CTXDESC_L1_DESC_DWORDS << 3);
} else {
- l1size = cdcfg->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
+ l1size = cd_table->num_l1_ents * (CTXDESC_CD_DWORDS << 3);
}
- dmam_free_coherent(smmu->dev, l1size, cdcfg->cdtab, cdcfg->cdtab_dma);
- cdcfg->cdtab_dma = 0;
- cdcfg->cdtab = NULL;
+ dmam_free_coherent(smmu->dev, l1size, cd_table->cdtab, cd_table->cdtab_dma);
+ cd_table->cdtab_dma = 0;
+ cd_table->cdtab = NULL;
}
bool arm_smmu_free_asid(struct arm_smmu_ctx_desc *cd)
@@ -1276,7 +1270,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
u64 val = le64_to_cpu(dst[0]);
bool ste_live = false;
struct arm_smmu_device *smmu = NULL;
- struct arm_smmu_s1_cfg *s1_cfg = NULL;
+ struct arm_smmu_ctx_desc_cfg *cd_table = NULL;
struct arm_smmu_s2_cfg *s2_cfg = NULL;
struct arm_smmu_domain *smmu_domain = NULL;
struct arm_smmu_cmdq_ent prefetch_cmd = {
@@ -1294,7 +1288,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
if (smmu_domain) {
switch (smmu_domain->stage) {
case ARM_SMMU_DOMAIN_S1:
- s1_cfg = &smmu_domain->s1_cfg;
+ cd_table = &master->cd_table;
break;
case ARM_SMMU_DOMAIN_S2:
case ARM_SMMU_DOMAIN_NESTED:
@@ -1325,7 +1319,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
val = STRTAB_STE_0_V;
/* Bypass/fault */
- if (!smmu_domain || !(s1_cfg || s2_cfg)) {
+ if (!smmu_domain || !(cd_table || s2_cfg)) {
if (!smmu_domain && disable_bypass)
val |= FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_ABORT);
else
@@ -1344,7 +1338,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
return;
}
- if (s1_cfg) {
+ if (cd_table) {
u64 strw = smmu->features & ARM_SMMU_FEAT_E2H ?
STRTAB_STE_1_STRW_EL2 : STRTAB_STE_1_STRW_NSEL1;
@@ -1360,10 +1354,10 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid,
!master->stall_enabled)
dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD);
- val |= (s1_cfg->cdcfg.cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
+ val |= (cd_table->cdtab_dma & STRTAB_STE_0_S1CTXPTR_MASK) |
FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) |
- FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) |
- FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt);
+ FIELD_PREP(STRTAB_STE_0_S1CDMAX, cd_table->s1cdmax) |
+ FIELD_PREP(STRTAB_STE_0_S1FMT, cd_table->s1fmt);
}
if (s2_cfg) {
@@ -1869,7 +1863,7 @@ static void arm_smmu_tlb_inv_context(void *cookie)
* careful, 007.
*/
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
- arm_smmu_tlb_inv_asid(smmu, smmu_domain->s1_cfg.cd.asid);
+ arm_smmu_tlb_inv_asid(smmu, smmu_domain->cd.asid);
} else {
cmd.opcode = CMDQ_OP_TLBI_S12_VMALL;
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
@@ -1962,7 +1956,7 @@ static void arm_smmu_tlb_inv_range_domain(unsigned long iova, size_t size,
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
cmd.opcode = smmu_domain->smmu->features & ARM_SMMU_FEAT_E2H ?
CMDQ_OP_TLBI_EL2_VA : CMDQ_OP_TLBI_NH_VA;
- cmd.tlbi.asid = smmu_domain->s1_cfg.cd.asid;
+ cmd.tlbi.asid = smmu_domain->cd.asid;
} else {
cmd.opcode = CMDQ_OP_TLBI_S2_IPA;
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
@@ -2067,15 +2061,11 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
free_io_pgtable_ops(smmu_domain->pgtbl_ops);
- /* Free the CD and ASID, if we allocated them */
+ /* Free the ASID or VMID */
if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
- struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
-
/* Prevent SVA from touching the CD while we're freeing it */
mutex_lock(&arm_smmu_asid_lock);
- if (cfg->cdcfg.cdtab)
- arm_smmu_free_cd_tables(smmu_domain);
- arm_smmu_free_asid(&cfg->cd);
+ arm_smmu_free_asid(&smmu_domain->cd);
mutex_unlock(&arm_smmu_asid_lock);
} else {
struct arm_smmu_s2_cfg *cfg = &smmu_domain->s2_cfg;
@@ -2087,66 +2077,43 @@ static void arm_smmu_domain_free(struct iommu_domain *domain)
}
static int arm_smmu_domain_finalise_s1(struct arm_smmu_domain *smmu_domain,
- struct arm_smmu_master *master,
struct io_pgtable_cfg *pgtbl_cfg)
{
int ret;
u32 asid;
struct arm_smmu_device *smmu = smmu_domain->smmu;
- struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg;
+ struct arm_smmu_ctx_desc *cd = &smmu_domain->cd;
typeof(&pgtbl_cfg->arm_lpae_s1_cfg.tcr) tcr = &pgtbl_cfg->arm_lpae_s1_cfg.tcr;
- refcount_set(&cfg->cd.refs, 1);
+ refcount_set(&cd->refs, 1);
/* Prevent SVA from modifying the ASID until it is written to the CD */
mutex_lock(&arm_smmu_asid_lock);
- ret = xa_alloc(&arm_smmu_asid_xa, &asid, &cfg->cd,
+ ret = xa_alloc(&arm_smmu_asid_xa, &asid, cd,
XA_LIMIT(1, (1 << smmu->asid_bits) - 1), GFP_KERNEL);
if (ret)
goto out_unlock;
- cfg->s1cdmax = master->ssid_bits;
-
- smmu_domain->stall_enabled = master->stall_enabled;
-
- ret = arm_smmu_alloc_cd_tables(smmu_domain);
- if (ret)
- goto out_free_asid;
-
- cfg->cd.asid = (u16)asid;
- cfg->cd.ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
- cfg->cd.tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
+ cd->asid = (u16)asid;
+ cd->ttbr = pgtbl_cfg->arm_lpae_s1_cfg.ttbr;
+ cd->tcr = FIELD_PREP(CTXDESC_CD_0_TCR_T0SZ, tcr->tsz) |
FIELD_PREP(CTXDESC_CD_0_TCR_TG0, tcr->tg) |
FIELD_PREP(CTXDESC_CD_0_TCR_IRGN0, tcr->irgn) |
FIELD_PREP(CTXDESC_CD_0_TCR_ORGN0, tcr->orgn) |
FIELD_PREP(CTXDESC_CD_0_TCR_SH0, tcr->sh) |
FIELD_PREP(CTXDESC_CD_0_TCR_IPS, tcr->ips) |
CTXDESC_CD_0_TCR_EPD1 | CTXDESC_CD_0_AA64;
- cfg->cd.mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
-
- /*
- * Note that this will end up calling arm_smmu_sync_cd() before
- * the master has been added to the devices list for this domain.
- * This isn't an issue because the STE hasn't been installed yet.
- */
- ret = arm_smmu_write_ctx_desc(smmu_domain, IOMMU_NO_PASID, &cfg->cd);
- if (ret)
- goto out_free_cd_tables;
+ cd->mair = pgtbl_cfg->arm_lpae_s1_cfg.mair;
mutex_unlock(&arm_smmu_asid_lock);
return 0;
-out_free_cd_tables:
- arm_smmu_free_cd_tables(smmu_domain);
-out_free_asid:
- arm_smmu_free_asid(&cfg->cd);
out_unlock:
mutex_unlock(&arm_smmu_asid_lock);
return ret;
}
static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
- struct arm_smmu_master *master,
struct io_pgtable_cfg *pgtbl_cfg)
{
int vmid;
@@ -2173,8 +2140,7 @@ static int arm_smmu_domain_finalise_s2(struct arm_smmu_domain *smmu_domain,
return 0;
}
-static int arm_smmu_domain_finalise(struct iommu_domain *domain,
- struct arm_smmu_master *master)
+static int arm_smmu_domain_finalise(struct iommu_domain *domain)
{
int ret;
unsigned long ias, oas;
@@ -2182,7 +2148,6 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain,
struct io_pgtable_cfg pgtbl_cfg;
struct io_pgtable_ops *pgtbl_ops;
int (*finalise_stage_fn)(struct arm_smmu_domain *,
- struct arm_smmu_master *,
struct io_pgtable_cfg *);
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
struct arm_smmu_device *smmu = smmu_domain->smmu;
@@ -2234,7 +2199,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain,
domain->geometry.aperture_end = (1UL << pgtbl_cfg.ias) - 1;
domain->geometry.force_aperture = true;
- ret = finalise_stage_fn(smmu_domain, master, &pgtbl_cfg);
+ ret = finalise_stage_fn(smmu_domain, &pgtbl_cfg);
if (ret < 0) {
free_io_pgtable_ops(pgtbl_ops);
return ret;
@@ -2403,6 +2368,14 @@ static void arm_smmu_detach_dev(struct arm_smmu_master *master)
master->domain = NULL;
master->ats_enabled = false;
arm_smmu_install_ste_for_dev(master);
+ /*
+ * Clearing the CD entry isn't strictly required to detach the domain
+ * since the table is uninstalled anyway, but it helps avoid confusion
+ * in the call to arm_smmu_write_ctx_desc on the next attach (which
+ * expects the entry to be empty).
+ */
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 && master->cd_table.cdtab)
+ arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, NULL);
}
static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
@@ -2436,23 +2409,15 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
if (!smmu_domain->smmu) {
smmu_domain->smmu = smmu;
- ret = arm_smmu_domain_finalise(domain, master);
- if (ret) {
+ ret = arm_smmu_domain_finalise(domain);
+ if (ret)
smmu_domain->smmu = NULL;
- goto out_unlock;
- }
- } else if (smmu_domain->smmu != smmu) {
- ret = -EINVAL;
- goto out_unlock;
- } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
- master->ssid_bits != smmu_domain->s1_cfg.s1cdmax) {
+ } else if (smmu_domain->smmu != smmu)
ret = -EINVAL;
- goto out_unlock;
- } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1 &&
- smmu_domain->stall_enabled != master->stall_enabled) {
- ret = -EINVAL;
- goto out_unlock;
- }
+
+ mutex_unlock(&smmu_domain->init_mutex);
+ if (ret)
+ return ret;
master->domain = smmu_domain;
@@ -2466,16 +2431,42 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev)
if (smmu_domain->stage != ARM_SMMU_DOMAIN_BYPASS)
master->ats_enabled = arm_smmu_ats_supported(master);
- arm_smmu_install_ste_for_dev(master);
-
spin_lock_irqsave(&smmu_domain->devices_lock, flags);
list_add(&master->domain_head, &smmu_domain->devices);
spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
+ if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) {
+ if (!master->cd_table.cdtab) {
+ ret = arm_smmu_alloc_cd_tables(master);
+ if (ret) {
+ master->domain = NULL;
+ goto out_list_del;
+ }
+ }
+
+ /*
+ * Prevent SVA from concurrently modifying the CD or writing to
+ * the CD entry
+ */
+ mutex_lock(&arm_smmu_asid_lock);
+ ret = arm_smmu_write_ctx_desc(master, IOMMU_NO_PASID, &smmu_domain->cd);
+ mutex_unlock(&arm_smmu_asid_lock);
+ if (ret) {
+ master->domain = NULL;
+ goto out_list_del;
+ }
+ }
+
+ arm_smmu_install_ste_for_dev(master);
+
arm_smmu_enable_ats(master);
+ return 0;
+
+out_list_del:
+ spin_lock_irqsave(&smmu_domain->devices_lock, flags);
+ list_del(&master->domain_head);
+ spin_unlock_irqrestore(&smmu_domain->devices_lock, flags);
-out_unlock:
- mutex_unlock(&smmu_domain->init_mutex);
return ret;
}
@@ -2720,6 +2711,8 @@ static void arm_smmu_release_device(struct device *dev)
arm_smmu_detach_dev(master);
arm_smmu_disable_pasid(master);
arm_smmu_remove_master(master);
+ if (master->cd_table.cdtab)
+ arm_smmu_free_cd_tables(master);
kfree(master);
}
diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
index 9915850dd4db..961205ba86d2 100644
--- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
+++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.h
@@ -595,13 +595,11 @@ struct arm_smmu_ctx_desc_cfg {
dma_addr_t cdtab_dma;
struct arm_smmu_l1_ctx_desc *l1_desc;
unsigned int num_l1_ents;
-};
-
-struct arm_smmu_s1_cfg {
- struct arm_smmu_ctx_desc_cfg cdcfg;
- struct arm_smmu_ctx_desc cd;
u8 s1fmt;
+ /* log2 of the maximum number of CDs supported by this table */
u8 s1cdmax;
+ /* Whether CD entries in this table have the stall bit set. */
+ u8 stall_enabled:1;
};
struct arm_smmu_s2_cfg {
@@ -697,6 +695,8 @@ struct arm_smmu_master {
struct arm_smmu_domain *domain;
struct list_head domain_head;
struct arm_smmu_stream *streams;
+ /* Locked by the iommu core using the group mutex */
+ struct arm_smmu_ctx_desc_cfg cd_table;
unsigned int num_streams;
bool ats_enabled;
bool stall_enabled;
@@ -719,13 +719,12 @@ struct arm_smmu_domain {
struct mutex init_mutex; /* Protects smmu pointer */
struct io_pgtable_ops *pgtbl_ops;
- bool stall_enabled;
atomic_t nr_ats_masters;
enum arm_smmu_domain_stage stage;
union {
- struct arm_smmu_s1_cfg s1_cfg;
- struct arm_smmu_s2_cfg s2_cfg;
+ struct arm_smmu_ctx_desc cd;
+ struct arm_smmu_s2_cfg s2_cfg;
};
struct iommu_domain domain;
@@ -745,7 +744,7 @@ extern struct xarray arm_smmu_asid_xa;
extern struct mutex arm_smmu_asid_lock;
extern struct arm_smmu_ctx_desc quiet_cd;
-int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, int ssid,
+int arm_smmu_write_ctx_desc(struct arm_smmu_master *smmu_master, int ssid,
struct arm_smmu_ctx_desc *cd);
void arm_smmu_tlb_inv_asid(struct arm_smmu_device *smmu, u16 asid);
void arm_smmu_tlb_inv_range_asid(unsigned long iova, size_t size, int asid,
diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index 7f52ac67495f..549ae4dba3a6 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -251,6 +251,7 @@ static const struct of_device_id qcom_smmu_client_of_match[] __maybe_unused = {
{ .compatible = "qcom,sc7280-mss-pil" },
{ .compatible = "qcom,sc8180x-mdss" },
{ .compatible = "qcom,sc8280xp-mdss" },
+ { .compatible = "qcom,sdm670-mdss" },
{ .compatible = "qcom,sdm845-mdss" },
{ .compatible = "qcom,sdm845-mss-pil" },
{ .compatible = "qcom,sm6350-mdss" },
@@ -532,6 +533,7 @@ static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
{ .compatible = "qcom,sm6350-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sm6375-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sm6375-smmu-500", .data = &qcom_smmu_500_impl0_data },
+ { .compatible = "qcom,sm7150-smmu-v2", .data = &qcom_smmu_v2_data },
{ .compatible = "qcom,sm8150-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sm8250-smmu-500", .data = &qcom_smmu_500_impl0_data },
{ .compatible = "qcom,sm8350-smmu-500", .data = &qcom_smmu_500_impl0_data },
diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c
index 4b1a88f514c9..85163a83df2f 100644
--- a/drivers/iommu/dma-iommu.c
+++ b/drivers/iommu/dma-iommu.c
@@ -43,14 +43,28 @@ enum iommu_dma_cookie_type {
IOMMU_DMA_MSI_COOKIE,
};
+enum iommu_dma_queue_type {
+ IOMMU_DMA_OPTS_PER_CPU_QUEUE,
+ IOMMU_DMA_OPTS_SINGLE_QUEUE,
+};
+
+struct iommu_dma_options {
+ enum iommu_dma_queue_type qt;
+ size_t fq_size;
+ unsigned int fq_timeout;
+};
+
struct iommu_dma_cookie {
enum iommu_dma_cookie_type type;
union {
/* Full allocator for IOMMU_DMA_IOVA_COOKIE */
struct {
struct iova_domain iovad;
-
- struct iova_fq __percpu *fq; /* Flush queue */
+ /* Flush queue */
+ union {
+ struct iova_fq *single_fq;
+ struct iova_fq __percpu *percpu_fq;
+ };
/* Number of TLB flushes that have been started */
atomic64_t fq_flush_start_cnt;
/* Number of TLB flushes that have been finished */
@@ -67,6 +81,8 @@ struct iommu_dma_cookie {
/* Domain for flush queue callback; NULL if flush queue not in use */
struct iommu_domain *fq_domain;
+ /* Options for dma-iommu use */
+ struct iommu_dma_options options;
struct mutex mutex;
};
@@ -84,10 +100,12 @@ static int __init iommu_dma_forcedac_setup(char *str)
early_param("iommu.forcedac", iommu_dma_forcedac_setup);
/* Number of entries per flush queue */
-#define IOVA_FQ_SIZE 256
+#define IOVA_DEFAULT_FQ_SIZE 256
+#define IOVA_SINGLE_FQ_SIZE 32768
/* Timeout (in ms) after which entries are flushed from the queue */
-#define IOVA_FQ_TIMEOUT 10
+#define IOVA_DEFAULT_FQ_TIMEOUT 10
+#define IOVA_SINGLE_FQ_TIMEOUT 1000
/* Flush queue entry for deferred flushing */
struct iova_fq_entry {
@@ -99,18 +117,19 @@ struct iova_fq_entry {
/* Per-CPU flush queue structure */
struct iova_fq {
- struct iova_fq_entry entries[IOVA_FQ_SIZE];
- unsigned int head, tail;
spinlock_t lock;
+ unsigned int head, tail;
+ unsigned int mod_mask;
+ struct iova_fq_entry entries[];
};
#define fq_ring_for_each(i, fq) \
- for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
+ for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) & (fq)->mod_mask)
static inline bool fq_full(struct iova_fq *fq)
{
assert_spin_locked(&fq->lock);
- return (((fq->tail + 1) % IOVA_FQ_SIZE) == fq->head);
+ return (((fq->tail + 1) & fq->mod_mask) == fq->head);
}
static inline unsigned int fq_ring_add(struct iova_fq *fq)
@@ -119,12 +138,12 @@ static inline unsigned int fq_ring_add(struct iova_fq *fq)
assert_spin_locked(&fq->lock);
- fq->tail = (idx + 1) % IOVA_FQ_SIZE;
+ fq->tail = (idx + 1) & fq->mod_mask;
return idx;
}
-static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
+static void fq_ring_free_locked(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
{
u64 counter = atomic64_read(&cookie->fq_flush_finish_cnt);
unsigned int idx;
@@ -141,10 +160,19 @@ static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
fq->entries[idx].iova_pfn,
fq->entries[idx].pages);
- fq->head = (fq->head + 1) % IOVA_FQ_SIZE;
+ fq->head = (fq->head + 1) & fq->mod_mask;
}
}
+static void fq_ring_free(struct iommu_dma_cookie *cookie, struct iova_fq *fq)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&fq->lock, flags);
+ fq_ring_free_locked(cookie, fq);
+ spin_unlock_irqrestore(&fq->lock, flags);
+}
+
static void fq_flush_iotlb(struct iommu_dma_cookie *cookie)
{
atomic64_inc(&cookie->fq_flush_start_cnt);
@@ -160,14 +188,11 @@ static void fq_flush_timeout(struct timer_list *t)
atomic_set(&cookie->fq_timer_on, 0);
fq_flush_iotlb(cookie);
- for_each_possible_cpu(cpu) {
- unsigned long flags;
- struct iova_fq *fq;
-
- fq = per_cpu_ptr(cookie->fq, cpu);
- spin_lock_irqsave(&fq->lock, flags);
- fq_ring_free(cookie, fq);
- spin_unlock_irqrestore(&fq->lock, flags);
+ if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE) {
+ fq_ring_free(cookie, cookie->single_fq);
+ } else {
+ for_each_possible_cpu(cpu)
+ fq_ring_free(cookie, per_cpu_ptr(cookie->percpu_fq, cpu));
}
}
@@ -188,7 +213,11 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
*/
smp_mb();
- fq = raw_cpu_ptr(cookie->fq);
+ if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE)
+ fq = cookie->single_fq;
+ else
+ fq = raw_cpu_ptr(cookie->percpu_fq);
+
spin_lock_irqsave(&fq->lock, flags);
/*
@@ -196,11 +225,11 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
* flushed out on another CPU. This makes the fq_full() check below less
* likely to be true.
*/
- fq_ring_free(cookie, fq);
+ fq_ring_free_locked(cookie, fq);
if (fq_full(fq)) {
fq_flush_iotlb(cookie);
- fq_ring_free(cookie, fq);
+ fq_ring_free_locked(cookie, fq);
}
idx = fq_ring_add(fq);
@@ -216,34 +245,95 @@ static void queue_iova(struct iommu_dma_cookie *cookie,
if (!atomic_read(&cookie->fq_timer_on) &&
!atomic_xchg(&cookie->fq_timer_on, 1))
mod_timer(&cookie->fq_timer,
- jiffies + msecs_to_jiffies(IOVA_FQ_TIMEOUT));
+ jiffies + msecs_to_jiffies(cookie->options.fq_timeout));
}
-static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
+static void iommu_dma_free_fq_single(struct iova_fq *fq)
{
- int cpu, idx;
+ int idx;
- if (!cookie->fq)
- return;
+ fq_ring_for_each(idx, fq)
+ put_pages_list(&fq->entries[idx].freelist);
+ vfree(fq);
+}
+
+static void iommu_dma_free_fq_percpu(struct iova_fq __percpu *percpu_fq)
+{
+ int cpu, idx;
- del_timer_sync(&cookie->fq_timer);
/* The IOVAs will be torn down separately, so just free our queued pages */
for_each_possible_cpu(cpu) {
- struct iova_fq *fq = per_cpu_ptr(cookie->fq, cpu);
+ struct iova_fq *fq = per_cpu_ptr(percpu_fq, cpu);
fq_ring_for_each(idx, fq)
put_pages_list(&fq->entries[idx].freelist);
}
- free_percpu(cookie->fq);
+ free_percpu(percpu_fq);
+}
+
+static void iommu_dma_free_fq(struct iommu_dma_cookie *cookie)
+{
+ if (!cookie->fq_domain)
+ return;
+
+ del_timer_sync(&cookie->fq_timer);
+ if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE)
+ iommu_dma_free_fq_single(cookie->single_fq);
+ else
+ iommu_dma_free_fq_percpu(cookie->percpu_fq);
+}
+
+static void iommu_dma_init_one_fq(struct iova_fq *fq, size_t fq_size)
+{
+ int i;
+
+ fq->head = 0;
+ fq->tail = 0;
+ fq->mod_mask = fq_size - 1;
+
+ spin_lock_init(&fq->lock);
+
+ for (i = 0; i < fq_size; i++)
+ INIT_LIST_HEAD(&fq->entries[i].freelist);
+}
+
+static int iommu_dma_init_fq_single(struct iommu_dma_cookie *cookie)
+{
+ size_t fq_size = cookie->options.fq_size;
+ struct iova_fq *queue;
+
+ queue = vmalloc(struct_size(queue, entries, fq_size));
+ if (!queue)
+ return -ENOMEM;
+ iommu_dma_init_one_fq(queue, fq_size);
+ cookie->single_fq = queue;
+
+ return 0;
+}
+
+static int iommu_dma_init_fq_percpu(struct iommu_dma_cookie *cookie)
+{
+ size_t fq_size = cookie->options.fq_size;
+ struct iova_fq __percpu *queue;
+ int cpu;
+
+ queue = __alloc_percpu(struct_size(queue, entries, fq_size),
+ __alignof__(*queue));
+ if (!queue)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu)
+ iommu_dma_init_one_fq(per_cpu_ptr(queue, cpu), fq_size);
+ cookie->percpu_fq = queue;
+ return 0;
}
/* sysfs updates are serialised by the mutex of the group owning @domain */
int iommu_dma_init_fq(struct iommu_domain *domain)
{
struct iommu_dma_cookie *cookie = domain->iova_cookie;
- struct iova_fq __percpu *queue;
- int i, cpu;
+ int rc;
if (cookie->fq_domain)
return 0;
@@ -251,26 +341,16 @@ int iommu_dma_init_fq(struct iommu_domain *domain)
atomic64_set(&cookie->fq_flush_start_cnt, 0);
atomic64_set(&cookie->fq_flush_finish_cnt, 0);
- queue = alloc_percpu(struct iova_fq);
- if (!queue) {
+ if (cookie->options.qt == IOMMU_DMA_OPTS_SINGLE_QUEUE)
+ rc = iommu_dma_init_fq_single(cookie);
+ else
+ rc = iommu_dma_init_fq_percpu(cookie);
+
+ if (rc) {
pr_warn("iova flush queue initialization failed\n");
return -ENOMEM;
}
- for_each_possible_cpu(cpu) {
- struct iova_fq *fq = per_cpu_ptr(queue, cpu);
-
- fq->head = 0;
- fq->tail = 0;
-
- spin_lock_init(&fq->lock);
-
- for (i = 0; i < IOVA_FQ_SIZE; i++)
- INIT_LIST_HEAD(&fq->entries[i].freelist);
- }
-
- cookie->fq = queue;
-
timer_setup(&cookie->fq_timer, fq_flush_timeout, 0);
atomic_set(&cookie->fq_timer_on, 0);
/*
@@ -555,6 +635,28 @@ static bool dev_use_sg_swiotlb(struct device *dev, struct scatterlist *sg,
}
/**
+ * iommu_dma_init_options - Initialize dma-iommu options
+ * @options: The options to be initialized
+ * @dev: Device the options are set for
+ *
+ * This allows tuning dma-iommu specific to device properties
+ */
+static void iommu_dma_init_options(struct iommu_dma_options *options,
+ struct device *dev)
+{
+ /* Shadowing IOTLB flushes do better with a single large queue */
+ if (dev->iommu->shadow_on_flush) {
+ options->qt = IOMMU_DMA_OPTS_SINGLE_QUEUE;
+ options->fq_timeout = IOVA_SINGLE_FQ_TIMEOUT;
+ options->fq_size = IOVA_SINGLE_FQ_SIZE;
+ } else {
+ options->qt = IOMMU_DMA_OPTS_PER_CPU_QUEUE;
+ options->fq_size = IOVA_DEFAULT_FQ_SIZE;
+ options->fq_timeout = IOVA_DEFAULT_FQ_TIMEOUT;
+ }
+}
+
+/**
* iommu_dma_init_domain - Initialise a DMA mapping domain
* @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
* @base: IOVA at which the mappable address space starts
@@ -614,6 +716,8 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
if (ret)
goto done_unlock;
+ iommu_dma_init_options(&cookie->options, dev);
+
/* If the FQ fails we can simply fall back to strict mode */
if (domain->type == IOMMU_DOMAIN_DMA_FQ &&
(!device_iommu_capable(dev, IOMMU_CAP_DEFERRED_FLUSH) || iommu_dma_init_fq(domain)))
diff --git a/drivers/iommu/intel/debugfs.c b/drivers/iommu/intel/debugfs.c
index 1f925285104e..dee61e513be6 100644
--- a/drivers/iommu/intel/debugfs.c
+++ b/drivers/iommu/intel/debugfs.c
@@ -111,6 +111,8 @@ static const struct iommu_regset iommu_regs_64[] = {
IOMMU_REGSET_ENTRY(VCRSP),
};
+static struct dentry *intel_iommu_debug;
+
static int iommu_regset_show(struct seq_file *m, void *unused)
{
struct dmar_drhd_unit *drhd;
@@ -311,9 +313,14 @@ static inline unsigned long level_to_directory_size(int level)
static inline void
dump_page_info(struct seq_file *m, unsigned long iova, u64 *path)
{
- seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\t0x%016llx\n",
- iova >> VTD_PAGE_SHIFT, path[5], path[4],
- path[3], path[2], path[1]);
+ seq_printf(m, "0x%013lx |\t0x%016llx\t0x%016llx\t0x%016llx",
+ iova >> VTD_PAGE_SHIFT, path[5], path[4], path[3]);
+ if (path[2]) {
+ seq_printf(m, "\t0x%016llx", path[2]);
+ if (path[1])
+ seq_printf(m, "\t0x%016llx", path[1]);
+ }
+ seq_putc(m, '\n');
}
static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde,
@@ -340,58 +347,140 @@ static void pgtable_walk_level(struct seq_file *m, struct dma_pte *pde,
}
}
-static int __show_device_domain_translation(struct device *dev, void *data)
+static int domain_translation_struct_show(struct seq_file *m,
+ struct device_domain_info *info,
+ ioasid_t pasid)
{
- struct dmar_domain *domain;
- struct seq_file *m = data;
- u64 path[6] = { 0 };
-
- domain = to_dmar_domain(iommu_get_domain_for_dev(dev));
- if (!domain)
- return 0;
+ bool scalable, found = false;
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ u16 devfn, bus, seg;
- seq_printf(m, "Device %s @0x%llx\n", dev_name(dev),
- (u64)virt_to_phys(domain->pgd));
- seq_puts(m, "IOVA_PFN\t\tPML5E\t\t\tPML4E\t\t\tPDPE\t\t\tPDE\t\t\tPTE\n");
+ bus = info->bus;
+ devfn = info->devfn;
+ seg = info->segment;
- pgtable_walk_level(m, domain->pgd, domain->agaw + 2, 0, path);
- seq_putc(m, '\n');
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ struct context_entry *context;
+ u64 pgd, path[6] = { 0 };
+ u32 sts, agaw;
- /* Don't iterate */
- return 1;
-}
+ if (seg != iommu->segment)
+ continue;
-static int show_device_domain_translation(struct device *dev, void *data)
-{
- struct iommu_group *group;
+ sts = dmar_readl(iommu->reg + DMAR_GSTS_REG);
+ if (!(sts & DMA_GSTS_TES)) {
+ seq_printf(m, "DMA Remapping is not enabled on %s\n",
+ iommu->name);
+ continue;
+ }
+ if (dmar_readq(iommu->reg + DMAR_RTADDR_REG) & DMA_RTADDR_SMT)
+ scalable = true;
+ else
+ scalable = false;
- group = iommu_group_get(dev);
- if (group) {
/*
- * The group->mutex is held across the callback, which will
- * block calls to iommu_attach/detach_group/device. Hence,
+ * The iommu->lock is held across the callback, which will
+ * block calls to domain_attach/domain_detach. Hence,
* the domain of the device will not change during traversal.
*
- * All devices in an iommu group share a single domain, hence
- * we only dump the domain of the first device. Even though,
- * this code still possibly races with the iommu_unmap()
+ * Traversing page table possibly races with the iommu_unmap()
* interface. This could be solved by RCU-freeing the page
* table pages in the iommu_unmap() path.
*/
- iommu_group_for_each_dev(group, data,
- __show_device_domain_translation);
- iommu_group_put(group);
+ spin_lock(&iommu->lock);
+
+ context = iommu_context_addr(iommu, bus, devfn, 0);
+ if (!context || !context_present(context))
+ goto iommu_unlock;
+
+ if (scalable) { /* scalable mode */
+ struct pasid_entry *pasid_tbl, *pasid_tbl_entry;
+ struct pasid_dir_entry *dir_tbl, *dir_entry;
+ u16 dir_idx, tbl_idx, pgtt;
+ u64 pasid_dir_ptr;
+
+ pasid_dir_ptr = context->lo & VTD_PAGE_MASK;
+
+ /* Dump specified device domain mappings with PASID. */
+ dir_idx = pasid >> PASID_PDE_SHIFT;
+ tbl_idx = pasid & PASID_PTE_MASK;
+
+ dir_tbl = phys_to_virt(pasid_dir_ptr);
+ dir_entry = &dir_tbl[dir_idx];
+
+ pasid_tbl = get_pasid_table_from_pde(dir_entry);
+ if (!pasid_tbl)
+ goto iommu_unlock;
+
+ pasid_tbl_entry = &pasid_tbl[tbl_idx];
+ if (!pasid_pte_is_present(pasid_tbl_entry))
+ goto iommu_unlock;
+
+ /*
+ * According to PASID Granular Translation Type(PGTT),
+ * get the page table pointer.
+ */
+ pgtt = (u16)(pasid_tbl_entry->val[0] & GENMASK_ULL(8, 6)) >> 6;
+ agaw = (u8)(pasid_tbl_entry->val[0] & GENMASK_ULL(4, 2)) >> 2;
+
+ switch (pgtt) {
+ case PASID_ENTRY_PGTT_FL_ONLY:
+ pgd = pasid_tbl_entry->val[2];
+ break;
+ case PASID_ENTRY_PGTT_SL_ONLY:
+ case PASID_ENTRY_PGTT_NESTED:
+ pgd = pasid_tbl_entry->val[0];
+ break;
+ default:
+ goto iommu_unlock;
+ }
+ pgd &= VTD_PAGE_MASK;
+ } else { /* legacy mode */
+ pgd = context->lo & VTD_PAGE_MASK;
+ agaw = context->hi & 7;
+ }
+
+ seq_printf(m, "Device %04x:%02x:%02x.%x ",
+ iommu->segment, bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
+ if (scalable)
+ seq_printf(m, "with pasid %x @0x%llx\n", pasid, pgd);
+ else
+ seq_printf(m, "@0x%llx\n", pgd);
+
+ seq_printf(m, "%-17s\t%-18s\t%-18s\t%-18s\t%-18s\t%-s\n",
+ "IOVA_PFN", "PML5E", "PML4E", "PDPE", "PDE", "PTE");
+ pgtable_walk_level(m, phys_to_virt(pgd), agaw + 2, 0, path);
+
+ found = true;
+iommu_unlock:
+ spin_unlock(&iommu->lock);
+ if (found)
+ break;
}
+ rcu_read_unlock();
return 0;
}
-static int domain_translation_struct_show(struct seq_file *m, void *unused)
+static int dev_domain_translation_struct_show(struct seq_file *m, void *unused)
+{
+ struct device_domain_info *info = (struct device_domain_info *)m->private;
+
+ return domain_translation_struct_show(m, info, IOMMU_NO_PASID);
+}
+DEFINE_SHOW_ATTRIBUTE(dev_domain_translation_struct);
+
+static int pasid_domain_translation_struct_show(struct seq_file *m, void *unused)
{
- return bus_for_each_dev(&pci_bus_type, NULL, m,
- show_device_domain_translation);
+ struct dev_pasid_info *dev_pasid = (struct dev_pasid_info *)m->private;
+ struct device_domain_info *info = dev_iommu_priv_get(dev_pasid->dev);
+
+ return domain_translation_struct_show(m, info, dev_pasid->pasid);
}
-DEFINE_SHOW_ATTRIBUTE(domain_translation_struct);
+DEFINE_SHOW_ATTRIBUTE(pasid_domain_translation_struct);
static void invalidation_queue_entry_show(struct seq_file *m,
struct intel_iommu *iommu)
@@ -666,16 +755,12 @@ static const struct file_operations dmar_perf_latency_fops = {
void __init intel_iommu_debugfs_init(void)
{
- struct dentry *intel_iommu_debug = debugfs_create_dir("intel",
- iommu_debugfs_dir);
+ intel_iommu_debug = debugfs_create_dir("intel", iommu_debugfs_dir);
debugfs_create_file("iommu_regset", 0444, intel_iommu_debug, NULL,
&iommu_regset_fops);
debugfs_create_file("dmar_translation_struct", 0444, intel_iommu_debug,
NULL, &dmar_translation_struct_fops);
- debugfs_create_file("domain_translation_struct", 0444,
- intel_iommu_debug, NULL,
- &domain_translation_struct_fops);
debugfs_create_file("invalidation_queue", 0444, intel_iommu_debug,
NULL, &invalidation_queue_fops);
#ifdef CONFIG_IRQ_REMAP
@@ -685,3 +770,51 @@ void __init intel_iommu_debugfs_init(void)
debugfs_create_file("dmar_perf_latency", 0644, intel_iommu_debug,
NULL, &dmar_perf_latency_fops);
}
+
+/*
+ * Create a debugfs directory for each device, and then create a
+ * debugfs file in this directory for users to dump the page table
+ * of the default domain. e.g.
+ * /sys/kernel/debug/iommu/intel/0000:00:01.0/domain_translation_struct
+ */
+void intel_iommu_debugfs_create_dev(struct device_domain_info *info)
+{
+ info->debugfs_dentry = debugfs_create_dir(dev_name(info->dev), intel_iommu_debug);
+
+ debugfs_create_file("domain_translation_struct", 0444, info->debugfs_dentry,
+ info, &dev_domain_translation_struct_fops);
+}
+
+/* Remove the device debugfs directory. */
+void intel_iommu_debugfs_remove_dev(struct device_domain_info *info)
+{
+ debugfs_remove_recursive(info->debugfs_dentry);
+}
+
+/*
+ * Create a debugfs directory per pair of {device, pasid}, then create the
+ * corresponding debugfs file in this directory for users to dump its page
+ * table. e.g.
+ * /sys/kernel/debug/iommu/intel/0000:00:01.0/1/domain_translation_struct
+ *
+ * The debugfs only dumps the page tables whose mappings are created and
+ * destroyed by the iommu_map/unmap() interfaces. Check the mapping type
+ * of the domain before creating debugfs directory.
+ */
+void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid)
+{
+ struct device_domain_info *info = dev_iommu_priv_get(dev_pasid->dev);
+ char dir_name[10];
+
+ sprintf(dir_name, "%x", dev_pasid->pasid);
+ dev_pasid->debugfs_dentry = debugfs_create_dir(dir_name, info->debugfs_dentry);
+
+ debugfs_create_file("domain_translation_struct", 0444, dev_pasid->debugfs_dentry,
+ dev_pasid, &pasid_domain_translation_struct_fops);
+}
+
+/* Remove the device pasid debugfs directory. */
+void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid)
+{
+ debugfs_remove_recursive(dev_pasid->debugfs_dentry);
+}
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 71c12e15ecd7..d5d191a71fe0 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -4407,6 +4407,8 @@ static struct iommu_device *intel_iommu_probe_device(struct device *dev)
}
}
+ intel_iommu_debugfs_create_dev(info);
+
return &iommu->iommu;
}
@@ -4416,6 +4418,7 @@ static void intel_iommu_release_device(struct device *dev)
dmar_remove_one_dev_info(dev);
intel_pasid_free_table(dev);
+ intel_iommu_debugfs_remove_dev(info);
dev_iommu_priv_set(dev, NULL);
kfree(info);
set_dma_ops(dev, NULL);
@@ -4660,8 +4663,8 @@ static bool risky_device(struct pci_dev *pdev)
return false;
}
-static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static int intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
struct dmar_domain *dmar_domain = to_dmar_domain(domain);
unsigned long pages = aligned_nrpages(iova, size);
@@ -4671,6 +4674,7 @@ static void intel_iommu_iotlb_sync_map(struct iommu_domain *domain,
xa_for_each(&dmar_domain->iommu_array, i, info)
__mapping_notify_one(info->iommu, dmar_domain, pfn, pages);
+ return 0;
}
static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
@@ -4708,6 +4712,7 @@ static void intel_iommu_remove_dev_pasid(struct device *dev, ioasid_t pasid)
spin_unlock_irqrestore(&dmar_domain->lock, flags);
domain_detach_iommu(dmar_domain, iommu);
+ intel_iommu_debugfs_remove_dev_pasid(dev_pasid);
kfree(dev_pasid);
out_tear_down:
intel_pasid_tear_down_entry(iommu, dev, pasid, false);
@@ -4760,6 +4765,9 @@ static int intel_iommu_set_dev_pasid(struct iommu_domain *domain,
list_add(&dev_pasid->link_domain, &dmar_domain->dev_pasids);
spin_unlock_irqrestore(&dmar_domain->lock, flags);
+ if (domain->type & __IOMMU_DOMAIN_PAGING)
+ intel_iommu_debugfs_create_dev_pasid(dev_pasid);
+
return 0;
out_detach_iommu:
domain_detach_iommu(dmar_domain, iommu);
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index 7dac94f62b4e..f9c1dd1ccff8 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -716,12 +716,18 @@ struct device_domain_info {
struct intel_iommu *iommu; /* IOMMU used by this device */
struct dmar_domain *domain; /* pointer to domain */
struct pasid_table *pasid_table; /* pasid table */
+#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
+ struct dentry *debugfs_dentry; /* pointer to device directory dentry */
+#endif
};
struct dev_pasid_info {
struct list_head link_domain; /* link to domain siblings */
struct device *dev;
ioasid_t pasid;
+#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
+ struct dentry *debugfs_dentry; /* pointer to pasid directory dentry */
+#endif
};
static inline void __iommu_flush_cache(
@@ -883,8 +889,16 @@ static inline void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid
#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
void intel_iommu_debugfs_init(void);
+void intel_iommu_debugfs_create_dev(struct device_domain_info *info);
+void intel_iommu_debugfs_remove_dev(struct device_domain_info *info);
+void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid);
+void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid);
#else
static inline void intel_iommu_debugfs_init(void) {}
+static inline void intel_iommu_debugfs_create_dev(struct device_domain_info *info) {}
+static inline void intel_iommu_debugfs_remove_dev(struct device_domain_info *info) {}
+static inline void intel_iommu_debugfs_create_dev_pasid(struct dev_pasid_info *dev_pasid) {}
+static inline void intel_iommu_debugfs_remove_dev_pasid(struct dev_pasid_info *dev_pasid) {}
#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */
extern const struct attribute_group *intel_iommu_groups[];
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 79a0fdb33404..f17a1113f3d6 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -1114,7 +1114,8 @@ map_end:
}
- iommu_flush_iotlb_all(domain);
+ if (!list_empty(&mappings) && iommu_is_dma_domain(domain))
+ iommu_flush_iotlb_all(domain);
out:
iommu_put_resv_regions(dev, &mappings);
@@ -2599,8 +2600,17 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova,
return -EINVAL;
ret = __iommu_map(domain, iova, paddr, size, prot, gfp);
- if (ret == 0 && ops->iotlb_sync_map)
- ops->iotlb_sync_map(domain, iova, size);
+ if (ret == 0 && ops->iotlb_sync_map) {
+ ret = ops->iotlb_sync_map(domain, iova, size);
+ if (ret)
+ goto out_err;
+ }
+
+ return ret;
+
+out_err:
+ /* undo mappings already done */
+ iommu_unmap(domain, iova, size);
return ret;
}
@@ -2728,8 +2738,11 @@ next:
sg = sg_next(sg);
}
- if (ops->iotlb_sync_map)
- ops->iotlb_sync_map(domain, iova, mapped);
+ if (ops->iotlb_sync_map) {
+ ret = ops->iotlb_sync_map(domain, iova, mapped);
+ if (ret)
+ goto out_err;
+ }
return mapped;
out_err:
@@ -3172,24 +3185,6 @@ out_unlock:
return ret ?: count;
}
-static bool iommu_is_default_domain(struct iommu_group *group)
-{
- if (group->domain == group->default_domain)
- return true;
-
- /*
- * If the default domain was set to identity and it is still an identity
- * domain then we consider this a pass. This happens because of
- * amd_iommu_init_device() replacing the default idenytity domain with an
- * identity domain that has a different configuration for AMDGPU.
- */
- if (group->default_domain &&
- group->default_domain->type == IOMMU_DOMAIN_IDENTITY &&
- group->domain && group->domain->type == IOMMU_DOMAIN_IDENTITY)
- return true;
- return false;
-}
-
/**
* iommu_device_use_default_domain() - Device driver wants to handle device
* DMA through the kernel DMA API.
@@ -3209,7 +3204,7 @@ int iommu_device_use_default_domain(struct device *dev)
mutex_lock(&group->mutex);
if (group->owner_cnt) {
- if (group->owner || !iommu_is_default_domain(group) ||
+ if (group->domain != group->default_domain || group->owner ||
!xa_empty(&group->pasid_array)) {
ret = -EBUSY;
goto unlock_out;
diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c
index a163cee0b724..f86af9815d6f 100644
--- a/drivers/iommu/msm_iommu.c
+++ b/drivers/iommu/msm_iommu.c
@@ -498,12 +498,13 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova,
return ret;
}
-static void msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
- size_t size)
+static int msm_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
+ size_t size)
{
struct msm_priv *priv = to_msm_priv(domain);
__flush_iotlb_range(iova, size, SZ_4K, false, priv);
+ return 0;
}
static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 78c8f705383f..75279500a4a8 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -836,12 +836,13 @@ static void mtk_iommu_iotlb_sync(struct iommu_domain *domain,
mtk_iommu_tlb_flush_range_sync(gather->start, length, dom->bank);
}
-static void mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
- size_t size)
+static int mtk_iommu_sync_map(struct iommu_domain *domain, unsigned long iova,
+ size_t size)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
mtk_iommu_tlb_flush_range_sync(iova, size, dom->bank);
+ return 0;
}
static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index 5695ad71d60e..9a5196f523de 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -14,16 +14,300 @@
#include <linux/rcupdate.h>
#include <asm/pci_dma.h>
+#include "dma-iommu.h"
+
static const struct iommu_ops s390_iommu_ops;
+static struct kmem_cache *dma_region_table_cache;
+static struct kmem_cache *dma_page_table_cache;
+
+static u64 s390_iommu_aperture;
+static u32 s390_iommu_aperture_factor = 1;
+
struct s390_domain {
struct iommu_domain domain;
struct list_head devices;
+ struct zpci_iommu_ctrs ctrs;
unsigned long *dma_table;
spinlock_t list_lock;
struct rcu_head rcu;
};
+static inline unsigned int calc_rtx(dma_addr_t ptr)
+{
+ return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
+}
+
+static inline unsigned int calc_sx(dma_addr_t ptr)
+{
+ return ((unsigned long)ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
+}
+
+static inline unsigned int calc_px(dma_addr_t ptr)
+{
+ return ((unsigned long)ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
+}
+
+static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
+{
+ *entry &= ZPCI_PTE_FLAG_MASK;
+ *entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
+}
+
+static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
+{
+ *entry &= ZPCI_RTE_FLAG_MASK;
+ *entry |= (sto & ZPCI_RTE_ADDR_MASK);
+ *entry |= ZPCI_TABLE_TYPE_RTX;
+}
+
+static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
+{
+ *entry &= ZPCI_STE_FLAG_MASK;
+ *entry |= (pto & ZPCI_STE_ADDR_MASK);
+ *entry |= ZPCI_TABLE_TYPE_SX;
+}
+
+static inline void validate_rt_entry(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_VALID_MASK;
+ *entry &= ~ZPCI_TABLE_OFFSET_MASK;
+ *entry |= ZPCI_TABLE_VALID;
+ *entry |= ZPCI_TABLE_LEN_RTX;
+}
+
+static inline void validate_st_entry(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_VALID_MASK;
+ *entry |= ZPCI_TABLE_VALID;
+}
+
+static inline void invalidate_pt_entry(unsigned long *entry)
+{
+ WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
+ *entry &= ~ZPCI_PTE_VALID_MASK;
+ *entry |= ZPCI_PTE_INVALID;
+}
+
+static inline void validate_pt_entry(unsigned long *entry)
+{
+ WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
+ *entry &= ~ZPCI_PTE_VALID_MASK;
+ *entry |= ZPCI_PTE_VALID;
+}
+
+static inline void entry_set_protected(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_PROT_MASK;
+ *entry |= ZPCI_TABLE_PROTECTED;
+}
+
+static inline void entry_clr_protected(unsigned long *entry)
+{
+ *entry &= ~ZPCI_TABLE_PROT_MASK;
+ *entry |= ZPCI_TABLE_UNPROTECTED;
+}
+
+static inline int reg_entry_isvalid(unsigned long entry)
+{
+ return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
+}
+
+static inline int pt_entry_isvalid(unsigned long entry)
+{
+ return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
+}
+
+static inline unsigned long *get_rt_sto(unsigned long entry)
+{
+ if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
+ return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
+ else
+ return NULL;
+}
+
+static inline unsigned long *get_st_pto(unsigned long entry)
+{
+ if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
+ return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
+ else
+ return NULL;
+}
+
+static int __init dma_alloc_cpu_table_caches(void)
+{
+ dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
+ ZPCI_TABLE_SIZE,
+ ZPCI_TABLE_ALIGN,
+ 0, NULL);
+ if (!dma_region_table_cache)
+ return -ENOMEM;
+
+ dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
+ ZPCI_PT_SIZE,
+ ZPCI_PT_ALIGN,
+ 0, NULL);
+ if (!dma_page_table_cache) {
+ kmem_cache_destroy(dma_region_table_cache);
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+static unsigned long *dma_alloc_cpu_table(gfp_t gfp)
+{
+ unsigned long *table, *entry;
+
+ table = kmem_cache_alloc(dma_region_table_cache, gfp);
+ if (!table)
+ return NULL;
+
+ for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
+ *entry = ZPCI_TABLE_INVALID;
+ return table;
+}
+
+static void dma_free_cpu_table(void *table)
+{
+ kmem_cache_free(dma_region_table_cache, table);
+}
+
+static void dma_free_page_table(void *table)
+{
+ kmem_cache_free(dma_page_table_cache, table);
+}
+
+static void dma_free_seg_table(unsigned long entry)
+{
+ unsigned long *sto = get_rt_sto(entry);
+ int sx;
+
+ for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
+ if (reg_entry_isvalid(sto[sx]))
+ dma_free_page_table(get_st_pto(sto[sx]));
+
+ dma_free_cpu_table(sto);
+}
+
+static void dma_cleanup_tables(unsigned long *table)
+{
+ int rtx;
+
+ if (!table)
+ return;
+
+ for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
+ if (reg_entry_isvalid(table[rtx]))
+ dma_free_seg_table(table[rtx]);
+
+ dma_free_cpu_table(table);
+}
+
+static unsigned long *dma_alloc_page_table(gfp_t gfp)
+{
+ unsigned long *table, *entry;
+
+ table = kmem_cache_alloc(dma_page_table_cache, gfp);
+ if (!table)
+ return NULL;
+
+ for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
+ *entry = ZPCI_PTE_INVALID;
+ return table;
+}
+
+static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
+{
+ unsigned long old_rte, rte;
+ unsigned long *sto;
+
+ rte = READ_ONCE(*rtep);
+ if (reg_entry_isvalid(rte)) {
+ sto = get_rt_sto(rte);
+ } else {
+ sto = dma_alloc_cpu_table(gfp);
+ if (!sto)
+ return NULL;
+
+ set_rt_sto(&rte, virt_to_phys(sto));
+ validate_rt_entry(&rte);
+ entry_clr_protected(&rte);
+
+ old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
+ if (old_rte != ZPCI_TABLE_INVALID) {
+ /* Somone else was faster, use theirs */
+ dma_free_cpu_table(sto);
+ sto = get_rt_sto(old_rte);
+ }
+ }
+ return sto;
+}
+
+static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
+{
+ unsigned long old_ste, ste;
+ unsigned long *pto;
+
+ ste = READ_ONCE(*step);
+ if (reg_entry_isvalid(ste)) {
+ pto = get_st_pto(ste);
+ } else {
+ pto = dma_alloc_page_table(gfp);
+ if (!pto)
+ return NULL;
+ set_st_pto(&ste, virt_to_phys(pto));
+ validate_st_entry(&ste);
+ entry_clr_protected(&ste);
+
+ old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
+ if (old_ste != ZPCI_TABLE_INVALID) {
+ /* Somone else was faster, use theirs */
+ dma_free_page_table(pto);
+ pto = get_st_pto(old_ste);
+ }
+ }
+ return pto;
+}
+
+static unsigned long *dma_walk_cpu_trans(unsigned long *rto, dma_addr_t dma_addr, gfp_t gfp)
+{
+ unsigned long *sto, *pto;
+ unsigned int rtx, sx, px;
+
+ rtx = calc_rtx(dma_addr);
+ sto = dma_get_seg_table_origin(&rto[rtx], gfp);
+ if (!sto)
+ return NULL;
+
+ sx = calc_sx(dma_addr);
+ pto = dma_get_page_table_origin(&sto[sx], gfp);
+ if (!pto)
+ return NULL;
+
+ px = calc_px(dma_addr);
+ return &pto[px];
+}
+
+static void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
+{
+ unsigned long pte;
+
+ pte = READ_ONCE(*ptep);
+ if (flags & ZPCI_PTE_INVALID) {
+ invalidate_pt_entry(&pte);
+ } else {
+ set_pt_pfaa(&pte, page_addr);
+ validate_pt_entry(&pte);
+ }
+
+ if (flags & ZPCI_TABLE_PROTECTED)
+ entry_set_protected(&pte);
+ else
+ entry_clr_protected(&pte);
+
+ xchg(ptep, pte);
+}
+
static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
{
return container_of(dom, struct s390_domain, domain);
@@ -31,9 +315,13 @@ static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
{
+ struct zpci_dev *zdev = to_zpci_dev(dev);
+
switch (cap) {
case IOMMU_CAP_CACHE_COHERENCY:
return true;
+ case IOMMU_CAP_DEFERRED_FLUSH:
+ return zdev->pft != PCI_FUNC_TYPE_ISM;
default:
return false;
}
@@ -81,14 +369,13 @@ static void s390_domain_free(struct iommu_domain *domain)
call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain);
}
-static void __s390_iommu_detach_device(struct zpci_dev *zdev)
+static void s390_iommu_detach_device(struct iommu_domain *domain,
+ struct device *dev)
{
- struct s390_domain *s390_domain = zdev->s390_domain;
+ struct s390_domain *s390_domain = to_s390_domain(domain);
+ struct zpci_dev *zdev = to_zpci_dev(dev);
unsigned long flags;
- if (!s390_domain)
- return;
-
spin_lock_irqsave(&s390_domain->list_lock, flags);
list_del_rcu(&zdev->iommu_list);
spin_unlock_irqrestore(&s390_domain->list_lock, flags);
@@ -115,9 +402,7 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
return -EINVAL;
if (zdev->s390_domain)
- __s390_iommu_detach_device(zdev);
- else if (zdev->dma_table)
- zpci_dma_exit_device(zdev);
+ s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
cc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
virt_to_phys(s390_domain->dma_table), &status);
@@ -127,7 +412,6 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
*/
if (cc && status != ZPCI_PCI_ST_FUNC_NOT_AVAIL)
return -EIO;
- zdev->dma_table = s390_domain->dma_table;
zdev->dma_table = s390_domain->dma_table;
zdev->s390_domain = s390_domain;
@@ -139,31 +423,6 @@ static int s390_iommu_attach_device(struct iommu_domain *domain,
return 0;
}
-/*
- * Switch control over the IOMMU to S390's internal dma_api ops
- */
-static int s390_iommu_platform_attach(struct iommu_domain *platform_domain,
- struct device *dev)
-{
- struct zpci_dev *zdev = to_zpci_dev(dev);
-
- if (!zdev->s390_domain)
- return 0;
-
- __s390_iommu_detach_device(zdev);
- zpci_dma_init_device(zdev);
- return 0;
-}
-
-static struct iommu_domain_ops s390_iommu_platform_ops = {
- .attach_dev = s390_iommu_platform_attach,
-};
-
-static struct iommu_domain s390_iommu_platform_domain = {
- .type = IOMMU_DOMAIN_PLATFORM,
- .ops = &s390_iommu_platform_ops,
-};
-
static void s390_iommu_get_resv_regions(struct device *dev,
struct list_head *list)
{
@@ -204,6 +463,9 @@ static struct iommu_device *s390_iommu_probe_device(struct device *dev)
if (zdev->end_dma > ZPCI_TABLE_SIZE_RT - 1)
zdev->end_dma = ZPCI_TABLE_SIZE_RT - 1;
+ if (zdev->tlb_refresh)
+ dev->iommu->shadow_on_flush = 1;
+
return &zdev->iommu_dev;
}
@@ -216,7 +478,13 @@ static void s390_iommu_release_device(struct device *dev)
* to the device, but keep it attached to other devices in the group.
*/
if (zdev)
- __s390_iommu_detach_device(zdev);
+ s390_iommu_detach_device(&zdev->s390_domain->domain, dev);
+}
+
+static int zpci_refresh_all(struct zpci_dev *zdev)
+{
+ return zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
+ zdev->end_dma - zdev->start_dma + 1);
}
static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain)
@@ -226,8 +494,8 @@ static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain)
rcu_read_lock();
list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
- zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
- zdev->end_dma - zdev->start_dma + 1);
+ atomic64_inc(&s390_domain->ctrs.global_rpcits);
+ zpci_refresh_all(zdev);
}
rcu_read_unlock();
}
@@ -245,26 +513,40 @@ static void s390_iommu_iotlb_sync(struct iommu_domain *domain,
rcu_read_lock();
list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
+ atomic64_inc(&s390_domain->ctrs.sync_rpcits);
zpci_refresh_trans((u64)zdev->fh << 32, gather->start,
size);
}
rcu_read_unlock();
}
-static void s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static int s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
struct s390_domain *s390_domain = to_s390_domain(domain);
struct zpci_dev *zdev;
+ int ret = 0;
rcu_read_lock();
list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
if (!zdev->tlb_refresh)
continue;
- zpci_refresh_trans((u64)zdev->fh << 32,
- iova, size);
+ atomic64_inc(&s390_domain->ctrs.sync_map_rpcits);
+ ret = zpci_refresh_trans((u64)zdev->fh << 32,
+ iova, size);
+ /*
+ * let the hypervisor discover invalidated entries
+ * allowing it to free IOVAs and unpin pages
+ */
+ if (ret == -ENOMEM) {
+ ret = zpci_refresh_all(zdev);
+ if (ret)
+ break;
+ }
}
rcu_read_unlock();
+
+ return ret;
}
static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
@@ -344,16 +626,15 @@ static int s390_iommu_map_pages(struct iommu_domain *domain,
if (!IS_ALIGNED(iova | paddr, pgsize))
return -EINVAL;
- if (!(prot & IOMMU_READ))
- return -EINVAL;
-
if (!(prot & IOMMU_WRITE))
flags |= ZPCI_TABLE_PROTECTED;
rc = s390_iommu_validate_trans(s390_domain, paddr, iova,
- pgcount, flags, gfp);
- if (!rc)
+ pgcount, flags, gfp);
+ if (!rc) {
*mapped = size;
+ atomic64_add(pgcount, &s390_domain->ctrs.mapped_pages);
+ }
return rc;
}
@@ -409,12 +690,26 @@ static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
return 0;
iommu_iotlb_gather_add_range(gather, iova, size);
+ atomic64_add(pgcount, &s390_domain->ctrs.unmapped_pages);
return size;
}
+static void s390_iommu_probe_finalize(struct device *dev)
+{
+ iommu_setup_dma_ops(dev, 0, U64_MAX);
+}
+
+struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
+{
+ if (!zdev || !zdev->s390_domain)
+ return NULL;
+ return &zdev->s390_domain->ctrs;
+}
+
int zpci_init_iommu(struct zpci_dev *zdev)
{
+ u64 aperture_size;
int rc = 0;
rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
@@ -426,6 +721,12 @@ int zpci_init_iommu(struct zpci_dev *zdev)
if (rc)
goto out_sysfs;
+ zdev->start_dma = PAGE_ALIGN(zdev->start_dma);
+ aperture_size = min3(s390_iommu_aperture,
+ ZPCI_TABLE_SIZE_RT - zdev->start_dma,
+ zdev->end_dma - zdev->start_dma + 1);
+ zdev->end_dma = zdev->start_dma + aperture_size - 1;
+
return 0;
out_sysfs:
@@ -441,11 +742,50 @@ void zpci_destroy_iommu(struct zpci_dev *zdev)
iommu_device_sysfs_remove(&zdev->iommu_dev);
}
+static int __init s390_iommu_setup(char *str)
+{
+ if (!strcmp(str, "strict")) {
+ pr_warn("s390_iommu=strict deprecated; use iommu.strict=1 instead\n");
+ iommu_set_dma_strict();
+ }
+ return 1;
+}
+
+__setup("s390_iommu=", s390_iommu_setup);
+
+static int __init s390_iommu_aperture_setup(char *str)
+{
+ if (kstrtou32(str, 10, &s390_iommu_aperture_factor))
+ s390_iommu_aperture_factor = 1;
+ return 1;
+}
+
+__setup("s390_iommu_aperture=", s390_iommu_aperture_setup);
+
+static int __init s390_iommu_init(void)
+{
+ int rc;
+
+ iommu_dma_forcedac = true;
+ s390_iommu_aperture = (u64)virt_to_phys(high_memory);
+ if (!s390_iommu_aperture_factor)
+ s390_iommu_aperture = ULONG_MAX;
+ else
+ s390_iommu_aperture *= s390_iommu_aperture_factor;
+
+ rc = dma_alloc_cpu_table_caches();
+ if (rc)
+ return rc;
+
+ return rc;
+}
+subsys_initcall(s390_iommu_init);
+
static const struct iommu_ops s390_iommu_ops = {
- .default_domain = &s390_iommu_platform_domain,
.capable = s390_iommu_capable,
.domain_alloc_paging = s390_domain_alloc_paging,
.probe_device = s390_iommu_probe_device,
+ .probe_finalize = s390_iommu_probe_finalize,
.release_device = s390_iommu_release_device,
.device_group = generic_device_group,
.pgsize_bitmap = SZ_4K,
diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c
index 9c33ea6903f6..2eb9fb46703b 100644
--- a/drivers/iommu/sprd-iommu.c
+++ b/drivers/iommu/sprd-iommu.c
@@ -341,8 +341,8 @@ static size_t sprd_iommu_unmap(struct iommu_domain *domain, unsigned long iova,
return size;
}
-static void sprd_iommu_sync_map(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static int sprd_iommu_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
struct sprd_iommu_domain *dom = to_sprd_domain(domain);
unsigned int reg;
@@ -354,6 +354,7 @@ static void sprd_iommu_sync_map(struct iommu_domain *domain,
/* clear IOMMU TLB buffer after page table updated */
sprd_iommu_write(dom->sdev, reg, 0xffffffff);
+ return 0;
}
static void sprd_iommu_sync(struct iommu_domain *domain,
diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index 3c834854eda1..41484a5a399b 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -401,8 +401,8 @@ static void sun50i_iommu_flush_iotlb_all(struct iommu_domain *domain)
spin_unlock_irqrestore(&iommu->iommu_lock, flags);
}
-static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain,
- unsigned long iova, size_t size)
+static int sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
{
struct sun50i_iommu_domain *sun50i_domain = to_sun50i_domain(domain);
struct sun50i_iommu *iommu = sun50i_domain->iommu;
@@ -411,6 +411,8 @@ static void sun50i_iommu_iotlb_sync_map(struct iommu_domain *domain,
spin_lock_irqsave(&iommu->iommu_lock, flags);
sun50i_iommu_zap_range(iommu, iova, size);
spin_unlock_irqrestore(&iommu->iommu_lock, flags);
+
+ return 0;
}
static void sun50i_iommu_iotlb_sync(struct iommu_domain *domain,
diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c
index d3cb966abfc3..310871728ab4 100644
--- a/drivers/iommu/tegra-smmu.c
+++ b/drivers/iommu/tegra-smmu.c
@@ -1084,8 +1084,6 @@ DEFINE_SHOW_ATTRIBUTE(tegra_smmu_clients);
static void tegra_smmu_debugfs_init(struct tegra_smmu *smmu)
{
smmu->debugfs = debugfs_create_dir("smmu", NULL);
- if (!smmu->debugfs)
- return;
debugfs_create_file("swgroups", S_IRUGO, smmu->debugfs, smmu,
&tegra_smmu_swgroups_fops);
diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c
index 17dcd826f5c2..379ebe03efb6 100644
--- a/drivers/iommu/virtio-iommu.c
+++ b/drivers/iommu/virtio-iommu.c
@@ -85,7 +85,7 @@ struct viommu_request {
void *writeback;
unsigned int write_offset;
unsigned int len;
- char buf[];
+ char buf[] __counted_by(len);
};
#define VIOMMU_FAULT_RESV_MASK 0xffffff00
@@ -230,7 +230,7 @@ static int __viommu_add_req(struct viommu_dev *viommu, void *buf, size_t len,
if (write_offset <= 0)
return -EINVAL;
- req = kzalloc(sizeof(*req) + len, GFP_ATOMIC);
+ req = kzalloc(struct_size(req, buf, len), GFP_ATOMIC);
if (!req)
return -ENOMEM;
diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h
index 99a5201d9e62..dc7ed2f46886 100644
--- a/include/linux/amd-iommu.h
+++ b/include/linux/amd-iommu.h
@@ -33,126 +33,6 @@ struct pci_dev;
extern int amd_iommu_detect(void);
-/**
- * amd_iommu_init_device() - Init device for use with IOMMUv2 driver
- * @pdev: The PCI device to initialize
- * @pasids: Number of PASIDs to support for this device
- *
- * This function does all setup for the device pdev so that it can be
- * used with IOMMUv2.
- * Returns 0 on success or negative value on error.
- */
-extern int amd_iommu_init_device(struct pci_dev *pdev, int pasids);
-
-/**
- * amd_iommu_free_device() - Free all IOMMUv2 related device resources
- * and disable IOMMUv2 usage for this device
- * @pdev: The PCI device to disable IOMMUv2 usage for'
- */
-extern void amd_iommu_free_device(struct pci_dev *pdev);
-
-/**
- * amd_iommu_bind_pasid() - Bind a given task to a PASID on a device
- * @pdev: The PCI device to bind the task to
- * @pasid: The PASID on the device the task should be bound to
- * @task: the task to bind
- *
- * The function returns 0 on success or a negative value on error.
- */
-extern int amd_iommu_bind_pasid(struct pci_dev *pdev, u32 pasid,
- struct task_struct *task);
-
-/**
- * amd_iommu_unbind_pasid() - Unbind a PASID from its task on
- * a device
- * @pdev: The device of the PASID
- * @pasid: The PASID to unbind
- *
- * When this function returns the device is no longer using the PASID
- * and the PASID is no longer bound to its task.
- */
-extern void amd_iommu_unbind_pasid(struct pci_dev *pdev, u32 pasid);
-
-/**
- * amd_iommu_set_invalid_ppr_cb() - Register a call-back for failed
- * PRI requests
- * @pdev: The PCI device the call-back should be registered for
- * @cb: The call-back function
- *
- * The IOMMUv2 driver invokes this call-back when it is unable to
- * successfully handle a PRI request. The device driver can then decide
- * which PRI response the device should see. Possible return values for
- * the call-back are:
- *
- * - AMD_IOMMU_INV_PRI_RSP_SUCCESS - Send SUCCESS back to the device
- * - AMD_IOMMU_INV_PRI_RSP_INVALID - Send INVALID back to the device
- * - AMD_IOMMU_INV_PRI_RSP_FAIL - Send Failure back to the device,
- * the device is required to disable
- * PRI when it receives this response
- *
- * The function returns 0 on success or negative value on error.
- */
-#define AMD_IOMMU_INV_PRI_RSP_SUCCESS 0
-#define AMD_IOMMU_INV_PRI_RSP_INVALID 1
-#define AMD_IOMMU_INV_PRI_RSP_FAIL 2
-
-typedef int (*amd_iommu_invalid_ppr_cb)(struct pci_dev *pdev,
- u32 pasid,
- unsigned long address,
- u16);
-
-extern int amd_iommu_set_invalid_ppr_cb(struct pci_dev *pdev,
- amd_iommu_invalid_ppr_cb cb);
-
-#define PPR_FAULT_EXEC (1 << 1)
-#define PPR_FAULT_READ (1 << 2)
-#define PPR_FAULT_WRITE (1 << 5)
-#define PPR_FAULT_USER (1 << 6)
-#define PPR_FAULT_RSVD (1 << 7)
-#define PPR_FAULT_GN (1 << 8)
-
-/**
- * amd_iommu_device_info() - Get information about IOMMUv2 support of a
- * PCI device
- * @pdev: PCI device to query information from
- * @info: A pointer to an amd_iommu_device_info structure which will contain
- * the information about the PCI device
- *
- * Returns 0 on success, negative value on error
- */
-
-#define AMD_IOMMU_DEVICE_FLAG_ATS_SUP 0x1 /* ATS feature supported */
-#define AMD_IOMMU_DEVICE_FLAG_PRI_SUP 0x2 /* PRI feature supported */
-#define AMD_IOMMU_DEVICE_FLAG_PASID_SUP 0x4 /* PASID context supported */
-#define AMD_IOMMU_DEVICE_FLAG_EXEC_SUP 0x8 /* Device may request execution
- on memory pages */
-#define AMD_IOMMU_DEVICE_FLAG_PRIV_SUP 0x10 /* Device may request
- super-user privileges */
-
-struct amd_iommu_device_info {
- int max_pasids;
- u32 flags;
-};
-
-extern int amd_iommu_device_info(struct pci_dev *pdev,
- struct amd_iommu_device_info *info);
-
-/**
- * amd_iommu_set_invalidate_ctx_cb() - Register a call-back for invalidating
- * a pasid context. This call-back is
- * invoked when the IOMMUv2 driver needs to
- * invalidate a PASID context, for example
- * because the task that is bound to that
- * context is about to exit.
- *
- * @pdev: The PCI device the call-back should be registered for
- * @cb: The call-back function
- */
-
-typedef void (*amd_iommu_invalidate_ctx)(struct pci_dev *pdev, u32 pasid);
-
-extern int amd_iommu_set_invalidate_ctx_cb(struct pci_dev *pdev,
- amd_iommu_invalidate_ctx cb);
#else /* CONFIG_AMD_IOMMU */
static inline int amd_iommu_detect(void) { return -ENODEV; }
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 73daa4fb168b..ddc25d239106 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -353,8 +353,8 @@ struct iommu_domain_ops {
struct iommu_iotlb_gather *iotlb_gather);
void (*flush_iotlb_all)(struct iommu_domain *domain);
- void (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova,
- size_t size);
+ int (*iotlb_sync_map)(struct iommu_domain *domain, unsigned long iova,
+ size_t size);
void (*iotlb_sync)(struct iommu_domain *domain,
struct iommu_iotlb_gather *iotlb_gather);
@@ -427,6 +427,7 @@ struct iommu_fault_param {
* @attach_deferred: the dma domain attachment is deferred
* @pci_32bit_workaround: Limit DMA allocations to 32-bit IOVAs
* @require_direct: device requires IOMMU_RESV_DIRECT regions
+ * @shadow_on_flush: IOTLB flushes are used to sync shadow tables
*
* TODO: migrate other per device data pointers under iommu_dev_data, e.g.
* struct iommu_group *iommu_group;
@@ -442,6 +443,7 @@ struct dev_iommu {
u32 attach_deferred:1;
u32 pci_32bit_workaround:1;
u32 require_direct:1;
+ u32 shadow_on_flush:1;
};
int iommu_device_register(struct iommu_device *iommu,