Diffstat (limited to 'drivers/iommu/amd')
-rw-r--r--  drivers/iommu/amd/Kconfig            |   1
-rw-r--r--  drivers/iommu/amd/Makefile           |   2
-rw-r--r--  drivers/iommu/amd/amd_iommu.h        |  31
-rw-r--r--  drivers/iommu/amd/amd_iommu_types.h  |  65
-rw-r--r--  drivers/iommu/amd/init.c             | 394
-rw-r--r--  drivers/iommu/amd/io_pgtable.c       | 560
-rw-r--r--  drivers/iommu/amd/iommu.c            | 971
-rw-r--r--  drivers/iommu/amd/iommu_v2.c         |   4
8 files changed, 1060 insertions, 968 deletions
diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig
index 626b97d0dd21..a3cbafb603f5 100644
--- a/drivers/iommu/amd/Kconfig
+++ b/drivers/iommu/amd/Kconfig
@@ -10,6 +10,7 @@ config AMD_IOMMU
select IOMMU_API
select IOMMU_IOVA
select IOMMU_DMA
+ select IOMMU_IO_PGTABLE
depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE
help
With this option you can enable support for AMD IOMMU hardware in
diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile
index dc5a2fa4fd37..a935f8f4b974 100644
--- a/drivers/iommu/amd/Makefile
+++ b/drivers/iommu/amd/Makefile
@@ -1,4 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o
+obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o
obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o
obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o
diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index 6b8cbdf71714..55dd38d814d9 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -11,7 +11,6 @@
#include "amd_iommu_types.h"
-extern int amd_iommu_get_num_iommus(void);
extern int amd_iommu_init_dma_ops(void);
extern int amd_iommu_init_passthrough(void);
extern irqreturn_t amd_iommu_int_thread(int irq, void *data);
@@ -36,6 +35,7 @@ extern void amd_iommu_disable(void);
extern int amd_iommu_reenable(int);
extern int amd_iommu_enable_faulting(void);
extern int amd_iommu_guest_ir;
+extern enum io_pgtable_fmt amd_iommu_pgtable;
/* IOMMUv2 specific functions */
struct iommu_domain;
@@ -56,11 +56,14 @@ extern void amd_iommu_domain_direct_map(struct iommu_domain *dom);
extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids);
extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid,
u64 address);
+extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain);
+extern void amd_iommu_domain_update(struct protection_domain *domain);
+extern void amd_iommu_domain_flush_complete(struct protection_domain *domain);
+extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain);
extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid);
extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid,
unsigned long cr3);
extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid);
-extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev);
#ifdef CONFIG_IRQ_REMAP
extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu);
@@ -84,12 +87,9 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev)
(pdev->device == PCI_DEVICE_ID_RD890_IOMMU);
}
-static inline bool iommu_feature(struct amd_iommu *iommu, u64 f)
+static inline bool iommu_feature(struct amd_iommu *iommu, u64 mask)
{
- if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
- return false;
-
- return !!(iommu->features & f);
+ return !!(iommu->features & mask);
}
static inline u64 iommu_virt_to_phys(void *vaddr)
@@ -102,6 +102,21 @@ static inline void *iommu_phys_to_virt(unsigned long paddr)
return phys_to_virt(__sme_clr(paddr));
}
+static inline
+void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
+{
+ atomic64_set(&domain->iop.pt_root, root);
+ domain->iop.root = (u64 *)(root & PAGE_MASK);
+ domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */
+}
+
+static inline
+void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
+{
+ amd_iommu_domain_set_pt_root(domain, 0);
+}
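
The two helpers above publish the page-table root and mode through a single atomically updated 64-bit word, so a reader can snapshot both values consistently without holding the domain lock. A minimal reader-side sketch (not part of the patch), mirroring the amd_iommu_domain_get_pgtable() helper that this diff removes from iommu.c:

static inline void example_get_pgtable(struct protection_domain *domain,
					u64 **root, int *mode)
{
	u64 pt_root = atomic64_read(&domain->iop.pt_root);

	*root = (u64 *)(pt_root & PAGE_MASK);	/* bits 63:12: root pointer */
	*mode = pt_root & 7;			/* bits 2:0: pgtable mode   */
}
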
+
+
extern bool translation_pre_enabled(struct amd_iommu *iommu);
extern bool amd_iommu_is_attach_deferred(struct iommu_domain *domain,
struct device *dev);
@@ -114,4 +129,6 @@ void amd_iommu_apply_ivrs_quirks(void);
static inline void amd_iommu_apply_ivrs_quirks(void) { }
#endif
+extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
+ u64 *root, int mode);
#endif
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index f696ac7c5f89..94c1a7a9876d 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -15,6 +15,7 @@
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/irqreturn.h>
+#include <linux/io-pgtable.h>
/*
* Maximum number of IOMMUs supported
@@ -252,14 +253,35 @@
#define GA_GUEST_NR 0x1
+#define IOMMU_IN_ADDR_BIT_SIZE 52
+#define IOMMU_OUT_ADDR_BIT_SIZE 52
+
+/*
+ * This bitmap is used to advertise the page sizes our hardware supports
+ * to the IOMMU core, which will then use this information to split
+ * physically contiguous memory regions it is mapping into page sizes
+ * that we support.
+ *
+ * 512GB Pages are not supported due to a hardware bug
+ */
+#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
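
As a quick sanity check on the bitmap above (an illustrative user-space snippet, not part of the patch): ~0xFFF clears every size below 4 KiB, and ~(2ULL << 38) clears bit 39, which corresponds to the buggy 512 GiB page size.

#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* Mirrors AMD_IOMMU_PGSIZES on a 64-bit build. */
	uint64_t pgsizes = (~0xFFFULL) & ~(2ULL << 38);

	assert(pgsizes & (1ULL << 12));		/* 4 KiB advertised     */
	assert(pgsizes & (1ULL << 21));		/* 2 MiB advertised     */
	assert(pgsizes & (1ULL << 30));		/* 1 GiB advertised     */
	assert(!(pgsizes & (1ULL << 39)));	/* 512 GiB filtered out */
	return 0;
}
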
+
/* Bit value definition for dte irq remapping fields*/
#define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6)
#define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60)
-#define DTE_IRQ_TABLE_LEN_MASK (0xfULL << 1)
#define DTE_IRQ_REMAP_INTCTL (2ULL << 60)
-#define DTE_IRQ_TABLE_LEN (8ULL << 1)
#define DTE_IRQ_REMAP_ENABLE 1ULL
+/*
+ * AMD IOMMU hardware only supports 512 IRTEs despite
+ * the architectural limitation of 2048 entries.
+ */
+#define DTE_INTTAB_ALIGNMENT 128
+#define DTE_INTTABLEN_VALUE 9ULL
+#define DTE_INTTABLEN (DTE_INTTABLEN_VALUE << 1)
+#define DTE_INTTABLEN_MASK (0xfULL << 1)
+#define MAX_IRQS_PER_TABLE (1 << DTE_INTTABLEN_VALUE)
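
The table length is encoded in DTE bits [4:1] as a power of two, so the value 9 above selects 2^9 = 512 IRTEs per device, up from the previous fixed MAX_IRQS_PER_TABLE of 256 removed further down. An illustrative calculation (not part of the patch):

/*
 * DTE_INTTABLEN_VALUE = 9        ->  table length = 2^9 = 512 IRTEs
 * MAX_IRQS_PER_TABLE  = 1 << 9   ->  512 remappable interrupts per device
 * table size          = 512 * 16 ->  8 KiB, allocated with 128-byte
 *                                    (DTE_INTTAB_ALIGNMENT) alignment,
 *                                    assuming the 2 * u64 IRTE format that
 *                                    remap_cache_sz in init.c is sized for.
 */
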
+
#define PAGE_MODE_NONE 0x00
#define PAGE_MODE_1_LEVEL 0x01
#define PAGE_MODE_2_LEVEL 0x02
@@ -379,6 +401,10 @@
#define IOMMU_CAP_NPCACHE 26
#define IOMMU_CAP_EFR 27
+/* IOMMU IVINFO */
+#define IOMMU_IVINFO_OFFSET 36
+#define IOMMU_IVINFO_EFRSUP BIT(0)
+
/* IOMMU Feature Reporting Field (for IVHD type 10h */
#define IOMMU_FEAT_GASUP_SHIFT 6
@@ -409,9 +435,6 @@ extern bool amd_iommu_np_cache;
/* Only true if all IOMMUs support device IOTLBs */
extern bool amd_iommu_iotlb_sup;
-#define MAX_IRQS_PER_TABLE 256
-#define IRQ_TABLE_ALIGNMENT 128
-
struct irq_remap_table {
raw_spinlock_t lock;
unsigned min_index;
@@ -461,6 +484,27 @@ struct amd_irte_ops;
#define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0)
+#define io_pgtable_to_data(x) \
+ container_of((x), struct amd_io_pgtable, iop)
+
+#define io_pgtable_ops_to_data(x) \
+ io_pgtable_to_data(io_pgtable_ops_to_pgtable(x))
+
+#define io_pgtable_ops_to_domain(x) \
+ container_of(io_pgtable_ops_to_data(x), \
+ struct protection_domain, iop)
+
+#define io_pgtable_cfg_to_data(x) \
+ container_of((x), struct amd_io_pgtable, pgtbl_cfg)
+
+struct amd_io_pgtable {
+ struct io_pgtable_cfg pgtbl_cfg;
+ struct io_pgtable iop;
+ int mode;
+ u64 *root;
+ atomic64_t pt_root; /* pgtable root and pgtable mode */
+};
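
Because struct amd_io_pgtable is embedded directly in struct protection_domain (the new iop member below), the macros above can walk from an io_pgtable_ops pointer back to the owning domain using nothing but container_of(). A sketch of what io_pgtable_ops_to_domain() expands to (illustrative, not part of the patch):

static inline struct protection_domain *
example_ops_to_domain(struct io_pgtable_ops *ops)
{
	/* ops is embedded in struct io_pgtable ... */
	struct io_pgtable *iop = container_of(ops, struct io_pgtable, ops);
	/* ... which is the iop member of struct amd_io_pgtable ... */
	struct amd_io_pgtable *pgtable =
		container_of(iop, struct amd_io_pgtable, iop);
	/* ... which is the iop member of struct protection_domain. */
	return container_of(pgtable, struct protection_domain, iop);
}
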
+
/*
* This structure contains generic data for IOMMU protection domains
* independent of their use.
@@ -469,9 +513,9 @@ struct protection_domain {
struct list_head dev_list; /* List of all devices in this domain */
struct iommu_domain domain; /* generic domain handle used by
iommu core code */
+ struct amd_io_pgtable iop;
spinlock_t lock; /* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
- atomic64_t pt_root; /* pgtable root and pgtable mode */
int glx; /* Number of levels for GCR3 table */
u64 *gcr3_tbl; /* Guest CR3 table */
unsigned long flags; /* flags to find out type of domain */
@@ -479,12 +523,6 @@ struct protection_domain {
unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */
};
-/* For decocded pt_root */
-struct domain_pgtable {
- int mode;
- u64 *root;
-};
-
/*
* Structure where we save information about one hardware AMD IOMMU in the
* system.
@@ -655,7 +693,6 @@ struct iommu_dev_data {
} ats; /* ATS state */
bool pri_tlp; /* PASID TLB required for
PPR completions */
- u32 errata; /* Bitmap for errata to apply */
bool use_vapic; /* Enable device to use vapic mode */
bool defer_attach;
@@ -893,7 +930,7 @@ struct amd_ir_data {
};
struct amd_irte_ops {
- void (*prepare)(void *, u32, u32, u8, u32, int);
+ void (*prepare)(void *, u32, bool, u8, u32, int);
void (*activate)(void *, u16, u16);
void (*deactivate)(void *, u16, u16);
void (*set_affinity)(void *, u16, u16, u8, u32);
diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 82e4af8f09bb..d006724f4dc2 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -16,6 +16,7 @@
#include <linux/syscore_ops.h>
#include <linux/interrupt.h>
#include <linux/msi.h>
+#include <linux/irq.h>
#include <linux/amd-iommu.h>
#include <linux/export.h>
#include <linux/kmemleak.h>
@@ -23,12 +24,12 @@
#include <asm/pci-direct.h>
#include <asm/iommu.h>
#include <asm/apic.h>
-#include <asm/msidef.h>
#include <asm/gart.h>
#include <asm/x86_init.h>
#include <asm/iommu_table.h>
#include <asm/io_apic.h>
#include <asm/irq_remapping.h>
+#include <asm/set_memory.h>
#include <linux/crash_dump.h>
@@ -146,6 +147,8 @@ struct ivmd_header {
bool amd_iommu_dump;
bool amd_iommu_irq_remap __read_mostly;
+enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1;
+
int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC;
static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE;
@@ -204,7 +207,6 @@ u16 *amd_iommu_alias_table;
* for a specific device. It is also indexed by the PCI device id.
*/
struct amd_iommu **amd_iommu_rlookup_table;
-EXPORT_SYMBOL(amd_iommu_rlookup_table);
/*
* This table is used to find the irq remapping table for a given device id
@@ -256,11 +258,12 @@ static void init_device_table_dma(void);
static bool amd_iommu_pre_enabled = true;
+static u32 amd_iommu_ivinfo __initdata;
+
bool translation_pre_enabled(struct amd_iommu *iommu)
{
return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED);
}
-EXPORT_SYMBOL(translation_pre_enabled);
static void clear_translation_pre_enabled(struct amd_iommu *iommu)
{
@@ -295,6 +298,18 @@ int amd_iommu_get_num_iommus(void)
return amd_iommus_present;
}
+/*
+ * For IVHD type 0x11/0x40, EFR is also available via IVHD.
+ * Default to IVHD EFR since it is available sooner
+ * (i.e. before PCI init).
+ */
+static void __init early_iommu_features_init(struct amd_iommu *iommu,
+ struct ivhd_header *h)
+{
+ if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP)
+ iommu->features = h->efr_reg;
+}
+
/* Access to l1 and l2 indexed register spaces */
static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address)
@@ -672,11 +687,27 @@ static void __init free_command_buffer(struct amd_iommu *iommu)
free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE));
}
+static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu,
+ gfp_t gfp, size_t size)
+{
+ int order = get_order(size);
+ void *buf = (void *)__get_free_pages(gfp, order);
+
+ if (buf &&
+ iommu_feature(iommu, FEATURE_SNP) &&
+ set_memory_4k((unsigned long)buf, (1 << order))) {
+ free_pages((unsigned long)buf, order);
+ buf = NULL;
+ }
+
+ return buf;
+}
+
/* allocates the memory where the IOMMU will log its events to */
static int __init alloc_event_buffer(struct amd_iommu *iommu)
{
- iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(EVT_BUFFER_SIZE));
+ iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
+ EVT_BUFFER_SIZE);
return iommu->evt_buf ? 0 : -ENOMEM;
}
@@ -715,8 +746,8 @@ static void __init free_event_buffer(struct amd_iommu *iommu)
/* allocates the memory where the IOMMU will log its events to */
static int __init alloc_ppr_log(struct amd_iommu *iommu)
{
- iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
- get_order(PPR_LOG_SIZE));
+ iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO,
+ PPR_LOG_SIZE);
return iommu->ppr_log ? 0 : -ENOMEM;
}
@@ -838,7 +869,7 @@ static int iommu_init_ga(struct amd_iommu *iommu)
static int __init alloc_cwwb_sem(struct amd_iommu *iommu)
{
- iommu->cmd_sem = (void *)get_zeroed_page(GFP_KERNEL);
+ iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1);
return iommu->cmd_sem ? 0 : -ENOMEM;
}
@@ -972,10 +1003,10 @@ static bool copy_device_table(void)
irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE;
int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK;
- int_tab_len = old_devtb[devid].data[2] & DTE_IRQ_TABLE_LEN_MASK;
+ int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK;
if (irq_v && (int_ctl || int_tab_len)) {
if ((int_ctl != DTE_IRQ_REMAP_INTCTL) ||
- (int_tab_len != DTE_IRQ_TABLE_LEN)) {
+ (int_tab_len != DTE_INTTABLEN)) {
pr_err("Wrong old irq remapping flag: %#x\n", devid);
return false;
}
@@ -1558,15 +1589,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
break;
}
- /*
- * Note: Since iommu_update_intcapxt() leverages
- * the IOMMU MMIO access to MSI capability block registers
- * for MSI address lo/hi/data, we need to check both
- * EFR[XtSup] and EFR[MsiCapMmioSup] for x2APIC support.
- */
- if ((h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) &&
- (h->efr_reg & BIT(IOMMU_EFR_MSICAPMMIOSUP_SHIFT)))
+ if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT))
amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE;
+
+ early_iommu_features_init(iommu, h);
+
break;
default:
return -EINVAL;
@@ -1602,9 +1629,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h)
if (ret)
return ret;
- ret = amd_iommu_create_irq_domain(iommu);
- if (ret)
- return ret;
+ if (amd_iommu_irq_remap) {
+ ret = amd_iommu_create_irq_domain(iommu);
+ if (ret)
+ return ret;
+ }
/*
* Make sure IOMMU is not considered to translate itself. The IVRS
@@ -1683,33 +1712,16 @@ static int __init init_iommu_all(struct acpi_table_header *table)
return 0;
}
-static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr,
- u8 fxn, u64 *value, bool is_write);
-
static void init_iommu_perf_ctr(struct amd_iommu *iommu)
{
+ u64 val;
struct pci_dev *pdev = iommu->dev;
- u64 val = 0xabcd, val2 = 0, save_reg = 0;
if (!iommu_feature(iommu, FEATURE_PC))
return;
amd_iommu_pc_present = true;
- /* save the value to restore, if writable */
- if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false))
- goto pc_false;
-
- /* Check if the performance counters can be written to */
- if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) ||
- (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) ||
- (val != val2))
- goto pc_false;
-
- /* restore */
- if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true))
- goto pc_false;
-
pci_info(pdev, "IOMMU performance counters supported\n");
val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET);
@@ -1717,11 +1729,6 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu)
iommu->max_counters = (u8) ((val >> 7) & 0xf);
return;
-
-pc_false:
- pci_err(pdev, "Unable to read/write to IOMMU perf counter.\n");
- amd_iommu_pc_present = false;
- return;
}
static ssize_t amd_iommu_show_cap(struct device *dev,
@@ -1758,6 +1765,35 @@ static const struct attribute_group *amd_iommu_groups[] = {
NULL,
};
+/*
+ * Note: IVHD 0x11 and 0x40 also contain an exact copy
+ * of the IOMMU Extended Feature Register [MMIO Offset 0030h].
+ * Default to EFR in IVHD since it is available sooner (i.e. before PCI init).
+ */
+static void __init late_iommu_features_init(struct amd_iommu *iommu)
+{
+ u64 features;
+
+ if (!(iommu->cap & (1 << IOMMU_CAP_EFR)))
+ return;
+
+ /* read extended feature bits */
+ features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
+
+ if (!iommu->features) {
+ iommu->features = features;
+ return;
+ }
+
+ /*
+ * Sanity check and warn if EFR values from
+ * IVHD and MMIO conflict.
+ */
+ if (features != iommu->features)
+ pr_warn(FW_WARN "EFR mismatch. Use IVHD EFR (%#llx : %#llx).\n",
+ features, iommu->features);
+}
+
static int __init iommu_init_pci(struct amd_iommu *iommu)
{
int cap_ptr = iommu->cap_ptr;
@@ -1777,8 +1813,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB)))
amd_iommu_iotlb_sup = false;
- /* read extended feature bits */
- iommu->features = readq(iommu->mmio_base + MMIO_EXT_FEATURES);
+ late_iommu_features_init(iommu);
if (iommu_feature(iommu, FEATURE_GT)) {
int glxval;
@@ -1853,8 +1888,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu)
iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev,
amd_iommu_groups, "ivhd%d", iommu->index);
- iommu_device_set_ops(&iommu->iommu, &amd_iommu_ops);
- iommu_device_register(&iommu->iommu);
+ iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL);
return pci_enable_device(iommu->dev);
}
@@ -1871,7 +1905,7 @@ static void print_iommu_info(void)
struct pci_dev *pdev = iommu->dev;
int i;
- pci_info(pdev, "Found IOMMU cap 0x%hx\n", iommu->cap_ptr);
+ pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr);
if (iommu->cap & (1 << IOMMU_CAP_EFR)) {
pci_info(pdev, "Extended features (%#llx):",
@@ -1899,7 +1933,7 @@ static void print_iommu_info(void)
static int __init amd_iommu_init_pci(void)
{
struct amd_iommu *iommu;
- int ret = 0;
+ int ret;
for_each_iommu(iommu) {
ret = iommu_init_pci(iommu);
@@ -1961,103 +1995,193 @@ static int iommu_setup_msi(struct amd_iommu *iommu)
return r;
}
- iommu->int_enabled = true;
-
return 0;
}
-#define XT_INT_DEST_MODE(x) (((x) & 0x1ULL) << 2)
-#define XT_INT_DEST_LO(x) (((x) & 0xFFFFFFULL) << 8)
-#define XT_INT_VEC(x) (((x) & 0xFFULL) << 32)
-#define XT_INT_DEST_HI(x) ((((x) >> 24) & 0xFFULL) << 56)
+union intcapxt {
+ u64 capxt;
+ struct {
+ u64 reserved_0 : 2,
+ dest_mode_logical : 1,
+ reserved_1 : 5,
+ destid_0_23 : 24,
+ vector : 8,
+ reserved_2 : 16,
+ destid_24_31 : 8;
+ };
+} __attribute__ ((packed));
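
The destination APIC ID is split across two fields so that 32-bit x2APIC IDs fit into the register. A small illustrative helper (not part of the patch) packing a value the same way intcapxt_irqdomain_activate() below does, assuming the union layout above:

static u64 example_pack_intcapxt(u32 dest_apicid, u8 vector, bool logical)
{
	union intcapxt xt;

	xt.capxt = 0ULL;
	xt.dest_mode_logical = logical;
	xt.vector = vector;
	xt.destid_0_23 = dest_apicid & GENMASK(23, 0);	/* low 24 bits */
	xt.destid_24_31 = dest_apicid >> 24;		/* high 8 bits */

	return xt.capxt;	/* written to the EVT/PPR/GALOG offsets */
}
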
/*
- * Setup the IntCapXT registers with interrupt routing information
- * based on the PCI MSI capability block registers, accessed via
- * MMIO MSI address low/hi and MSI data registers.
+ * There isn't really any need to mask/unmask at the irqchip level because
+ * the 64-bit INTCAPXT registers can be updated atomically without tearing
+ * when the affinity is being updated.
*/
-static void iommu_update_intcapxt(struct amd_iommu *iommu)
+static void intcapxt_unmask_irq(struct irq_data *data)
{
- u64 val;
- u32 addr_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET);
- u32 addr_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET);
- u32 data = readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET);
- bool dm = (addr_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
- u32 dest = ((addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xFF);
+}
- if (x2apic_enabled())
- dest |= MSI_ADDR_EXT_DEST_ID(addr_hi);
+static void intcapxt_mask_irq(struct irq_data *data)
+{
+}
- val = XT_INT_VEC(data & 0xFF) |
- XT_INT_DEST_MODE(dm) |
- XT_INT_DEST_LO(dest) |
- XT_INT_DEST_HI(dest);
+static struct irq_chip intcapxt_controller;
+
+static int intcapxt_irqdomain_activate(struct irq_domain *domain,
+ struct irq_data *irqd, bool reserve)
+{
+ struct amd_iommu *iommu = irqd->chip_data;
+ struct irq_cfg *cfg = irqd_cfg(irqd);
+ union intcapxt xt;
+
+ xt.capxt = 0ULL;
+ xt.dest_mode_logical = apic->dest_mode_logical;
+ xt.vector = cfg->vector;
+ xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0);
+ xt.destid_24_31 = cfg->dest_apicid >> 24;
/**
* Current IOMMU implementation uses the same IRQ for all
* 3 IOMMU interrupts.
*/
- writeq(val, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
- writeq(val, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
- writeq(val, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
+ writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
+ writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
+ writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
+ return 0;
}
-static void _irq_notifier_notify(struct irq_affinity_notify *notify,
- const cpumask_t *mask)
+static void intcapxt_irqdomain_deactivate(struct irq_domain *domain,
+ struct irq_data *irqd)
{
- struct amd_iommu *iommu;
+ intcapxt_mask_irq(irqd);
+}
- for_each_iommu(iommu) {
- if (iommu->dev->irq == notify->irq) {
- iommu_update_intcapxt(iommu);
- break;
- }
+
+static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *arg)
+{
+ struct irq_alloc_info *info = arg;
+ int i, ret;
+
+ if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI)
+ return -EINVAL;
+
+ ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg);
+ if (ret < 0)
+ return ret;
+
+ for (i = virq; i < virq + nr_irqs; i++) {
+ struct irq_data *irqd = irq_domain_get_irq_data(domain, i);
+
+ irqd->chip = &intcapxt_controller;
+ irqd->chip_data = info->data;
+ __irq_set_handler(i, handle_edge_irq, 0, "edge");
}
+
+ return ret;
}
-static void _irq_notifier_release(struct kref *ref)
+static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
{
+ irq_domain_free_irqs_top(domain, virq, nr_irqs);
}
-static int iommu_init_intcapxt(struct amd_iommu *iommu)
+static int intcapxt_set_affinity(struct irq_data *irqd,
+ const struct cpumask *mask, bool force)
{
+ struct irq_data *parent = irqd->parent_data;
int ret;
- struct irq_affinity_notify *notify = &iommu->intcapxt_notify;
- /**
- * IntCapXT requires XTSup=1 and MsiCapMmioSup=1,
- * which can be inferred from amd_iommu_xt_mode.
- */
- if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE)
- return 0;
+ ret = parent->chip->irq_set_affinity(parent, mask, force);
+ if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE)
+ return ret;
- /**
- * Also, we need to setup notifier to update the IntCapXT registers
- * whenever the irq affinity is changed from user-space.
- */
- notify->irq = iommu->dev->irq;
- notify->notify = _irq_notifier_notify,
- notify->release = _irq_notifier_release,
- ret = irq_set_affinity_notifier(iommu->dev->irq, notify);
+ return intcapxt_irqdomain_activate(irqd->domain, irqd, false);
+}
+
+static struct irq_chip intcapxt_controller = {
+ .name = "IOMMU-MSI",
+ .irq_unmask = intcapxt_unmask_irq,
+ .irq_mask = intcapxt_mask_irq,
+ .irq_ack = irq_chip_ack_parent,
+ .irq_retrigger = irq_chip_retrigger_hierarchy,
+ .irq_set_affinity = intcapxt_set_affinity,
+ .flags = IRQCHIP_SKIP_SET_WAKE,
+};
+
+static const struct irq_domain_ops intcapxt_domain_ops = {
+ .alloc = intcapxt_irqdomain_alloc,
+ .free = intcapxt_irqdomain_free,
+ .activate = intcapxt_irqdomain_activate,
+ .deactivate = intcapxt_irqdomain_deactivate,
+};
+
+
+static struct irq_domain *iommu_irqdomain;
+
+static struct irq_domain *iommu_get_irqdomain(void)
+{
+ struct fwnode_handle *fn;
+
+ /* No need for locking here (yet) as the init is single-threaded */
+ if (iommu_irqdomain)
+ return iommu_irqdomain;
+
+ fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI");
+ if (!fn)
+ return NULL;
+
+ iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0,
+ fn, &intcapxt_domain_ops,
+ NULL);
+ if (!iommu_irqdomain)
+ irq_domain_free_fwnode(fn);
+
+ return iommu_irqdomain;
+}
+
+static int iommu_setup_intcapxt(struct amd_iommu *iommu)
+{
+ struct irq_domain *domain;
+ struct irq_alloc_info info;
+ int irq, ret;
+
+ domain = iommu_get_irqdomain();
+ if (!domain)
+ return -ENXIO;
+
+ init_irq_alloc_info(&info, NULL);
+ info.type = X86_IRQ_ALLOC_TYPE_AMDVI;
+ info.data = iommu;
+
+ irq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info);
+ if (irq < 0) {
+ irq_domain_remove(domain);
+ return irq;
+ }
+
+ ret = request_threaded_irq(irq, amd_iommu_int_handler,
+ amd_iommu_int_thread, 0, "AMD-Vi", iommu);
if (ret) {
- pr_err("Failed to register irq affinity notifier (devid=%#x, irq %d)\n",
- iommu->devid, iommu->dev->irq);
+ irq_domain_free_irqs(irq, 1);
+ irq_domain_remove(domain);
return ret;
}
- iommu_update_intcapxt(iommu);
iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
- return ret;
+ return 0;
}
-static int iommu_init_msi(struct amd_iommu *iommu)
+static int iommu_init_irq(struct amd_iommu *iommu)
{
int ret;
if (iommu->int_enabled)
goto enable_faults;
- if (iommu->dev->msi_cap)
+ if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE)
+ ret = iommu_setup_intcapxt(iommu);
+ else if (iommu->dev->msi_cap)
ret = iommu_setup_msi(iommu);
else
ret = -ENODEV;
@@ -2065,11 +2189,8 @@ static int iommu_init_msi(struct amd_iommu *iommu)
if (ret)
return ret;
+ iommu->int_enabled = true;
enable_faults:
- ret = iommu_init_intcapxt(iommu);
- if (ret)
- return ret;
-
iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
if (iommu->ppr_log != NULL)
@@ -2508,6 +2629,11 @@ static void __init free_dma_resources(void)
free_unity_maps();
}
+static void __init ivinfo_init(void *ivrs)
+{
+ amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET));
+}
+
/*
* This is the hardware init function for AMD IOMMU in the system.
* This function is called either from amd_iommu_init or from the interrupt
@@ -2538,9 +2664,8 @@ static void __init free_dma_resources(void)
static int __init early_amd_iommu_init(void)
{
struct acpi_table_header *ivrs_base;
+ int i, remap_cache_sz, ret;
acpi_status status;
- int i, remap_cache_sz, ret = 0;
- u32 pci_id;
if (!amd_iommu_detected)
return -ENODEV;
@@ -2562,6 +2687,8 @@ static int __init early_amd_iommu_init(void)
if (ret)
goto out;
+ ivinfo_init(ivrs_base);
+
amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base);
DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type);
@@ -2628,16 +2755,6 @@ static int __init early_amd_iommu_init(void)
if (ret)
goto out;
- /* Disable IOMMU if there's Stoney Ridge graphics */
- for (i = 0; i < 32; i++) {
- pci_id = read_pci_config(0, i, 0, 0);
- if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
- pr_info("Disable IOMMU on Stoney Ridge\n");
- amd_iommu_disabled = true;
- break;
- }
- }
-
/* Disable any previously enabled IOMMUs */
if (!is_kdump_kernel() || amd_iommu_disabled)
disable_iommus();
@@ -2657,7 +2774,7 @@ static int __init early_amd_iommu_init(void)
remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2);
amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache",
remap_cache_sz,
- IRQ_TABLE_ALIGNMENT,
+ DTE_INTTAB_ALIGNMENT,
0, NULL);
if (!amd_iommu_irq_cache)
goto out;
@@ -2681,7 +2798,6 @@ static int __init early_amd_iommu_init(void)
out:
/* Don't leak any ACPI memory */
acpi_put_table(ivrs_base);
- ivrs_base = NULL;
return ret;
}
@@ -2692,7 +2808,7 @@ static int amd_iommu_enable_interrupts(void)
int ret = 0;
for_each_iommu(iommu) {
- ret = iommu_init_msi(iommu);
+ ret = iommu_init_irq(iommu);
if (ret)
goto out;
}
@@ -2705,6 +2821,7 @@ static bool detect_ivrs(void)
{
struct acpi_table_header *ivrs_base;
acpi_status status;
+ int i;
status = acpi_get_table("IVRS", 0, &ivrs_base);
if (status == AE_NOT_FOUND)
@@ -2717,6 +2834,17 @@ static bool detect_ivrs(void)
acpi_put_table(ivrs_base);
+ /* Don't use IOMMU if there is Stoney Ridge graphics */
+ for (i = 0; i < 32; i++) {
+ u32 pci_id;
+
+ pci_id = read_pci_config(0, i, 0, 0);
+ if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
+ pr_info("Disable IOMMU on Stoney Ridge\n");
+ return false;
+ }
+ }
+
/* Make sure ACS will be enabled during PCI probe */
pci_request_acs();
@@ -2743,12 +2871,12 @@ static int __init state_next(void)
}
break;
case IOMMU_IVRS_DETECTED:
- ret = early_amd_iommu_init();
- init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
- if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) {
- pr_info("AMD IOMMU disabled\n");
+ if (amd_iommu_disabled) {
init_state = IOMMU_CMDLINE_DISABLED;
ret = -EINVAL;
+ } else {
+ ret = early_amd_iommu_init();
+ init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
}
break;
case IOMMU_ACPI_FINISHED:
@@ -2826,8 +2954,11 @@ int __init amd_iommu_prepare(void)
amd_iommu_irq_remap = true;
ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
- if (ret)
+ if (ret) {
+ amd_iommu_irq_remap = false;
return ret;
+ }
+
return amd_iommu_irq_remap ? 0 : -ENODEV;
}
@@ -3098,7 +3229,6 @@ struct amd_iommu *get_amd_iommu(unsigned int idx)
return iommu;
return NULL;
}
-EXPORT_SYMBOL(get_amd_iommu);
/****************************************************************************
*
@@ -3180,7 +3310,6 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64
return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false);
}
-EXPORT_SYMBOL(amd_iommu_pc_get_reg);
int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value)
{
@@ -3189,4 +3318,3 @@ int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64
return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true);
}
-EXPORT_SYMBOL(amd_iommu_pc_set_reg);
diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c
new file mode 100644
index 000000000000..bb0ee5c9fde7
--- /dev/null
+++ b/drivers/iommu/amd/io_pgtable.c
@@ -0,0 +1,560 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * CPU-agnostic AMD IO page table allocator.
+ *
+ * Copyright (C) 2020 Advanced Micro Devices, Inc.
+ * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
+ */
+
+#define pr_fmt(fmt) "AMD-Vi: " fmt
+#define dev_fmt(fmt) pr_fmt(fmt)
+
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/io-pgtable.h>
+#include <linux/kernel.h>
+#include <linux/sizes.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/dma-mapping.h>
+
+#include <asm/barrier.h>
+
+#include "amd_iommu_types.h"
+#include "amd_iommu.h"
+
+static void v1_tlb_flush_all(void *cookie)
+{
+}
+
+static void v1_tlb_flush_walk(unsigned long iova, size_t size,
+ size_t granule, void *cookie)
+{
+}
+
+static void v1_tlb_add_page(struct iommu_iotlb_gather *gather,
+ unsigned long iova, size_t granule,
+ void *cookie)
+{
+}
+
+static const struct iommu_flush_ops v1_flush_ops = {
+ .tlb_flush_all = v1_tlb_flush_all,
+ .tlb_flush_walk = v1_tlb_flush_walk,
+ .tlb_add_page = v1_tlb_add_page,
+};
+
+/*
+ * Helper function to get the first pte of a large mapping
+ */
+static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
+ unsigned long *count)
+{
+ unsigned long pte_mask, pg_size, cnt;
+ u64 *fpte;
+
+ pg_size = PTE_PAGE_SIZE(*pte);
+ cnt = PAGE_SIZE_PTE_COUNT(pg_size);
+ pte_mask = ~((cnt << 3) - 1);
+ fpte = (u64 *)(((unsigned long)pte) & pte_mask);
+
+ if (page_size)
+ *page_size = pg_size;
+
+ if (count)
+ *count = cnt;
+
+ return fpte;
+}
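
A worked example with illustrative numbers: a 32 KiB mapping is realised as eight replicated 4 KiB-level PTEs, so the helper rounds the incoming PTE pointer down to the start of that run.

/*
 * Worked example (illustrative): 32 KiB large mapping.
 *
 *   pg_size  = 0x8000                   (PTE_PAGE_SIZE(*pte))
 *   cnt      = 0x8000 / 0x1000 = 8      (replicated PTE slots)
 *   pte_mask = ~((8 << 3) - 1) = ~0x3f  (8 slots * 8 bytes each)
 *
 * Masking the pointer with pte_mask returns the 64-byte-aligned first
 * replica, which is what callers use to tear the whole run down.
 */
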
+
+/****************************************************************************
+ *
+ * The functions below are used the create the page table mappings for
+ * unity mapped regions.
+ *
+ ****************************************************************************/
+
+static void free_page_list(struct page *freelist)
+{
+ while (freelist != NULL) {
+ unsigned long p = (unsigned long)page_address(freelist);
+
+ freelist = freelist->freelist;
+ free_page(p);
+ }
+}
+
+static struct page *free_pt_page(unsigned long pt, struct page *freelist)
+{
+ struct page *p = virt_to_page((void *)pt);
+
+ p->freelist = freelist;
+
+ return p;
+}
+
+#define DEFINE_FREE_PT_FN(LVL, FN) \
+static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \
+{ \
+ unsigned long p; \
+ u64 *pt; \
+ int i; \
+ \
+ pt = (u64 *)__pt; \
+ \
+ for (i = 0; i < 512; ++i) { \
+ /* PTE present? */ \
+ if (!IOMMU_PTE_PRESENT(pt[i])) \
+ continue; \
+ \
+ /* Large PTE? */ \
+ if (PM_PTE_LEVEL(pt[i]) == 0 || \
+ PM_PTE_LEVEL(pt[i]) == 7) \
+ continue; \
+ \
+ p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \
+ freelist = FN(p, freelist); \
+ } \
+ \
+ return free_pt_page((unsigned long)pt, freelist); \
+}
+
+DEFINE_FREE_PT_FN(l2, free_pt_page)
+DEFINE_FREE_PT_FN(l3, free_pt_l2)
+DEFINE_FREE_PT_FN(l4, free_pt_l3)
+DEFINE_FREE_PT_FN(l5, free_pt_l4)
+DEFINE_FREE_PT_FN(l6, free_pt_l5)
+
+static struct page *free_sub_pt(unsigned long root, int mode,
+ struct page *freelist)
+{
+ switch (mode) {
+ case PAGE_MODE_NONE:
+ case PAGE_MODE_7_LEVEL:
+ break;
+ case PAGE_MODE_1_LEVEL:
+ freelist = free_pt_page(root, freelist);
+ break;
+ case PAGE_MODE_2_LEVEL:
+ freelist = free_pt_l2(root, freelist);
+ break;
+ case PAGE_MODE_3_LEVEL:
+ freelist = free_pt_l3(root, freelist);
+ break;
+ case PAGE_MODE_4_LEVEL:
+ freelist = free_pt_l4(root, freelist);
+ break;
+ case PAGE_MODE_5_LEVEL:
+ freelist = free_pt_l5(root, freelist);
+ break;
+ case PAGE_MODE_6_LEVEL:
+ freelist = free_pt_l6(root, freelist);
+ break;
+ default:
+ BUG();
+ }
+
+ return freelist;
+}
+
+void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
+ u64 *root, int mode)
+{
+ u64 pt_root;
+
+ /* lowest 3 bits encode pgtable mode */
+ pt_root = mode & 7;
+ pt_root |= (u64)root;
+
+ amd_iommu_domain_set_pt_root(domain, pt_root);
+}
+
+/*
+ * This function is used to add another level to an IO page table. Adding
+ * another level increases the size of the address space by 9 bits to a size up
+ * to 64 bits.
+ */
+static bool increase_address_space(struct protection_domain *domain,
+ unsigned long address,
+ gfp_t gfp)
+{
+ unsigned long flags;
+ bool ret = true;
+ u64 *pte;
+
+ pte = (void *)get_zeroed_page(gfp);
+ if (!pte)
+ return false;
+
+ spin_lock_irqsave(&domain->lock, flags);
+
+ if (address <= PM_LEVEL_SIZE(domain->iop.mode))
+ goto out;
+
+ ret = false;
+ if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
+ goto out;
+
+ *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));
+
+ domain->iop.root = pte;
+ domain->iop.mode += 1;
+ amd_iommu_update_and_flush_device_table(domain);
+ amd_iommu_domain_flush_complete(domain);
+
+ /*
+ * Device Table needs to be updated and flushed before the new root can
+ * be published.
+ */
+ amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);
+
+ pte = NULL;
+ ret = true;
+
+out:
+ spin_unlock_irqrestore(&domain->lock, flags);
+ free_page((unsigned long)pte);
+
+ return ret;
+}
+
+static u64 *alloc_pte(struct protection_domain *domain,
+ unsigned long address,
+ unsigned long page_size,
+ u64 **pte_page,
+ gfp_t gfp,
+ bool *updated)
+{
+ int level, end_lvl;
+ u64 *pte, *page;
+
+ BUG_ON(!is_power_of_2(page_size));
+
+ while (address > PM_LEVEL_SIZE(domain->iop.mode)) {
+ /*
+ * Return an error if there is no memory to update the
+ * page-table.
+ */
+ if (!increase_address_space(domain, address, gfp))
+ return NULL;
+ }
+
+
+ level = domain->iop.mode - 1;
+ pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)];
+ address = PAGE_SIZE_ALIGN(address, page_size);
+ end_lvl = PAGE_SIZE_LEVEL(page_size);
+
+ while (level > end_lvl) {
+ u64 __pte, __npte;
+ int pte_level;
+
+ __pte = *pte;
+ pte_level = PM_PTE_LEVEL(__pte);
+
+ /*
+ * If we replace a series of large PTEs, we need
+ * to tear down all of them.
+ */
+ if (IOMMU_PTE_PRESENT(__pte) &&
+ pte_level == PAGE_MODE_7_LEVEL) {
+ unsigned long count, i;
+ u64 *lpte;
+
+ lpte = first_pte_l7(pte, NULL, &count);
+
+ /*
+ * Unmap the replicated PTEs that still match the
+ * original large mapping
+ */
+ for (i = 0; i < count; ++i)
+ cmpxchg64(&lpte[i], __pte, 0ULL);
+
+ *updated = true;
+ continue;
+ }
+
+ if (!IOMMU_PTE_PRESENT(__pte) ||
+ pte_level == PAGE_MODE_NONE) {
+ page = (u64 *)get_zeroed_page(gfp);
+
+ if (!page)
+ return NULL;
+
+ __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));
+
+ /* pte could have been changed somewhere. */
+ if (cmpxchg64(pte, __pte, __npte) != __pte)
+ free_page((unsigned long)page);
+ else if (IOMMU_PTE_PRESENT(__pte))
+ *updated = true;
+
+ continue;
+ }
+
+ /* No level skipping support yet */
+ if (pte_level != level)
+ return NULL;
+
+ level -= 1;
+
+ pte = IOMMU_PTE_PAGE(__pte);
+
+ if (pte_page && level == end_lvl)
+ *pte_page = pte;
+
+ pte = &pte[PM_LEVEL_INDEX(level, address)];
+ }
+
+ return pte;
+}
+
+/*
+ * This function checks if there is a PTE for a given dma address. If
+ * there is one, it returns the pointer to it.
+ */
+static u64 *fetch_pte(struct amd_io_pgtable *pgtable,
+ unsigned long address,
+ unsigned long *page_size)
+{
+ int level;
+ u64 *pte;
+
+ *page_size = 0;
+
+ if (address > PM_LEVEL_SIZE(pgtable->mode))
+ return NULL;
+
+ level = pgtable->mode - 1;
+ pte = &pgtable->root[PM_LEVEL_INDEX(level, address)];
+ *page_size = PTE_LEVEL_PAGE_SIZE(level);
+
+ while (level > 0) {
+
+ /* Not Present */
+ if (!IOMMU_PTE_PRESENT(*pte))
+ return NULL;
+
+ /* Large PTE */
+ if (PM_PTE_LEVEL(*pte) == 7 ||
+ PM_PTE_LEVEL(*pte) == 0)
+ break;
+
+ /* No level skipping support yet */
+ if (PM_PTE_LEVEL(*pte) != level)
+ return NULL;
+
+ level -= 1;
+
+ /* Walk to the next level */
+ pte = IOMMU_PTE_PAGE(*pte);
+ pte = &pte[PM_LEVEL_INDEX(level, address)];
+ *page_size = PTE_LEVEL_PAGE_SIZE(level);
+ }
+
+ /*
+ * If we have a series of large PTEs, make
+ * sure to return a pointer to the first one.
+ */
+ if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
+ pte = first_pte_l7(pte, page_size, NULL);
+
+ return pte;
+}
+
+static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
+{
+ unsigned long pt;
+ int mode;
+
+ while (cmpxchg64(pte, pteval, 0) != pteval) {
+ pr_warn("AMD-Vi: IOMMU pte changed since we read it\n");
+ pteval = *pte;
+ }
+
+ if (!IOMMU_PTE_PRESENT(pteval))
+ return freelist;
+
+ pt = (unsigned long)IOMMU_PTE_PAGE(pteval);
+ mode = IOMMU_PTE_MODE(pteval);
+
+ return free_sub_pt(pt, mode, freelist);
+}
+
+/*
+ * Generic mapping functions. It maps a physical address into a DMA
+ * address space. It allocates the page table pages if necessary.
+ * In the future it can be extended to a generic mapping function
+ * supporting all features of AMD IOMMU page tables like level skipping
+ * and full 64 bit address spaces.
+ */
+static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova,
+ phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
+{
+ struct protection_domain *dom = io_pgtable_ops_to_domain(ops);
+ struct page *freelist = NULL;
+ bool updated = false;
+ u64 __pte, *pte;
+ int ret, i, count;
+
+ BUG_ON(!IS_ALIGNED(iova, size));
+ BUG_ON(!IS_ALIGNED(paddr, size));
+
+ ret = -EINVAL;
+ if (!(prot & IOMMU_PROT_MASK))
+ goto out;
+
+ count = PAGE_SIZE_PTE_COUNT(size);
+ pte = alloc_pte(dom, iova, size, NULL, gfp, &updated);
+
+ ret = -ENOMEM;
+ if (!pte)
+ goto out;
+
+ for (i = 0; i < count; ++i)
+ freelist = free_clear_pte(&pte[i], pte[i], freelist);
+
+ if (freelist != NULL)
+ updated = true;
+
+ if (count > 1) {
+ __pte = PAGE_SIZE_PTE(__sme_set(paddr), size);
+ __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
+ } else
+ __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
+
+ if (prot & IOMMU_PROT_IR)
+ __pte |= IOMMU_PTE_IR;
+ if (prot & IOMMU_PROT_IW)
+ __pte |= IOMMU_PTE_IW;
+
+ for (i = 0; i < count; ++i)
+ pte[i] = __pte;
+
+ ret = 0;
+
+out:
+ if (updated) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&dom->lock, flags);
+ /*
+ * Flush domain TLB(s) and wait for completion. Any Device-Table
+ * Updates and flushing already happened in
+ * increase_address_space().
+ */
+ amd_iommu_domain_flush_tlb_pde(dom);
+ amd_iommu_domain_flush_complete(dom);
+ spin_unlock_irqrestore(&dom->lock, flags);
+ }
+
+ /* Everything flushed out, free pages now */
+ free_page_list(freelist);
+
+ return ret;
+}
+
+static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops,
+ unsigned long iova,
+ size_t size,
+ struct iommu_iotlb_gather *gather)
+{
+ struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+ unsigned long long unmapped;
+ unsigned long unmap_size;
+ u64 *pte;
+
+ BUG_ON(!is_power_of_2(size));
+
+ unmapped = 0;
+
+ while (unmapped < size) {
+ pte = fetch_pte(pgtable, iova, &unmap_size);
+ if (pte) {
+ int i, count;
+
+ count = PAGE_SIZE_PTE_COUNT(unmap_size);
+ for (i = 0; i < count; i++)
+ pte[i] = 0ULL;
+ }
+
+ iova = (iova & ~(unmap_size - 1)) + unmap_size;
+ unmapped += unmap_size;
+ }
+
+ BUG_ON(unmapped && !is_power_of_2(unmapped));
+
+ return unmapped;
+}
+
+static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova)
+{
+ struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops);
+ unsigned long offset_mask, pte_pgsize;
+ u64 *pte, __pte;
+
+ if (pgtable->mode == PAGE_MODE_NONE)
+ return iova;
+
+ pte = fetch_pte(pgtable, iova, &pte_pgsize);
+
+ if (!pte || !IOMMU_PTE_PRESENT(*pte))
+ return 0;
+
+ offset_mask = pte_pgsize - 1;
+ __pte = __sme_clr(*pte & PM_ADDR_MASK);
+
+ return (__pte & ~offset_mask) | (iova & offset_mask);
+}
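
A worked example with illustrative numbers: for an IOVA that hits a 2 MiB mapping, fetch_pte() reports pte_pgsize = 0x200000, so the low 21 bits of the IOVA are carried over onto the physical base stored in the PTE.

/*
 * Worked example (illustrative): iova = 0x12345678 backed by a 2 MiB PTE.
 *
 *   pte_pgsize  = 0x200000
 *   offset_mask = 0x1fffff
 *   __pte       = physical base of the 2 MiB region (low 21 bits zero)
 *   result      = __pte | (0x12345678 & 0x1fffff) = __pte | 0x145678
 */
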
+
+/*
+ * ----------------------------------------------------
+ */
+static void v1_free_pgtable(struct io_pgtable *iop)
+{
+ struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop);
+ struct protection_domain *dom;
+ struct page *freelist = NULL;
+ unsigned long root;
+
+ if (pgtable->mode == PAGE_MODE_NONE)
+ return;
+
+ dom = container_of(pgtable, struct protection_domain, iop);
+
+ /* Update data structure */
+ amd_iommu_domain_clr_pt_root(dom);
+
+ /* Make changes visible to IOMMUs */
+ amd_iommu_domain_update(dom);
+
+ /* Page-table is not visible to IOMMU anymore, so free it */
+ BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
+ pgtable->mode > PAGE_MODE_6_LEVEL);
+
+ root = (unsigned long)pgtable->root;
+ freelist = free_sub_pt(root, pgtable->mode, freelist);
+
+ free_page_list(freelist);
+}
+
+static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
+{
+ struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg);
+
+ cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES,
+ cfg->ias = IOMMU_IN_ADDR_BIT_SIZE,
+ cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE,
+ cfg->tlb = &v1_flush_ops;
+
+ pgtable->iop.ops.map = iommu_v1_map_page;
+ pgtable->iop.ops.unmap = iommu_v1_unmap_page;
+ pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys;
+
+ return &pgtable->iop;
+}
+
+struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = {
+ .alloc = v1_alloc_pgtable,
+ .free = v1_free_pgtable,
+};
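
These init_fns register the format with the generic io-pgtable framework. The caller side is not part of this patch, but a protection domain would presumably obtain its ops through the standard alloc_io_pgtable_ops() API using the AMD_IOMMU_V1 format selected by amd_iommu_pgtable in init.c. A minimal sketch under that assumption:

static int example_setup_v1_pgtable(struct protection_domain *domain)
{
	struct io_pgtable_ops *ops;

	/* Fills domain->iop.pgtbl_cfg via v1_alloc_pgtable() above. */
	ops = alloc_io_pgtable_ops(AMD_IOMMU_V1, &domain->iop.pgtbl_cfg,
				   domain);
	if (!ops)
		return -ENOMEM;

	/* ops->map/unmap/iova_to_phys now dispatch to iommu_v1_*(). */
	return 0;
}
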
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index b9cf59443843..80e8e1916dd1 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -31,11 +31,11 @@
#include <linux/irqdomain.h>
#include <linux/percpu.h>
#include <linux/iova.h>
+#include <linux/io-pgtable.h>
#include <asm/irq_remapping.h>
#include <asm/io_apic.h>
#include <asm/apic.h>
#include <asm/hw_irq.h>
-#include <asm/msidef.h>
#include <asm/proto.h>
#include <asm/iommu.h>
#include <asm/gart.h>
@@ -58,16 +58,6 @@
#define HT_RANGE_START (0xfd00000000ULL)
#define HT_RANGE_END (0xffffffffffULL)
-/*
- * This bitmap is used to advertise the page sizes our hardware support
- * to the IOMMU core, which will then use this information to split
- * physically contiguous memory regions it is mapping into page sizes
- * that we support.
- *
- * 512GB Pages are not supported due to a hardware bug
- */
-#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38))
-
#define DEFAULT_PGTABLE_LEVEL PAGE_MODE_3_LEVEL
static DEFINE_SPINLOCK(pd_bitmap_lock);
@@ -97,10 +87,7 @@ struct iommu_cmd {
struct kmem_cache *amd_iommu_irq_cache;
-static void update_domain(struct protection_domain *domain);
static void detach_device(struct device *dev);
-static void update_and_flush_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable);
/****************************************************************************
*
@@ -152,37 +139,6 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom)
return container_of(dom, struct protection_domain, domain);
}
-static void amd_iommu_domain_get_pgtable(struct protection_domain *domain,
- struct domain_pgtable *pgtable)
-{
- u64 pt_root = atomic64_read(&domain->pt_root);
-
- pgtable->root = (u64 *)(pt_root & PAGE_MASK);
- pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */
-}
-
-static void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root)
-{
- atomic64_set(&domain->pt_root, root);
-}
-
-static void amd_iommu_domain_clr_pt_root(struct protection_domain *domain)
-{
- amd_iommu_domain_set_pt_root(domain, 0);
-}
-
-static void amd_iommu_domain_set_pgtable(struct protection_domain *domain,
- u64 *root, int mode)
-{
- u64 pt_root;
-
- /* lowest 3 bits encode pgtable mode */
- pt_root = mode & 7;
- pt_root |= (u64)root;
-
- amd_iommu_domain_set_pt_root(domain, pt_root);
-}
-
static struct iommu_dev_data *alloc_dev_data(u16 devid)
{
struct iommu_dev_data *dev_data;
@@ -334,15 +290,6 @@ static bool pci_iommuv2_capable(struct pci_dev *pdev)
return true;
}
-static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum)
-{
- struct iommu_dev_data *dev_data;
-
- dev_data = dev_iommu_priv_get(&pdev->dev);
-
- return dev_data->errata & (1 << erratum) ? true : false;
-}
-
/*
* This function checks if the driver got a valid device from the caller to
* avoid dereferencing invalid pointers.
@@ -438,29 +385,6 @@ static void amd_iommu_uninit_device(struct device *dev)
*/
}
-/*
- * Helper function to get the first pte of a large mapping
- */
-static u64 *first_pte_l7(u64 *pte, unsigned long *page_size,
- unsigned long *count)
-{
- unsigned long pte_mask, pg_size, cnt;
- u64 *fpte;
-
- pg_size = PTE_PAGE_SIZE(*pte);
- cnt = PAGE_SIZE_PTE_COUNT(pg_size);
- pte_mask = ~((cnt << 3) - 1);
- fpte = (u64 *)(((unsigned long)pte) & pte_mask);
-
- if (page_size)
- *page_size = pg_size;
-
- if (count)
- *count = cnt;
-
- return fpte;
-}
-
/****************************************************************************
*
* Interrupt handling functions
@@ -928,33 +852,58 @@ static void build_inv_dte(struct iommu_cmd *cmd, u16 devid)
CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY);
}
-static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
- size_t size, u16 domid, int pde)
+/*
+ * Builds an invalidation address which is suitable for one page or multiple
+ * pages. Sets the size bit (S) as needed if more than one page is flushed.
+ */
+static inline u64 build_inv_address(u64 address, size_t size)
{
- u64 pages;
- bool s;
+ u64 pages, end, msb_diff;
pages = iommu_num_pages(address, size, PAGE_SIZE);
- s = false;
- if (pages > 1) {
+ if (pages == 1)
+ return address & PAGE_MASK;
+
+ end = address + size - 1;
+
+ /*
+ * msb_diff would hold the index of the most significant bit that
+ * flipped between the start and end.
+ */
+ msb_diff = fls64(end ^ address) - 1;
+
+ /*
+ * Bits 63:52 are sign extended. If for some reason bit 51 is different
+ * between the start and the end, invalidate everything.
+ */
+ if (unlikely(msb_diff > 51)) {
+ address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
+ } else {
/*
- * If we have to flush more than one page, flush all
- * TLB entries for this domain
+ * The msb-bit must be clear on the address. Just set all the
+ * lower bits.
*/
- address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
- s = true;
+ address |= (1ull << msb_diff) - 1;
}
+ /* Clear bits 11:0 */
address &= PAGE_MASK;
+ /* Set the size bit - we flush more than one 4kb page */
+ return address | CMD_INV_IOMMU_PAGES_SIZE_MASK;
+}
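
A worked example with made-up addresses, flushing 128 KiB starting at IOVA 0x100000:

/*
 * Worked example (illustrative):
 *
 *   end      = 0x100000 + 0x20000 - 1 = 0x11ffff
 *   msb_diff = fls64(0x11ffff ^ 0x100000) - 1 = fls64(0x1ffff) - 1 = 16
 *   address |= (1ull << 16) - 1   ->  0x10ffff
 *   address &= PAGE_MASK          ->  0x10f000
 *   return 0x10f000 | CMD_INV_IOMMU_PAGES_SIZE_MASK
 *
 * Address bits 12..15 are set and bit 16 is clear, which the IOMMU
 * decodes as a naturally aligned 2^17 byte (128 KiB) invalidation.
 */
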
+
+static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
+ size_t size, u16 domid, int pde)
+{
+ u64 inv_address = build_inv_address(address, size);
+
memset(cmd, 0, sizeof(*cmd));
cmd->data[1] |= domid;
- cmd->data[2] = lower_32_bits(address);
- cmd->data[3] = upper_32_bits(address);
+ cmd->data[2] = lower_32_bits(inv_address);
+ cmd->data[3] = upper_32_bits(inv_address);
CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
- if (s) /* size bit - we flush more than one 4kb page */
- cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
}
@@ -962,32 +911,15 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep,
u64 address, size_t size)
{
- u64 pages;
- bool s;
-
- pages = iommu_num_pages(address, size, PAGE_SIZE);
- s = false;
-
- if (pages > 1) {
- /*
- * If we have to flush more than one page, flush all
- * TLB entries for this domain
- */
- address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
- s = true;
- }
-
- address &= PAGE_MASK;
+ u64 inv_address = build_inv_address(address, size);
memset(cmd, 0, sizeof(*cmd));
cmd->data[0] = devid;
cmd->data[0] |= (qdep & 0xff) << 24;
cmd->data[1] = devid;
- cmd->data[2] = lower_32_bits(address);
- cmd->data[3] = upper_32_bits(address);
+ cmd->data[2] = lower_32_bits(inv_address);
+ cmd->data[3] = upper_32_bits(inv_address);
CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES);
- if (s)
- cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
}
static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, u32 pasid,
@@ -1336,12 +1268,12 @@ static void domain_flush_pages(struct protection_domain *domain,
}
/* Flush the whole IO/TLB for a given protection domain - including PDE */
-static void domain_flush_tlb_pde(struct protection_domain *domain)
+void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain)
{
__domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1);
}
-static void domain_flush_complete(struct protection_domain *domain)
+void amd_iommu_domain_flush_complete(struct protection_domain *domain)
{
int i;
@@ -1366,7 +1298,7 @@ static void domain_flush_np_cache(struct protection_domain *domain,
spin_lock_irqsave(&domain->lock, flags);
domain_flush_pages(domain, iova, size);
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_complete(domain);
spin_unlock_irqrestore(&domain->lock, flags);
}
}
@@ -1385,443 +1317,6 @@ static void domain_flush_devices(struct protection_domain *domain)
/****************************************************************************
*
- * The functions below are used the create the page table mappings for
- * unity mapped regions.
- *
- ****************************************************************************/
-
-static void free_page_list(struct page *freelist)
-{
- while (freelist != NULL) {
- unsigned long p = (unsigned long)page_address(freelist);
- freelist = freelist->freelist;
- free_page(p);
- }
-}
-
-static struct page *free_pt_page(unsigned long pt, struct page *freelist)
-{
- struct page *p = virt_to_page((void *)pt);
-
- p->freelist = freelist;
-
- return p;
-}
-
-#define DEFINE_FREE_PT_FN(LVL, FN) \
-static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \
-{ \
- unsigned long p; \
- u64 *pt; \
- int i; \
- \
- pt = (u64 *)__pt; \
- \
- for (i = 0; i < 512; ++i) { \
- /* PTE present? */ \
- if (!IOMMU_PTE_PRESENT(pt[i])) \
- continue; \
- \
- /* Large PTE? */ \
- if (PM_PTE_LEVEL(pt[i]) == 0 || \
- PM_PTE_LEVEL(pt[i]) == 7) \
- continue; \
- \
- p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \
- freelist = FN(p, freelist); \
- } \
- \
- return free_pt_page((unsigned long)pt, freelist); \
-}
-
-DEFINE_FREE_PT_FN(l2, free_pt_page)
-DEFINE_FREE_PT_FN(l3, free_pt_l2)
-DEFINE_FREE_PT_FN(l4, free_pt_l3)
-DEFINE_FREE_PT_FN(l5, free_pt_l4)
-DEFINE_FREE_PT_FN(l6, free_pt_l5)
-
-static struct page *free_sub_pt(unsigned long root, int mode,
- struct page *freelist)
-{
- switch (mode) {
- case PAGE_MODE_NONE:
- case PAGE_MODE_7_LEVEL:
- break;
- case PAGE_MODE_1_LEVEL:
- freelist = free_pt_page(root, freelist);
- break;
- case PAGE_MODE_2_LEVEL:
- freelist = free_pt_l2(root, freelist);
- break;
- case PAGE_MODE_3_LEVEL:
- freelist = free_pt_l3(root, freelist);
- break;
- case PAGE_MODE_4_LEVEL:
- freelist = free_pt_l4(root, freelist);
- break;
- case PAGE_MODE_5_LEVEL:
- freelist = free_pt_l5(root, freelist);
- break;
- case PAGE_MODE_6_LEVEL:
- freelist = free_pt_l6(root, freelist);
- break;
- default:
- BUG();
- }
-
- return freelist;
-}
-
-static void free_pagetable(struct domain_pgtable *pgtable)
-{
- struct page *freelist = NULL;
- unsigned long root;
-
- if (pgtable->mode == PAGE_MODE_NONE)
- return;
-
- BUG_ON(pgtable->mode < PAGE_MODE_NONE ||
- pgtable->mode > PAGE_MODE_6_LEVEL);
-
- root = (unsigned long)pgtable->root;
- freelist = free_sub_pt(root, pgtable->mode, freelist);
-
- free_page_list(freelist);
-}
-
-/*
- * This function is used to add another level to an IO page table. Adding
- * another level increases the size of the address space by 9 bits to a size up
- * to 64 bits.
- */
-static bool increase_address_space(struct protection_domain *domain,
- unsigned long address,
- gfp_t gfp)
-{
- struct domain_pgtable pgtable;
- unsigned long flags;
- bool ret = true;
- u64 *pte;
-
- spin_lock_irqsave(&domain->lock, flags);
-
- amd_iommu_domain_get_pgtable(domain, &pgtable);
-
- if (address <= PM_LEVEL_SIZE(pgtable.mode))
- goto out;
-
- ret = false;
- if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL))
- goto out;
-
- pte = (void *)get_zeroed_page(gfp);
- if (!pte)
- goto out;
-
- *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root));
-
- pgtable.root = pte;
- pgtable.mode += 1;
- update_and_flush_device_table(domain, &pgtable);
- domain_flush_complete(domain);
-
- /*
- * Device Table needs to be updated and flushed before the new root can
- * be published.
- */
- amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode);
-
- ret = true;
-
-out:
- spin_unlock_irqrestore(&domain->lock, flags);
-
- return ret;
-}
-
-static u64 *alloc_pte(struct protection_domain *domain,
- unsigned long address,
- unsigned long page_size,
- u64 **pte_page,
- gfp_t gfp,
- bool *updated)
-{
- struct domain_pgtable pgtable;
- int level, end_lvl;
- u64 *pte, *page;
-
- BUG_ON(!is_power_of_2(page_size));
-
- amd_iommu_domain_get_pgtable(domain, &pgtable);
-
- while (address > PM_LEVEL_SIZE(pgtable.mode)) {
- /*
- * Return an error if there is no memory to update the
- * page-table.
- */
- if (!increase_address_space(domain, address, gfp))
- return NULL;
-
- /* Read new values to check if update was successful */
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- }
-
-
- level = pgtable.mode - 1;
- pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
- address = PAGE_SIZE_ALIGN(address, page_size);
- end_lvl = PAGE_SIZE_LEVEL(page_size);
-
- while (level > end_lvl) {
- u64 __pte, __npte;
- int pte_level;
-
- __pte = *pte;
- pte_level = PM_PTE_LEVEL(__pte);
-
- /*
- * If we replace a series of large PTEs, we need
- * to tear down all of them.
- */
- if (IOMMU_PTE_PRESENT(__pte) &&
- pte_level == PAGE_MODE_7_LEVEL) {
- unsigned long count, i;
- u64 *lpte;
-
- lpte = first_pte_l7(pte, NULL, &count);
-
- /*
- * Unmap the replicated PTEs that still match the
- * original large mapping
- */
- for (i = 0; i < count; ++i)
- cmpxchg64(&lpte[i], __pte, 0ULL);
-
- *updated = true;
- continue;
- }
-
- if (!IOMMU_PTE_PRESENT(__pte) ||
- pte_level == PAGE_MODE_NONE) {
- page = (u64 *)get_zeroed_page(gfp);
-
- if (!page)
- return NULL;
-
- __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page));
-
- /* pte could have been changed somewhere. */
- if (cmpxchg64(pte, __pte, __npte) != __pte)
- free_page((unsigned long)page);
- else if (IOMMU_PTE_PRESENT(__pte))
- *updated = true;
-
- continue;
- }
-
- /* No level skipping support yet */
- if (pte_level != level)
- return NULL;
-
- level -= 1;
-
- pte = IOMMU_PTE_PAGE(__pte);
-
- if (pte_page && level == end_lvl)
- *pte_page = pte;
-
- pte = &pte[PM_LEVEL_INDEX(level, address)];
- }
-
- return pte;
-}
-
-/*
- * This function checks if there is a PTE for a given dma address. If
- * there is one, it returns the pointer to it.
- */
-static u64 *fetch_pte(struct protection_domain *domain,
- unsigned long address,
- unsigned long *page_size)
-{
- struct domain_pgtable pgtable;
- int level;
- u64 *pte;
-
- *page_size = 0;
-
- amd_iommu_domain_get_pgtable(domain, &pgtable);
-
- if (address > PM_LEVEL_SIZE(pgtable.mode))
- return NULL;
-
- level = pgtable.mode - 1;
- pte = &pgtable.root[PM_LEVEL_INDEX(level, address)];
- *page_size = PTE_LEVEL_PAGE_SIZE(level);
-
- while (level > 0) {
-
- /* Not Present */
- if (!IOMMU_PTE_PRESENT(*pte))
- return NULL;
-
- /* Large PTE */
- if (PM_PTE_LEVEL(*pte) == 7 ||
- PM_PTE_LEVEL(*pte) == 0)
- break;
-
- /* No level skipping support yet */
- if (PM_PTE_LEVEL(*pte) != level)
- return NULL;
-
- level -= 1;
-
- /* Walk to the next level */
- pte = IOMMU_PTE_PAGE(*pte);
- pte = &pte[PM_LEVEL_INDEX(level, address)];
- *page_size = PTE_LEVEL_PAGE_SIZE(level);
- }
-
- /*
- * If we have a series of large PTEs, make
- * sure to return a pointer to the first one.
- */
- if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL)
- pte = first_pte_l7(pte, page_size, NULL);
-
- return pte;
-}
-
-static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist)
-{
- unsigned long pt;
- int mode;
-
- while (cmpxchg64(pte, pteval, 0) != pteval) {
- pr_warn("AMD-Vi: IOMMU pte changed since we read it\n");
- pteval = *pte;
- }
-
- if (!IOMMU_PTE_PRESENT(pteval))
- return freelist;
-
- pt = (unsigned long)IOMMU_PTE_PAGE(pteval);
- mode = IOMMU_PTE_MODE(pteval);
-
- return free_sub_pt(pt, mode, freelist);
-}
-
-/*
- * Generic mapping functions. It maps a physical address into a DMA
- * address space. It allocates the page table pages if necessary.
- * In the future it can be extended to a generic mapping function
- * supporting all features of AMD IOMMU page tables like level skipping
- * and full 64 bit address spaces.
- */
-static int iommu_map_page(struct protection_domain *dom,
- unsigned long bus_addr,
- unsigned long phys_addr,
- unsigned long page_size,
- int prot,
- gfp_t gfp)
-{
- struct page *freelist = NULL;
- bool updated = false;
- u64 __pte, *pte;
- int ret, i, count;
-
- BUG_ON(!IS_ALIGNED(bus_addr, page_size));
- BUG_ON(!IS_ALIGNED(phys_addr, page_size));
-
- ret = -EINVAL;
- if (!(prot & IOMMU_PROT_MASK))
- goto out;
-
- count = PAGE_SIZE_PTE_COUNT(page_size);
- pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated);
-
- ret = -ENOMEM;
- if (!pte)
- goto out;
-
- for (i = 0; i < count; ++i)
- freelist = free_clear_pte(&pte[i], pte[i], freelist);
-
- if (freelist != NULL)
- updated = true;
-
- if (count > 1) {
- __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size);
- __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC;
- } else
- __pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC;
-
- if (prot & IOMMU_PROT_IR)
- __pte |= IOMMU_PTE_IR;
- if (prot & IOMMU_PROT_IW)
- __pte |= IOMMU_PTE_IW;
-
- for (i = 0; i < count; ++i)
- pte[i] = __pte;
-
- ret = 0;
-
-out:
- if (updated) {
- unsigned long flags;
-
- spin_lock_irqsave(&dom->lock, flags);
- /*
- * Flush domain TLB(s) and wait for completion. Any Device-Table
- * Updates and flushing already happened in
- * increase_address_space().
- */
- domain_flush_tlb_pde(dom);
- domain_flush_complete(dom);
- spin_unlock_irqrestore(&dom->lock, flags);
- }
-
- /* Everything flushed out, free pages now */
- free_page_list(freelist);
-
- return ret;
-}
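
[Editor's note] The count and level used by iommu_map_page() come from the PAGE_SIZE_LEVEL()/PAGE_SIZE_PTE_COUNT() macros: with 4 KiB base pages and 9 bits per level, a page size that does not land exactly on a level boundary is written as a run of replicated PTEs (the "series of large PTEs" the comments above refer to). A standalone illustration of that arithmetic, assuming those macro definitions:

#include <stdio.h>

/* Mirrors PAGE_SIZE_LEVEL()/PAGE_SIZE_PTE_COUNT() for a power-of-two size. */
static void describe(unsigned long long page_size)
{
	int shift = 0;

	while ((1ULL << shift) < page_size)
		shift++;

	printf("%llu bytes -> level %d, %llu replicated PTE(s)\n",
	       page_size, (shift - 12) / 9, 1ULL << ((shift - 12) % 9));
}

int main(void)
{
	describe(1ULL << 12);	/* 4 KiB:  level 0, 1 PTE  */
	describe(1ULL << 15);	/* 32 KiB: level 0, 8 PTEs */
	describe(1ULL << 21);	/* 2 MiB:  level 1, 1 PTE  */
	describe(1ULL << 30);	/* 1 GiB:  level 2, 1 PTE  */
	return 0;
}
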
-
-static unsigned long iommu_unmap_page(struct protection_domain *dom,
- unsigned long bus_addr,
- unsigned long page_size)
-{
- unsigned long long unmapped;
- unsigned long unmap_size;
- u64 *pte;
-
- BUG_ON(!is_power_of_2(page_size));
-
- unmapped = 0;
-
- while (unmapped < page_size) {
-
- pte = fetch_pte(dom, bus_addr, &unmap_size);
-
- if (pte) {
- int i, count;
-
- count = PAGE_SIZE_PTE_COUNT(unmap_size);
- for (i = 0; i < count; i++)
- pte[i] = 0ULL;
- }
-
- bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size;
- unmapped += unmap_size;
- }
-
- BUG_ON(unmapped && !is_power_of_2(unmapped));
-
- return unmapped;
-}
-
-/****************************************************************************
- *
* The next functions belong to the domain allocation. A domain is
* allocated for every IOMMU as the default domain. If device isolation
 * is enabled, every device gets its own domain. The most important thing
@@ -1897,17 +1392,16 @@ static void free_gcr3_table(struct protection_domain *domain)
}
static void set_dte_entry(u16 devid, struct protection_domain *domain,
- struct domain_pgtable *pgtable,
bool ats, bool ppr)
{
u64 pte_root = 0;
u64 flags = 0;
u32 old_domid;
- if (pgtable->mode != PAGE_MODE_NONE)
- pte_root = iommu_virt_to_phys(pgtable->root);
+ if (domain->iop.mode != PAGE_MODE_NONE)
+ pte_root = iommu_virt_to_phys(domain->iop.root);
- pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK)
+ pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV;
@@ -1980,7 +1474,6 @@ static void clear_dte_entry(u16 devid)
static void do_attach(struct iommu_dev_data *dev_data,
struct protection_domain *domain)
{
- struct domain_pgtable pgtable;
struct amd_iommu *iommu;
bool ats;
@@ -1996,8 +1489,7 @@ static void do_attach(struct iommu_dev_data *dev_data,
domain->dev_cnt += 1;
/* Update device table */
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- set_dte_entry(dev_data->devid, domain, &pgtable,
+ set_dte_entry(dev_data->devid, domain,
ats, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
@@ -2021,10 +1513,10 @@ static void do_detach(struct iommu_dev_data *dev_data)
device_flush_dte(dev_data);
/* Flush IOTLB */
- domain_flush_tlb_pde(domain);
+ amd_iommu_domain_flush_tlb_pde(domain);
/* Wait for the flushes to finish */
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_complete(domain);
/* decrease reference counters - needs to happen after the flushes */
domain->dev_iommu[iommu->index] -= 1;
@@ -2038,33 +1530,9 @@ static void pdev_iommuv2_disable(struct pci_dev *pdev)
pci_disable_pasid(pdev);
}
-/* FIXME: Change generic reset-function to do the same */
-static int pri_reset_while_enabled(struct pci_dev *pdev)
-{
- u16 control;
- int pos;
-
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
- if (!pos)
- return -EINVAL;
-
- pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
- control |= PCI_PRI_CTRL_RESET;
- pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control);
-
- return 0;
-}
-
static int pdev_iommuv2_enable(struct pci_dev *pdev)
{
- bool reset_enable;
- int reqs, ret;
-
- /* FIXME: Hardcode number of outstanding requests for now */
- reqs = 32;
- if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE))
- reqs = 1;
- reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET);
+ int ret;
/* Only allow access to user-accessible pages */
ret = pci_enable_pasid(pdev, 0);
@@ -2077,16 +1545,11 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev)
goto out_err;
/* Enable PRI */
- ret = pci_enable_pri(pdev, reqs);
+ /* FIXME: Hardcode number of outstanding requests for now */
+ ret = pci_enable_pri(pdev, 32);
if (ret)
goto out_err;
- if (reset_enable) {
- ret = pri_reset_while_enabled(pdev);
- if (ret)
- goto out_err;
- }
-
ret = pci_enable_ats(pdev, PAGE_SHIFT);
if (ret)
goto out_err;
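
[Editor's note] The simplified pdev_iommuv2_enable() above keeps the PASID -> PRI reset -> PRI -> ATS ordering. A hedged, caller-style sketch of that bring-up with the unwind a driver would need on failure; the real function shares a single out_err path, and the helper name here is assumed:

#include <linux/pci.h>
#include <linux/pci-ats.h>

static int example_enable_iommuv2(struct pci_dev *pdev)
{
	int ret;

	ret = pci_enable_pasid(pdev, 0);	/* user-accessible pages only */
	if (ret)
		return ret;

	ret = pci_reset_pri(pdev);		/* reset PRI state before enabling it */
	if (ret)
		goto out_pasid;

	ret = pci_enable_pri(pdev, 32);		/* 32 outstanding page requests */
	if (ret)
		goto out_pasid;

	ret = pci_enable_ats(pdev, PAGE_SHIFT);
	if (ret)
		goto out_pri;

	return 0;

out_pri:
	pci_disable_pri(pdev);
out_pasid:
	pci_disable_pasid(pdev);
	return ret;
}
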
@@ -2157,9 +1620,9 @@ skip_ats_check:
* left the caches in the IOMMU dirty. So we have to flush
* here to evict all dirty stuff.
*/
- domain_flush_tlb_pde(domain);
+ amd_iommu_domain_flush_tlb_pde(domain);
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_complete(domain);
out:
spin_unlock(&dev_data->lock);
@@ -2222,9 +1685,6 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev)
return ERR_PTR(-ENODEV);
devid = get_device_id(dev);
- if (devid < 0)
- return ERR_PTR(devid);
-
iommu = amd_iommu_rlookup_table[devid];
if (dev_iommu_priv_get(dev))
@@ -2278,62 +1738,37 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev)
return acpihid_device_group(dev);
}
-static int amd_iommu_domain_get_attr(struct iommu_domain *domain,
- enum iommu_attr attr, void *data)
-{
- switch (domain->type) {
- case IOMMU_DOMAIN_UNMANAGED:
- return -ENODEV;
- case IOMMU_DOMAIN_DMA:
- switch (attr) {
- case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
- *(int *)data = !amd_iommu_unmap_flush;
- return 0;
- default:
- return -ENODEV;
- }
- break;
- default:
- return -EINVAL;
- }
-}
-
/*****************************************************************************
*
* The next functions belong to the dma_ops mapping/unmapping code.
*
*****************************************************************************/
-static void update_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable)
+static void update_device_table(struct protection_domain *domain)
{
struct iommu_dev_data *dev_data;
list_for_each_entry(dev_data, &domain->dev_list, list) {
- set_dte_entry(dev_data->devid, domain, pgtable,
+ set_dte_entry(dev_data->devid, domain,
dev_data->ats.enabled, dev_data->iommu_v2);
clone_aliases(dev_data->pdev);
}
}
-static void update_and_flush_device_table(struct protection_domain *domain,
- struct domain_pgtable *pgtable)
+void amd_iommu_update_and_flush_device_table(struct protection_domain *domain)
{
- update_device_table(domain, pgtable);
+ update_device_table(domain);
domain_flush_devices(domain);
}
-static void update_domain(struct protection_domain *domain)
+void amd_iommu_domain_update(struct protection_domain *domain)
{
- struct domain_pgtable pgtable;
-
/* Update device table */
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- update_and_flush_device_table(domain, &pgtable);
+ amd_iommu_update_and_flush_device_table(domain);
/* Flush domain TLB(s) and wait for completion */
- domain_flush_tlb_pde(domain);
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_tlb_pde(domain);
+ amd_iommu_domain_flush_complete(domain);
}
int __init amd_iommu_init_api(void)
@@ -2367,7 +1802,7 @@ int __init amd_iommu_init_dma_ops(void)
pr_info("IO/TLB flush on unmap enabled\n");
else
pr_info("Lazy IO/TLB flushing enabled\n");
-
+ iommu_set_dma_strict(amd_iommu_unmap_flush);
return 0;
}
@@ -2401,22 +1836,19 @@ static void cleanup_domain(struct protection_domain *domain)
static void protection_domain_free(struct protection_domain *domain)
{
- struct domain_pgtable pgtable;
-
if (!domain)
return;
if (domain->id)
domain_id_free(domain->id);
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- amd_iommu_domain_clr_pt_root(domain);
- free_pagetable(&pgtable);
+ if (domain->iop.pgtbl_cfg.tlb)
+ free_io_pgtable_ops(&domain->iop.iop.ops);
kfree(domain);
}
-static int protection_domain_init(struct protection_domain *domain, int mode)
+static int protection_domain_init_v1(struct protection_domain *domain, int mode)
{
u64 *pt_root = NULL;
@@ -2439,34 +1871,55 @@ static int protection_domain_init(struct protection_domain *domain, int mode)
return 0;
}
-static struct protection_domain *protection_domain_alloc(int mode)
+static struct protection_domain *protection_domain_alloc(unsigned int type)
{
+ struct io_pgtable_ops *pgtbl_ops;
struct protection_domain *domain;
+ int pgtable = amd_iommu_pgtable;
+ int mode = DEFAULT_PGTABLE_LEVEL;
+ int ret;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return NULL;
- if (protection_domain_init(domain, mode))
+ /*
+ * Force IOMMU v1 page table when iommu=pt and
+ * when allocating domain for pass-through devices.
+ */
+ if (type == IOMMU_DOMAIN_IDENTITY) {
+ pgtable = AMD_IOMMU_V1;
+ mode = PAGE_MODE_NONE;
+ } else if (type == IOMMU_DOMAIN_UNMANAGED) {
+ pgtable = AMD_IOMMU_V1;
+ }
+
+ switch (pgtable) {
+ case AMD_IOMMU_V1:
+ ret = protection_domain_init_v1(domain, mode);
+ break;
+ default:
+ ret = -EINVAL;
+ }
+
+ if (ret)
goto out_err;
- return domain;
+ pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain);
+ if (!pgtbl_ops)
+ goto out_err;
+ return domain;
out_err:
kfree(domain);
-
return NULL;
}
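
[Editor's note] protection_domain_alloc() now pairs alloc_io_pgtable_ops() with the free_io_pgtable_ops() call seen earlier in protection_domain_free(). A minimal, generic sketch of that pairing under the io-pgtable framework; the flush callbacks and function names are illustrative assumptions, not the AMD driver's:

#include <linux/io-pgtable.h>

/* Illustrative flush hooks; a real driver invalidates its IOTLBs here. */
static void example_tlb_flush_all(void *cookie) { }
static void example_tlb_flush_walk(unsigned long iova, size_t size,
				   size_t granule, void *cookie) { }
static void example_tlb_add_page(struct iommu_iotlb_gather *gather,
				 unsigned long iova, size_t granule,
				 void *cookie) { }

static const struct iommu_flush_ops example_flush_ops = {
	.tlb_flush_all	= example_tlb_flush_all,
	.tlb_flush_walk	= example_tlb_flush_walk,
	.tlb_add_page	= example_tlb_add_page,
};

static struct io_pgtable_ops *example_pgtable_alloc(struct io_pgtable_cfg *cfg,
						    void *cookie)
{
	cfg->tlb = &example_flush_ops;

	/* The fmt selects the page-table code; AMD_IOMMU_V1 is what this series adds. */
	return alloc_io_pgtable_ops(AMD_IOMMU_V1, cfg, cookie);
}

static void example_pgtable_free(struct io_pgtable_ops *ops)
{
	/* Tears the page-table down through the format's free hook. */
	free_io_pgtable_ops(ops);
}
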
static struct iommu_domain *amd_iommu_domain_alloc(unsigned type)
{
struct protection_domain *domain;
- int mode = DEFAULT_PGTABLE_LEVEL;
-
- if (type == IOMMU_DOMAIN_IDENTITY)
- mode = PAGE_MODE_NONE;
- domain = protection_domain_alloc(mode);
+ domain = protection_domain_alloc(type);
if (!domain)
return NULL;
@@ -2513,16 +1966,12 @@ static void amd_iommu_detach_device(struct iommu_domain *dom,
struct device *dev)
{
struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
+ int devid = get_device_id(dev);
struct amd_iommu *iommu;
- int devid;
if (!check_device(dev))
return;
- devid = get_device_id(dev);
- if (devid < 0)
- return;
-
if (dev_data->domain != NULL)
detach_device(dev);
@@ -2581,12 +2030,12 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
gfp_t gfp)
{
struct protection_domain *domain = to_pdomain(dom);
- struct domain_pgtable pgtable;
+ struct io_pgtable_ops *ops = &domain->iop.iop.ops;
int prot = 0;
- int ret;
+ int ret = -EINVAL;
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- if (pgtable.mode == PAGE_MODE_NONE)
+ if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
+ (domain->iop.mode == PAGE_MODE_NONE))
return -EINVAL;
if (iommu_prot & IOMMU_READ)
@@ -2594,9 +2043,10 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova,
if (iommu_prot & IOMMU_WRITE)
prot |= IOMMU_PROT_IW;
- ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp);
-
- domain_flush_np_cache(domain, iova, page_size);
+ if (ops->map) {
+ ret = ops->map(ops, iova, paddr, page_size, prot, gfp);
+ domain_flush_np_cache(domain, iova, page_size);
+ }
return ret;
}
@@ -2606,36 +2056,22 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova,
struct iommu_iotlb_gather *gather)
{
struct protection_domain *domain = to_pdomain(dom);
- struct domain_pgtable pgtable;
+ struct io_pgtable_ops *ops = &domain->iop.iop.ops;
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- if (pgtable.mode == PAGE_MODE_NONE)
+ if ((amd_iommu_pgtable == AMD_IOMMU_V1) &&
+ (domain->iop.mode == PAGE_MODE_NONE))
return 0;
- return iommu_unmap_page(domain, iova, page_size);
+ return (ops->unmap) ? ops->unmap(ops, iova, page_size, gather) : 0;
}
static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
dma_addr_t iova)
{
struct protection_domain *domain = to_pdomain(dom);
- unsigned long offset_mask, pte_pgsize;
- struct domain_pgtable pgtable;
- u64 *pte, __pte;
+ struct io_pgtable_ops *ops = &domain->iop.iop.ops;
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- if (pgtable.mode == PAGE_MODE_NONE)
- return iova;
-
- pte = fetch_pte(domain, iova, &pte_pgsize);
-
- if (!pte || !IOMMU_PTE_PRESENT(*pte))
- return 0;
-
- offset_mask = pte_pgsize - 1;
- __pte = __sme_clr(*pte & PM_ADDR_MASK);
-
- return (__pte & ~offset_mask) | (iova & offset_mask);
+ return ops->iova_to_phys(ops, iova);
}
static bool amd_iommu_capable(enum iommu_cap cap)
@@ -2721,8 +2157,8 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain)
unsigned long flags;
spin_lock_irqsave(&dom->lock, flags);
- domain_flush_tlb_pde(dom);
- domain_flush_complete(dom);
+ amd_iommu_domain_flush_tlb_pde(dom);
+ amd_iommu_domain_flush_complete(dom);
spin_unlock_irqrestore(&dom->lock, flags);
}
@@ -2764,7 +2200,6 @@ const struct iommu_ops amd_iommu_ops = {
.release_device = amd_iommu_release_device,
.probe_finalize = amd_iommu_probe_finalize,
.device_group = amd_iommu_device_group,
- .domain_get_attr = amd_iommu_domain_get_attr,
.get_resv_regions = amd_iommu_get_resv_regions,
.put_resv_regions = generic_iommu_put_resv_regions,
.is_attach_deferred = amd_iommu_is_attach_deferred,
@@ -2800,22 +2235,12 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier);
void amd_iommu_domain_direct_map(struct iommu_domain *dom)
{
struct protection_domain *domain = to_pdomain(dom);
- struct domain_pgtable pgtable;
unsigned long flags;
spin_lock_irqsave(&domain->lock, flags);
- /* First save pgtable configuration*/
- amd_iommu_domain_get_pgtable(domain, &pgtable);
-
- /* Remove page-table from domain */
- amd_iommu_domain_clr_pt_root(domain);
-
- /* Make changes visible to IOMMUs */
- update_domain(domain);
-
- /* Page-table is not visible to IOMMU anymore, so free it */
- free_pagetable(&pgtable);
+ if (domain->iop.pgtbl_cfg.tlb)
+ free_io_pgtable_ops(&domain->iop.iop.ops);
spin_unlock_irqrestore(&domain->lock, flags);
}
@@ -2827,9 +2252,6 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
unsigned long flags;
int levels, ret;
- if (pasids <= 0 || pasids > (PASID_MASK + 1))
- return -EINVAL;
-
/* Number of GCR3 table levels required */
for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9)
levels += 1;
@@ -2856,7 +2278,7 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids)
domain->glx = levels;
domain->flags |= PD_IOMMUV2_MASK;
- update_domain(domain);
+ amd_iommu_domain_update(domain);
ret = 0;
@@ -2893,7 +2315,7 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid,
}
/* Wait until IOMMU TLB flushes are complete */
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_complete(domain);
/* Now flush device TLBs */
list_for_each_entry(dev_data, &domain->dev_list, list) {
@@ -2919,7 +2341,7 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid,
}
/* Wait until all device TLBs are flushed */
- domain_flush_complete(domain);
+ amd_iommu_domain_flush_complete(domain);
ret = 0;
@@ -3004,11 +2426,9 @@ static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc)
static int __set_gcr3(struct protection_domain *domain, u32 pasid,
unsigned long cr3)
{
- struct domain_pgtable pgtable;
u64 *pte;
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- if (pgtable.mode != PAGE_MODE_NONE)
+ if (domain->iop.mode != PAGE_MODE_NONE)
return -EINVAL;
pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true);
@@ -3022,11 +2442,9 @@ static int __set_gcr3(struct protection_domain *domain, u32 pasid,
static int __clear_gcr3(struct protection_domain *domain, u32 pasid)
{
- struct domain_pgtable pgtable;
u64 *pte;
- amd_iommu_domain_get_pgtable(domain, &pgtable);
- if (pgtable.mode != PAGE_MODE_NONE)
+ if (domain->iop.mode != PAGE_MODE_NONE)
return -EINVAL;
pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false);
@@ -3084,52 +2502,6 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid,
}
EXPORT_SYMBOL(amd_iommu_complete_ppr);
-struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev)
-{
- struct protection_domain *pdomain;
- struct iommu_dev_data *dev_data;
- struct device *dev = &pdev->dev;
- struct iommu_domain *io_domain;
-
- if (!check_device(dev))
- return NULL;
-
- dev_data = dev_iommu_priv_get(&pdev->dev);
- pdomain = dev_data->domain;
- io_domain = iommu_get_domain_for_dev(dev);
-
- if (pdomain == NULL && dev_data->defer_attach) {
- dev_data->defer_attach = false;
- pdomain = to_pdomain(io_domain);
- attach_device(dev, pdomain);
- }
-
- if (pdomain == NULL)
- return NULL;
-
- if (io_domain->type != IOMMU_DOMAIN_DMA)
- return NULL;
-
- /* Only return IOMMUv2 domains */
- if (!(pdomain->flags & PD_IOMMUV2_MASK))
- return NULL;
-
- return &pdomain->domain;
-}
-EXPORT_SYMBOL(amd_iommu_get_v2_domain);
-
-void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum)
-{
- struct iommu_dev_data *dev_data;
-
- if (!amd_iommu_v2_supported())
- return;
-
- dev_data = dev_iommu_priv_get(&pdev->dev);
- dev_data->errata |= (1 << erratum);
-}
-EXPORT_SYMBOL(amd_iommu_enable_device_erratum);
-
int amd_iommu_device_info(struct pci_dev *pdev,
struct amd_iommu_device_info *info)
{
@@ -3191,7 +2563,7 @@ static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table)
dte &= ~DTE_IRQ_PHYS_ADDR_MASK;
dte |= iommu_virt_to_phys(table->table);
dte |= DTE_IRQ_REMAP_INTCTL;
- dte |= DTE_IRQ_TABLE_LEN;
+ dte |= DTE_INTTABLEN;
dte |= DTE_IRQ_REMAP_ENABLE;
amd_iommu_dev_table[devid].data[2] = dte;
@@ -3466,7 +2838,7 @@ static void free_irte(u16 devid, int index)
}
static void irte_prepare(void *entry,
- u32 delivery_mode, u32 dest_mode,
+ u32 delivery_mode, bool dest_mode,
u8 vector, u32 dest_apicid, int devid)
{
union irte *irte = (union irte *) entry;
@@ -3480,7 +2852,7 @@ static void irte_prepare(void *entry,
}
static void irte_ga_prepare(void *entry,
- u32 delivery_mode, u32 dest_mode,
+ u32 delivery_mode, bool dest_mode,
u8 vector, u32 dest_apicid, int devid)
{
struct irte_ga *irte = (struct irte_ga *) entry;
@@ -3602,10 +2974,8 @@ static int get_devid(struct irq_alloc_info *info)
{
switch (info->type) {
case X86_IRQ_ALLOC_TYPE_IOAPIC:
- case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT:
return get_ioapic_devid(info->devid);
case X86_IRQ_ALLOC_TYPE_HPET:
- case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT:
return get_hpet_devid(info->devid);
case X86_IRQ_ALLOC_TYPE_PCI_MSI:
case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
@@ -3616,54 +2986,28 @@ static int get_devid(struct irq_alloc_info *info)
}
}
-static struct irq_domain *get_irq_domain_for_devid(struct irq_alloc_info *info,
- int devid)
-{
- struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
-
- if (!iommu)
- return NULL;
-
- switch (info->type) {
- case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT:
- case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT:
- return iommu->ir_domain;
- default:
- WARN_ON_ONCE(1);
- return NULL;
- }
-}
-
-static struct irq_domain *get_irq_domain(struct irq_alloc_info *info)
-{
- int devid;
-
- if (!info)
- return NULL;
-
- devid = get_devid(info);
- if (devid < 0)
- return NULL;
- return get_irq_domain_for_devid(info, devid);
-}
-
struct irq_remap_ops amd_iommu_irq_ops = {
.prepare = amd_iommu_prepare,
.enable = amd_iommu_enable,
.disable = amd_iommu_disable,
.reenable = amd_iommu_reenable,
.enable_faulting = amd_iommu_enable_faulting,
- .get_irq_domain = get_irq_domain,
};
+static void fill_msi_msg(struct msi_msg *msg, u32 index)
+{
+ msg->data = index;
+ msg->address_lo = 0;
+ msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
+ msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
+}
+
static void irq_remapping_prepare_irte(struct amd_ir_data *data,
struct irq_cfg *irq_cfg,
struct irq_alloc_info *info,
int devid, int index, int sub_handle)
{
struct irq_2_irte *irte_info = &data->irq_2_irte;
- struct msi_msg *msg = &data->msi_entry;
- struct IO_APIC_route_entry *entry;
struct amd_iommu *iommu = amd_iommu_rlookup_table[devid];
if (!iommu)
@@ -3671,31 +3015,16 @@ static void irq_remapping_prepare_irte(struct amd_ir_data *data,
data->irq_2_irte.devid = devid;
data->irq_2_irte.index = index + sub_handle;
- iommu->irte_ops->prepare(data->entry, apic->irq_delivery_mode,
- apic->irq_dest_mode, irq_cfg->vector,
+ iommu->irte_ops->prepare(data->entry, apic->delivery_mode,
+ apic->dest_mode_logical, irq_cfg->vector,
irq_cfg->dest_apicid, devid);
switch (info->type) {
case X86_IRQ_ALLOC_TYPE_IOAPIC:
- /* Setup IOAPIC entry */
- entry = info->ioapic.entry;
- info->ioapic.entry = NULL;
- memset(entry, 0, sizeof(*entry));
- entry->vector = index;
- entry->mask = 0;
- entry->trigger = info->ioapic.trigger;
- entry->polarity = info->ioapic.polarity;
- /* Mask level triggered irqs. */
- if (info->ioapic.trigger)
- entry->mask = 1;
- break;
-
case X86_IRQ_ALLOC_TYPE_HPET:
case X86_IRQ_ALLOC_TYPE_PCI_MSI:
case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
- msg->address_hi = MSI_ADDR_BASE_HI;
- msg->address_lo = MSI_ADDR_BASE_LO;
- msg->data = irte_info->index;
+ fill_msi_msg(&data->msi_entry, irte_info->index);
break;
default:
@@ -3892,7 +3221,29 @@ static void irq_remapping_deactivate(struct irq_domain *domain,
irte_info->index);
}
+static int irq_remapping_select(struct irq_domain *d, struct irq_fwspec *fwspec,
+ enum irq_domain_bus_token bus_token)
+{
+ struct amd_iommu *iommu;
+ int devid = -1;
+
+ if (!amd_iommu_irq_remap)
+ return 0;
+
+ if (x86_fwspec_is_ioapic(fwspec))
+ devid = get_ioapic_devid(fwspec->param[0]);
+ else if (x86_fwspec_is_hpet(fwspec))
+ devid = get_hpet_devid(fwspec->param[0]);
+
+ if (devid < 0)
+ return 0;
+
+ iommu = amd_iommu_rlookup_table[devid];
+ return iommu && iommu->ir_domain == d;
+}
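
[Editor's note] With a .select callback in place, the irq core can match a domain by fwspec, which is what replaces the removed get_irq_domain()/.get_irq_domain hook. A hedged caller-side sketch; the fwspec layout (param[0] carrying the IOAPIC id) follows the check above, but the helper name and fwnode source are assumptions:

#include <linux/irqdomain.h>

static struct irq_domain *example_find_ir_domain(struct fwnode_handle *ioapic_fwnode,
						 u32 ioapic_id)
{
	struct irq_fwspec fwspec = {
		.fwnode      = ioapic_fwnode,
		.param_count = 1,
		.param       = { ioapic_id },
	};

	/* The core walks registered domains and asks each one's ->select(). */
	return irq_find_matching_fwspec(&fwspec, DOMAIN_BUS_ANY);
}
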
+
static const struct irq_domain_ops amd_ir_domain_ops = {
+ .select = irq_remapping_select,
.alloc = irq_remapping_alloc,
.free = irq_remapping_free,
.activate = irq_remapping_activate,
@@ -3943,8 +3294,8 @@ int amd_iommu_deactivate_guest_mode(void *data)
entry->hi.val = 0;
entry->lo.fields_remap.valid = valid;
- entry->lo.fields_remap.dm = apic->irq_dest_mode;
- entry->lo.fields_remap.int_type = apic->irq_delivery_mode;
+ entry->lo.fields_remap.dm = apic->dest_mode_logical;
+ entry->lo.fields_remap.int_type = apic->delivery_mode;
entry->hi.fields.vector = cfg->vector;
entry->lo.fields_remap.destination =
APICID_TO_IRTE_DEST_LO(cfg->dest_apicid);
diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c
index 5ecc0bc608ec..f8d4ad421e07 100644
--- a/drivers/iommu/amd/iommu_v2.c
+++ b/drivers/iommu/amd/iommu_v2.c
@@ -77,7 +77,7 @@ struct fault {
};
static LIST_HEAD(state_list);
-static spinlock_t state_lock;
+static DEFINE_SPINLOCK(state_lock);
static struct workqueue_struct *iommu_wq;
@@ -938,8 +938,6 @@ static int __init amd_iommu_v2_init(void)
return 0;
}
- spin_lock_init(&state_lock);
-
ret = -ENOMEM;
iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0);
if (iommu_wq == NULL)