Diffstat (limited to 'drivers/iommu/amd')
-rw-r--r--   drivers/iommu/amd/Kconfig           |   1
-rw-r--r--   drivers/iommu/amd/Makefile          |   2
-rw-r--r--   drivers/iommu/amd/amd_iommu.h       |  31
-rw-r--r--   drivers/iommu/amd/amd_iommu_types.h |  65
-rw-r--r--   drivers/iommu/amd/init.c            | 394
-rw-r--r--   drivers/iommu/amd/io_pgtable.c      | 560
-rw-r--r--   drivers/iommu/amd/iommu.c           | 971
-rw-r--r--   drivers/iommu/amd/iommu_v2.c        |   4
8 files changed, 1060 insertions, 968 deletions
diff --git a/drivers/iommu/amd/Kconfig b/drivers/iommu/amd/Kconfig index 626b97d0dd21..a3cbafb603f5 100644 --- a/drivers/iommu/amd/Kconfig +++ b/drivers/iommu/amd/Kconfig @@ -10,6 +10,7 @@ config AMD_IOMMU select IOMMU_API select IOMMU_IOVA select IOMMU_DMA + select IOMMU_IO_PGTABLE depends on X86_64 && PCI && ACPI && HAVE_CMPXCHG_DOUBLE help With this option you can enable support for AMD IOMMU hardware in diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index dc5a2fa4fd37..a935f8f4b974 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o +obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h index 6b8cbdf71714..55dd38d814d9 100644 --- a/drivers/iommu/amd/amd_iommu.h +++ b/drivers/iommu/amd/amd_iommu.h @@ -11,7 +11,6 @@ #include "amd_iommu_types.h" -extern int amd_iommu_get_num_iommus(void); extern int amd_iommu_init_dma_ops(void); extern int amd_iommu_init_passthrough(void); extern irqreturn_t amd_iommu_int_thread(int irq, void *data); @@ -36,6 +35,7 @@ extern void amd_iommu_disable(void); extern int amd_iommu_reenable(int); extern int amd_iommu_enable_faulting(void); extern int amd_iommu_guest_ir; +extern enum io_pgtable_fmt amd_iommu_pgtable; /* IOMMUv2 specific functions */ struct iommu_domain; @@ -56,11 +56,14 @@ extern void amd_iommu_domain_direct_map(struct iommu_domain *dom); extern int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids); extern int amd_iommu_flush_page(struct iommu_domain *dom, u32 pasid, u64 address); +extern void amd_iommu_update_and_flush_device_table(struct protection_domain *domain); +extern void amd_iommu_domain_update(struct protection_domain *domain); +extern void amd_iommu_domain_flush_complete(struct protection_domain *domain); +extern void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain); extern int amd_iommu_flush_tlb(struct iommu_domain *dom, u32 pasid); extern int amd_iommu_domain_set_gcr3(struct iommu_domain *dom, u32 pasid, unsigned long cr3); extern int amd_iommu_domain_clear_gcr3(struct iommu_domain *dom, u32 pasid); -extern struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev); #ifdef CONFIG_IRQ_REMAP extern int amd_iommu_create_irq_domain(struct amd_iommu *iommu); @@ -84,12 +87,9 @@ static inline bool is_rd890_iommu(struct pci_dev *pdev) (pdev->device == PCI_DEVICE_ID_RD890_IOMMU); } -static inline bool iommu_feature(struct amd_iommu *iommu, u64 f) +static inline bool iommu_feature(struct amd_iommu *iommu, u64 mask) { - if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) - return false; - - return !!(iommu->features & f); + return !!(iommu->features & mask); } static inline u64 iommu_virt_to_phys(void *vaddr) @@ -102,6 +102,21 @@ static inline void *iommu_phys_to_virt(unsigned long paddr) return phys_to_virt(__sme_clr(paddr)); } +static inline +void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) +{ + atomic64_set(&domain->iop.pt_root, root); + domain->iop.root = (u64 *)(root & PAGE_MASK); + domain->iop.mode = root & 7; /* lowest 3 bits encode pgtable mode */ +} + +static inline +void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) +{ + amd_iommu_domain_set_pt_root(domain, 0); +} + + extern bool translation_pre_enabled(struct amd_iommu *iommu); extern bool 
amd_iommu_is_attach_deferred(struct iommu_domain *domain, struct device *dev); @@ -114,4 +129,6 @@ void amd_iommu_apply_ivrs_quirks(void); static inline void amd_iommu_apply_ivrs_quirks(void) { } #endif +extern void amd_iommu_domain_set_pgtable(struct protection_domain *domain, + u64 *root, int mode); #endif diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index f696ac7c5f89..94c1a7a9876d 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -15,6 +15,7 @@ #include <linux/spinlock.h> #include <linux/pci.h> #include <linux/irqreturn.h> +#include <linux/io-pgtable.h> /* * Maximum number of IOMMUs supported @@ -252,14 +253,35 @@ #define GA_GUEST_NR 0x1 +#define IOMMU_IN_ADDR_BIT_SIZE 52 +#define IOMMU_OUT_ADDR_BIT_SIZE 52 + +/* + * This bitmap is used to advertise the page sizes our hardware support + * to the IOMMU core, which will then use this information to split + * physically contiguous memory regions it is mapping into page sizes + * that we support. + * + * 512GB Pages are not supported due to a hardware bug + */ +#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) + /* Bit value definition for dte irq remapping fields*/ #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) #define DTE_IRQ_REMAP_INTCTL_MASK (0x3ULL << 60) -#define DTE_IRQ_TABLE_LEN_MASK (0xfULL << 1) #define DTE_IRQ_REMAP_INTCTL (2ULL << 60) -#define DTE_IRQ_TABLE_LEN (8ULL << 1) #define DTE_IRQ_REMAP_ENABLE 1ULL +/* + * AMD IOMMU hardware only support 512 IRTEs despite + * the architectural limitation of 2048 entries. + */ +#define DTE_INTTAB_ALIGNMENT 128 +#define DTE_INTTABLEN_VALUE 9ULL +#define DTE_INTTABLEN (DTE_INTTABLEN_VALUE << 1) +#define DTE_INTTABLEN_MASK (0xfULL << 1) +#define MAX_IRQS_PER_TABLE (1 << DTE_INTTABLEN_VALUE) + #define PAGE_MODE_NONE 0x00 #define PAGE_MODE_1_LEVEL 0x01 #define PAGE_MODE_2_LEVEL 0x02 @@ -379,6 +401,10 @@ #define IOMMU_CAP_NPCACHE 26 #define IOMMU_CAP_EFR 27 +/* IOMMU IVINFO */ +#define IOMMU_IVINFO_OFFSET 36 +#define IOMMU_IVINFO_EFRSUP BIT(0) + /* IOMMU Feature Reporting Field (for IVHD type 10h */ #define IOMMU_FEAT_GASUP_SHIFT 6 @@ -409,9 +435,6 @@ extern bool amd_iommu_np_cache; /* Only true if all IOMMUs support device IOTLBs */ extern bool amd_iommu_iotlb_sup; -#define MAX_IRQS_PER_TABLE 256 -#define IRQ_TABLE_ALIGNMENT 128 - struct irq_remap_table { raw_spinlock_t lock; unsigned min_index; @@ -461,6 +484,27 @@ struct amd_irte_ops; #define AMD_IOMMU_FLAG_TRANS_PRE_ENABLED (1 << 0) +#define io_pgtable_to_data(x) \ + container_of((x), struct amd_io_pgtable, iop) + +#define io_pgtable_ops_to_data(x) \ + io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) + +#define io_pgtable_ops_to_domain(x) \ + container_of(io_pgtable_ops_to_data(x), \ + struct protection_domain, iop) + +#define io_pgtable_cfg_to_data(x) \ + container_of((x), struct amd_io_pgtable, pgtbl_cfg) + +struct amd_io_pgtable { + struct io_pgtable_cfg pgtbl_cfg; + struct io_pgtable iop; + int mode; + u64 *root; + atomic64_t pt_root; /* pgtable root and pgtable mode */ +}; + /* * This structure contains generic data for IOMMU protection domains * independent of their use. 
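For reference, the new pt_root encoding in amd_iommu_domain_set_pt_root() works because the page-table root is page aligned: its low 12 bits are always zero, so the low 3 bits are free to carry the paging mode, and root pointer plus mode can be published together with a single atomic64 store. A minimal stand-alone sketch of the same packing (illustration only; these helper names are not part of the patch):

#include <stdint.h>

/* Pack a page-aligned root pointer and a PAGE_MODE_* value into one u64. */
static inline uint64_t pack_pt_root(uint64_t root, int mode)
{
	return (root & ~0xfffULL) | ((uint64_t)mode & 7);
}

/* Recover both fields, mirroring what amd_iommu_domain_set_pt_root() caches. */
static inline void unpack_pt_root(uint64_t pt_root, uint64_t *root, int *mode)
{
	*root = pt_root & ~0xfffULL;	/* PAGE_MASK for 4K pages */
	*mode = pt_root & 7;		/* lowest 3 bits encode pgtable mode */
}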
@@ -469,9 +513,9 @@ struct protection_domain { struct list_head dev_list; /* List of all devices in this domain */ struct iommu_domain domain; /* generic domain handle used by iommu core code */ + struct amd_io_pgtable iop; spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ - atomic64_t pt_root; /* pgtable root and pgtable mode */ int glx; /* Number of levels for GCR3 table */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags; /* flags to find out type of domain */ @@ -479,12 +523,6 @@ struct protection_domain { unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; -/* For decocded pt_root */ -struct domain_pgtable { - int mode; - u64 *root; -}; - /* * Structure where we save information about one hardware AMD IOMMU in the * system. @@ -655,7 +693,6 @@ struct iommu_dev_data { } ats; /* ATS state */ bool pri_tlp; /* PASID TLB required for PPR completions */ - u32 errata; /* Bitmap for errata to apply */ bool use_vapic; /* Enable device to use vapic mode */ bool defer_attach; @@ -893,7 +930,7 @@ struct amd_ir_data { }; struct amd_irte_ops { - void (*prepare)(void *, u32, u32, u8, u32, int); + void (*prepare)(void *, u32, bool, u8, u32, int); void (*activate)(void *, u16, u16); void (*deactivate)(void *, u16, u16); void (*set_affinity)(void *, u16, u16, u8, u32); diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index 82e4af8f09bb..d006724f4dc2 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -16,6 +16,7 @@ #include <linux/syscore_ops.h> #include <linux/interrupt.h> #include <linux/msi.h> +#include <linux/irq.h> #include <linux/amd-iommu.h> #include <linux/export.h> #include <linux/kmemleak.h> @@ -23,12 +24,12 @@ #include <asm/pci-direct.h> #include <asm/iommu.h> #include <asm/apic.h> -#include <asm/msidef.h> #include <asm/gart.h> #include <asm/x86_init.h> #include <asm/iommu_table.h> #include <asm/io_apic.h> #include <asm/irq_remapping.h> +#include <asm/set_memory.h> #include <linux/crash_dump.h> @@ -146,6 +147,8 @@ struct ivmd_header { bool amd_iommu_dump; bool amd_iommu_irq_remap __read_mostly; +enum io_pgtable_fmt amd_iommu_pgtable = AMD_IOMMU_V1; + int amd_iommu_guest_ir = AMD_IOMMU_GUEST_IR_VAPIC; static int amd_iommu_xt_mode = IRQ_REMAP_XAPIC_MODE; @@ -204,7 +207,6 @@ u16 *amd_iommu_alias_table; * for a specific device. It is also indexed by the PCI device id. */ struct amd_iommu **amd_iommu_rlookup_table; -EXPORT_SYMBOL(amd_iommu_rlookup_table); /* * This table is used to find the irq remapping table for a given device id @@ -256,11 +258,12 @@ static void init_device_table_dma(void); static bool amd_iommu_pre_enabled = true; +static u32 amd_iommu_ivinfo __initdata; + bool translation_pre_enabled(struct amd_iommu *iommu) { return (iommu->flags & AMD_IOMMU_FLAG_TRANS_PRE_ENABLED); } -EXPORT_SYMBOL(translation_pre_enabled); static void clear_translation_pre_enabled(struct amd_iommu *iommu) { @@ -295,6 +298,18 @@ int amd_iommu_get_num_iommus(void) return amd_iommus_present; } +/* + * For IVHD type 0x11/0x40, EFR is also available via IVHD. + * Default to IVHD EFR since it is available sooner + * (i.e. before PCI init). 
+ */ +static void __init early_iommu_features_init(struct amd_iommu *iommu, + struct ivhd_header *h) +{ + if (amd_iommu_ivinfo & IOMMU_IVINFO_EFRSUP) + iommu->features = h->efr_reg; +} + /* Access to l1 and l2 indexed register spaces */ static u32 iommu_read_l1(struct amd_iommu *iommu, u16 l1, u8 address) @@ -672,11 +687,27 @@ static void __init free_command_buffer(struct amd_iommu *iommu) free_pages((unsigned long)iommu->cmd_buf, get_order(CMD_BUFFER_SIZE)); } +static void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, + gfp_t gfp, size_t size) +{ + int order = get_order(size); + void *buf = (void *)__get_free_pages(gfp, order); + + if (buf && + iommu_feature(iommu, FEATURE_SNP) && + set_memory_4k((unsigned long)buf, (1 << order))) { + free_pages((unsigned long)buf, order); + buf = NULL; + } + + return buf; +} + /* allocates the memory where the IOMMU will log its events to */ static int __init alloc_event_buffer(struct amd_iommu *iommu) { - iommu->evt_buf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(EVT_BUFFER_SIZE)); + iommu->evt_buf = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, + EVT_BUFFER_SIZE); return iommu->evt_buf ? 0 : -ENOMEM; } @@ -715,8 +746,8 @@ static void __init free_event_buffer(struct amd_iommu *iommu) /* allocates the memory where the IOMMU will log its events to */ static int __init alloc_ppr_log(struct amd_iommu *iommu) { - iommu->ppr_log = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, - get_order(PPR_LOG_SIZE)); + iommu->ppr_log = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, + PPR_LOG_SIZE); return iommu->ppr_log ? 0 : -ENOMEM; } @@ -838,7 +869,7 @@ static int iommu_init_ga(struct amd_iommu *iommu) static int __init alloc_cwwb_sem(struct amd_iommu *iommu) { - iommu->cmd_sem = (void *)get_zeroed_page(GFP_KERNEL); + iommu->cmd_sem = iommu_alloc_4k_pages(iommu, GFP_KERNEL | __GFP_ZERO, 1); return iommu->cmd_sem ? 0 : -ENOMEM; } @@ -972,10 +1003,10 @@ static bool copy_device_table(void) irq_v = old_devtb[devid].data[2] & DTE_IRQ_REMAP_ENABLE; int_ctl = old_devtb[devid].data[2] & DTE_IRQ_REMAP_INTCTL_MASK; - int_tab_len = old_devtb[devid].data[2] & DTE_IRQ_TABLE_LEN_MASK; + int_tab_len = old_devtb[devid].data[2] & DTE_INTTABLEN_MASK; if (irq_v && (int_ctl || int_tab_len)) { if ((int_ctl != DTE_IRQ_REMAP_INTCTL) || - (int_tab_len != DTE_IRQ_TABLE_LEN)) { + (int_tab_len != DTE_INTTABLEN)) { pr_err("Wrong old irq remapping flag: %#x\n", devid); return false; } @@ -1558,15 +1589,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) break; } - /* - * Note: Since iommu_update_intcapxt() leverages - * the IOMMU MMIO access to MSI capability block registers - * for MSI address lo/hi/data, we need to check both - * EFR[XtSup] and EFR[MsiCapMmioSup] for x2APIC support. - */ - if ((h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) && - (h->efr_reg & BIT(IOMMU_EFR_MSICAPMMIOSUP_SHIFT))) + if (h->efr_reg & BIT(IOMMU_EFR_XTSUP_SHIFT)) amd_iommu_xt_mode = IRQ_REMAP_X2APIC_MODE; + + early_iommu_features_init(iommu, h); + break; default: return -EINVAL; @@ -1602,9 +1629,11 @@ static int __init init_iommu_one(struct amd_iommu *iommu, struct ivhd_header *h) if (ret) return ret; - ret = amd_iommu_create_irq_domain(iommu); - if (ret) - return ret; + if (amd_iommu_irq_remap) { + ret = amd_iommu_create_irq_domain(iommu); + if (ret) + return ret; + } /* * Make sure IOMMU is not considered to translate itself. 
The IVRS @@ -1683,33 +1712,16 @@ static int __init init_iommu_all(struct acpi_table_header *table) return 0; } -static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, - u8 fxn, u64 *value, bool is_write); - static void init_iommu_perf_ctr(struct amd_iommu *iommu) { + u64 val; struct pci_dev *pdev = iommu->dev; - u64 val = 0xabcd, val2 = 0, save_reg = 0; if (!iommu_feature(iommu, FEATURE_PC)) return; amd_iommu_pc_present = true; - /* save the value to restore, if writable */ - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, false)) - goto pc_false; - - /* Check if the performance counters can be written to */ - if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) || - (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) || - (val != val2)) - goto pc_false; - - /* restore */ - if (iommu_pc_get_set_reg(iommu, 0, 0, 0, &save_reg, true)) - goto pc_false; - pci_info(pdev, "IOMMU performance counters supported\n"); val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); @@ -1717,11 +1729,6 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) iommu->max_counters = (u8) ((val >> 7) & 0xf); return; - -pc_false: - pci_err(pdev, "Unable to read/write to IOMMU perf counter.\n"); - amd_iommu_pc_present = false; - return; } static ssize_t amd_iommu_show_cap(struct device *dev, @@ -1758,6 +1765,35 @@ static const struct attribute_group *amd_iommu_groups[] = { NULL, }; +/* + * Note: IVHD 0x11 and 0x40 also contains exact copy + * of the IOMMU Extended Feature Register [MMIO Offset 0030h]. + * Default to EFR in IVHD since it is available sooner (i.e. before PCI init). + */ +static void __init late_iommu_features_init(struct amd_iommu *iommu) +{ + u64 features; + + if (!(iommu->cap & (1 << IOMMU_CAP_EFR))) + return; + + /* read extended feature bits */ + features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); + + if (!iommu->features) { + iommu->features = features; + return; + } + + /* + * Sanity check and warn if EFR values from + * IVHD and MMIO conflict. + */ + if (features != iommu->features) + pr_warn(FW_WARN "EFR mismatch. 
Use IVHD EFR (%#llx : %#llx).\n", + features, iommu->features); +} + static int __init iommu_init_pci(struct amd_iommu *iommu) { int cap_ptr = iommu->cap_ptr; @@ -1777,8 +1813,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) if (!(iommu->cap & (1 << IOMMU_CAP_IOTLB))) amd_iommu_iotlb_sup = false; - /* read extended feature bits */ - iommu->features = readq(iommu->mmio_base + MMIO_EXT_FEATURES); + late_iommu_features_init(iommu); if (iommu_feature(iommu, FEATURE_GT)) { int glxval; @@ -1853,8 +1888,7 @@ static int __init iommu_init_pci(struct amd_iommu *iommu) iommu_device_sysfs_add(&iommu->iommu, &iommu->dev->dev, amd_iommu_groups, "ivhd%d", iommu->index); - iommu_device_set_ops(&iommu->iommu, &amd_iommu_ops); - iommu_device_register(&iommu->iommu); + iommu_device_register(&iommu->iommu, &amd_iommu_ops, NULL); return pci_enable_device(iommu->dev); } @@ -1871,7 +1905,7 @@ static void print_iommu_info(void) struct pci_dev *pdev = iommu->dev; int i; - pci_info(pdev, "Found IOMMU cap 0x%hx\n", iommu->cap_ptr); + pci_info(pdev, "Found IOMMU cap 0x%x\n", iommu->cap_ptr); if (iommu->cap & (1 << IOMMU_CAP_EFR)) { pci_info(pdev, "Extended features (%#llx):", @@ -1899,7 +1933,7 @@ static void print_iommu_info(void) static int __init amd_iommu_init_pci(void) { struct amd_iommu *iommu; - int ret = 0; + int ret; for_each_iommu(iommu) { ret = iommu_init_pci(iommu); @@ -1961,103 +1995,193 @@ static int iommu_setup_msi(struct amd_iommu *iommu) return r; } - iommu->int_enabled = true; - return 0; } -#define XT_INT_DEST_MODE(x) (((x) & 0x1ULL) << 2) -#define XT_INT_DEST_LO(x) (((x) & 0xFFFFFFULL) << 8) -#define XT_INT_VEC(x) (((x) & 0xFFULL) << 32) -#define XT_INT_DEST_HI(x) ((((x) >> 24) & 0xFFULL) << 56) +union intcapxt { + u64 capxt; + struct { + u64 reserved_0 : 2, + dest_mode_logical : 1, + reserved_1 : 5, + destid_0_23 : 24, + vector : 8, + reserved_2 : 16, + destid_24_31 : 8; + }; +} __attribute__ ((packed)); /* - * Setup the IntCapXT registers with interrupt routing information - * based on the PCI MSI capability block registers, accessed via - * MMIO MSI address low/hi and MSI data registers. + * There isn't really any need to mask/unmask at the irqchip level because + * the 64-bit INTCAPXT registers can be updated atomically without tearing + * when the affinity is being updated. */ -static void iommu_update_intcapxt(struct amd_iommu *iommu) +static void intcapxt_unmask_irq(struct irq_data *data) { - u64 val; - u32 addr_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET); - u32 addr_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET); - u32 data = readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET); - bool dm = (addr_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1; - u32 dest = ((addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xFF); +} - if (x2apic_enabled()) - dest |= MSI_ADDR_EXT_DEST_ID(addr_hi); +static void intcapxt_mask_irq(struct irq_data *data) +{ +} - val = XT_INT_VEC(data & 0xFF) | - XT_INT_DEST_MODE(dm) | - XT_INT_DEST_LO(dest) | - XT_INT_DEST_HI(dest); +static struct irq_chip intcapxt_controller; + +static int intcapxt_irqdomain_activate(struct irq_domain *domain, + struct irq_data *irqd, bool reserve) +{ + struct amd_iommu *iommu = irqd->chip_data; + struct irq_cfg *cfg = irqd_cfg(irqd); + union intcapxt xt; + + xt.capxt = 0ULL; + xt.dest_mode_logical = apic->dest_mode_logical; + xt.vector = cfg->vector; + xt.destid_0_23 = cfg->dest_apicid & GENMASK(23, 0); + xt.destid_24_31 = cfg->dest_apicid >> 24; /** * Current IOMMU implemtation uses the same IRQ for all * 3 IOMMU interrupts. 
*/ - writeq(val, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); - writeq(val, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); - writeq(val, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); + writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET); + writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET); + writeq(xt.capxt, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET); + return 0; } -static void _irq_notifier_notify(struct irq_affinity_notify *notify, - const cpumask_t *mask) +static void intcapxt_irqdomain_deactivate(struct irq_domain *domain, + struct irq_data *irqd) { - struct amd_iommu *iommu; + intcapxt_mask_irq(irqd); +} - for_each_iommu(iommu) { - if (iommu->dev->irq == notify->irq) { - iommu_update_intcapxt(iommu); - break; - } + +static int intcapxt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs, void *arg) +{ + struct irq_alloc_info *info = arg; + int i, ret; + + if (!info || info->type != X86_IRQ_ALLOC_TYPE_AMDVI) + return -EINVAL; + + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret < 0) + return ret; + + for (i = virq; i < virq + nr_irqs; i++) { + struct irq_data *irqd = irq_domain_get_irq_data(domain, i); + + irqd->chip = &intcapxt_controller; + irqd->chip_data = info->data; + __irq_set_handler(i, handle_edge_irq, 0, "edge"); } + + return ret; } -static void _irq_notifier_release(struct kref *ref) +static void intcapxt_irqdomain_free(struct irq_domain *domain, unsigned int virq, + unsigned int nr_irqs) { + irq_domain_free_irqs_top(domain, virq, nr_irqs); } -static int iommu_init_intcapxt(struct amd_iommu *iommu) +static int intcapxt_set_affinity(struct irq_data *irqd, + const struct cpumask *mask, bool force) { + struct irq_data *parent = irqd->parent_data; int ret; - struct irq_affinity_notify *notify = &iommu->intcapxt_notify; - /** - * IntCapXT requires XTSup=1 and MsiCapMmioSup=1, - * which can be inferred from amd_iommu_xt_mode. - */ - if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE) - return 0; + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) + return ret; - /** - * Also, we need to setup notifier to update the IntCapXT registers - * whenever the irq affinity is changed from user-space. 
- */ - notify->irq = iommu->dev->irq; - notify->notify = _irq_notifier_notify, - notify->release = _irq_notifier_release, - ret = irq_set_affinity_notifier(iommu->dev->irq, notify); + return intcapxt_irqdomain_activate(irqd->domain, irqd, false); +} + +static struct irq_chip intcapxt_controller = { + .name = "IOMMU-MSI", + .irq_unmask = intcapxt_unmask_irq, + .irq_mask = intcapxt_mask_irq, + .irq_ack = irq_chip_ack_parent, + .irq_retrigger = irq_chip_retrigger_hierarchy, + .irq_set_affinity = intcapxt_set_affinity, + .flags = IRQCHIP_SKIP_SET_WAKE, +}; + +static const struct irq_domain_ops intcapxt_domain_ops = { + .alloc = intcapxt_irqdomain_alloc, + .free = intcapxt_irqdomain_free, + .activate = intcapxt_irqdomain_activate, + .deactivate = intcapxt_irqdomain_deactivate, +}; + + +static struct irq_domain *iommu_irqdomain; + +static struct irq_domain *iommu_get_irqdomain(void) +{ + struct fwnode_handle *fn; + + /* No need for locking here (yet) as the init is single-threaded */ + if (iommu_irqdomain) + return iommu_irqdomain; + + fn = irq_domain_alloc_named_fwnode("AMD-Vi-MSI"); + if (!fn) + return NULL; + + iommu_irqdomain = irq_domain_create_hierarchy(x86_vector_domain, 0, 0, + fn, &intcapxt_domain_ops, + NULL); + if (!iommu_irqdomain) + irq_domain_free_fwnode(fn); + + return iommu_irqdomain; +} + +static int iommu_setup_intcapxt(struct amd_iommu *iommu) +{ + struct irq_domain *domain; + struct irq_alloc_info info; + int irq, ret; + + domain = iommu_get_irqdomain(); + if (!domain) + return -ENXIO; + + init_irq_alloc_info(&info, NULL); + info.type = X86_IRQ_ALLOC_TYPE_AMDVI; + info.data = iommu; + + irq = irq_domain_alloc_irqs(domain, 1, NUMA_NO_NODE, &info); + if (irq < 0) { + irq_domain_remove(domain); + return irq; + } + + ret = request_threaded_irq(irq, amd_iommu_int_handler, + amd_iommu_int_thread, 0, "AMD-Vi", iommu); if (ret) { - pr_err("Failed to register irq affinity notifier (devid=%#x, irq %d)\n", - iommu->devid, iommu->dev->irq); + irq_domain_free_irqs(irq, 1); + irq_domain_remove(domain); return ret; } - iommu_update_intcapxt(iommu); iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN); - return ret; + return 0; } -static int iommu_init_msi(struct amd_iommu *iommu) +static int iommu_init_irq(struct amd_iommu *iommu) { int ret; if (iommu->int_enabled) goto enable_faults; - if (iommu->dev->msi_cap) + if (amd_iommu_xt_mode == IRQ_REMAP_X2APIC_MODE) + ret = iommu_setup_intcapxt(iommu); + else if (iommu->dev->msi_cap) ret = iommu_setup_msi(iommu); else ret = -ENODEV; @@ -2065,11 +2189,8 @@ static int iommu_init_msi(struct amd_iommu *iommu) if (ret) return ret; + iommu->int_enabled = true; enable_faults: - ret = iommu_init_intcapxt(iommu); - if (ret) - return ret; - iommu_feature_enable(iommu, CONTROL_EVT_INT_EN); if (iommu->ppr_log != NULL) @@ -2508,6 +2629,11 @@ static void __init free_dma_resources(void) free_unity_maps(); } +static void __init ivinfo_init(void *ivrs) +{ + amd_iommu_ivinfo = *((u32 *)(ivrs + IOMMU_IVINFO_OFFSET)); +} + /* * This is the hardware init function for AMD IOMMU in the system. 
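The union intcapxt above describes the routing information that intcapxt_irqdomain_activate() writes into the three IntCapXT MMIO registers. A shift-based restatement of the same bit layout can make the bitfields easier to check; this helper and its sample values (vector 0x30, APIC ID 0x25, physical destination mode) are purely illustrative and not part of the patch:

#include <stdint.h>

static uint64_t build_intcapxt(uint32_t apicid, uint8_t vector, int logical)
{
	uint64_t v = 0;

	v |= (uint64_t)(logical & 0x1) << 2;		/* DestMode (bit 2)    */
	v |= (uint64_t)(apicid & 0xffffff) << 8;	/* Destination[23:0]   */
	v |= (uint64_t)vector << 32;			/* Vector (bits 39:32) */
	v |= (uint64_t)(apicid >> 24) << 56;		/* Destination[31:24]  */

	return v;	/* apicid=0x25, vector=0x30 -> 0x0000003000002500 */
}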
* This function is called either from amd_iommu_init or from the interrupt @@ -2538,9 +2664,8 @@ static void __init free_dma_resources(void) static int __init early_amd_iommu_init(void) { struct acpi_table_header *ivrs_base; + int i, remap_cache_sz, ret; acpi_status status; - int i, remap_cache_sz, ret = 0; - u32 pci_id; if (!amd_iommu_detected) return -ENODEV; @@ -2562,6 +2687,8 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; + ivinfo_init(ivrs_base); + amd_iommu_target_ivhd_type = get_highest_supported_ivhd_type(ivrs_base); DUMP_printk("Using IVHD type %#x\n", amd_iommu_target_ivhd_type); @@ -2628,16 +2755,6 @@ static int __init early_amd_iommu_init(void) if (ret) goto out; - /* Disable IOMMU if there's Stoney Ridge graphics */ - for (i = 0; i < 32; i++) { - pci_id = read_pci_config(0, i, 0, 0); - if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) { - pr_info("Disable IOMMU on Stoney Ridge\n"); - amd_iommu_disabled = true; - break; - } - } - /* Disable any previously enabled IOMMUs */ if (!is_kdump_kernel() || amd_iommu_disabled) disable_iommus(); @@ -2657,7 +2774,7 @@ static int __init early_amd_iommu_init(void) remap_cache_sz = MAX_IRQS_PER_TABLE * (sizeof(u64) * 2); amd_iommu_irq_cache = kmem_cache_create("irq_remap_cache", remap_cache_sz, - IRQ_TABLE_ALIGNMENT, + DTE_INTTAB_ALIGNMENT, 0, NULL); if (!amd_iommu_irq_cache) goto out; @@ -2681,7 +2798,6 @@ static int __init early_amd_iommu_init(void) out: /* Don't leak any ACPI memory */ acpi_put_table(ivrs_base); - ivrs_base = NULL; return ret; } @@ -2692,7 +2808,7 @@ static int amd_iommu_enable_interrupts(void) int ret = 0; for_each_iommu(iommu) { - ret = iommu_init_msi(iommu); + ret = iommu_init_irq(iommu); if (ret) goto out; } @@ -2705,6 +2821,7 @@ static bool detect_ivrs(void) { struct acpi_table_header *ivrs_base; acpi_status status; + int i; status = acpi_get_table("IVRS", 0, &ivrs_base); if (status == AE_NOT_FOUND) @@ -2717,6 +2834,17 @@ static bool detect_ivrs(void) acpi_put_table(ivrs_base); + /* Don't use IOMMU if there is Stoney Ridge graphics */ + for (i = 0; i < 32; i++) { + u32 pci_id; + + pci_id = read_pci_config(0, i, 0, 0); + if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) { + pr_info("Disable IOMMU on Stoney Ridge\n"); + return false; + } + } + /* Make sure ACS will be enabled during PCI probe */ pci_request_acs(); @@ -2743,12 +2871,12 @@ static int __init state_next(void) } break; case IOMMU_IVRS_DETECTED: - ret = early_amd_iommu_init(); - init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; - if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) { - pr_info("AMD IOMMU disabled\n"); + if (amd_iommu_disabled) { init_state = IOMMU_CMDLINE_DISABLED; ret = -EINVAL; + } else { + ret = early_amd_iommu_init(); + init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED; } break; case IOMMU_ACPI_FINISHED: @@ -2826,8 +2954,11 @@ int __init amd_iommu_prepare(void) amd_iommu_irq_remap = true; ret = iommu_go_to_state(IOMMU_ACPI_FINISHED); - if (ret) + if (ret) { + amd_iommu_irq_remap = false; return ret; + } + return amd_iommu_irq_remap ? 
0 : -ENODEV; } @@ -3098,7 +3229,6 @@ struct amd_iommu *get_amd_iommu(unsigned int idx) return iommu; return NULL; } -EXPORT_SYMBOL(get_amd_iommu); /**************************************************************************** * @@ -3180,7 +3310,6 @@ int amd_iommu_pc_get_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, false); } -EXPORT_SYMBOL(amd_iommu_pc_get_reg); int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 *value) { @@ -3189,4 +3318,3 @@ int amd_iommu_pc_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, u8 fxn, u64 return iommu_pc_get_set_reg(iommu, bank, cntr, fxn, value, true); } -EXPORT_SYMBOL(amd_iommu_pc_set_reg); diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c new file mode 100644 index 000000000000..bb0ee5c9fde7 --- /dev/null +++ b/drivers/iommu/amd/io_pgtable.c @@ -0,0 +1,560 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * CPU-agnostic AMD IO page table allocator. + * + * Copyright (C) 2020 Advanced Micro Devices, Inc. + * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include <linux/atomic.h> +#include <linux/bitops.h> +#include <linux/io-pgtable.h> +#include <linux/kernel.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/dma-mapping.h> + +#include <asm/barrier.h> + +#include "amd_iommu_types.h" +#include "amd_iommu.h" + +static void v1_tlb_flush_all(void *cookie) +{ +} + +static void v1_tlb_flush_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void v1_tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, + void *cookie) +{ +} + +static const struct iommu_flush_ops v1_flush_ops = { + .tlb_flush_all = v1_tlb_flush_all, + .tlb_flush_walk = v1_tlb_flush_walk, + .tlb_add_page = v1_tlb_add_page, +}; + +/* + * Helper function to get the first pte of a large mapping + */ +static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, + unsigned long *count) +{ + unsigned long pte_mask, pg_size, cnt; + u64 *fpte; + + pg_size = PTE_PAGE_SIZE(*pte); + cnt = PAGE_SIZE_PTE_COUNT(pg_size); + pte_mask = ~((cnt << 3) - 1); + fpte = (u64 *)(((unsigned long)pte) & pte_mask); + + if (page_size) + *page_size = pg_size; + + if (count) + *count = cnt; + + return fpte; +} + +/**************************************************************************** + * + * The functions below are used the create the page table mappings for + * unity mapped regions. + * + ****************************************************************************/ + +static void free_page_list(struct page *freelist) +{ + while (freelist != NULL) { + unsigned long p = (unsigned long)page_address(freelist); + + freelist = freelist->freelist; + free_page(p); + } +} + +static struct page *free_pt_page(unsigned long pt, struct page *freelist) +{ + struct page *p = virt_to_page((void *)pt); + + p->freelist = freelist; + + return p; +} + +#define DEFINE_FREE_PT_FN(LVL, FN) \ +static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \ +{ \ + unsigned long p; \ + u64 *pt; \ + int i; \ + \ + pt = (u64 *)__pt; \ + \ + for (i = 0; i < 512; ++i) { \ + /* PTE present? */ \ + if (!IOMMU_PTE_PRESENT(pt[i])) \ + continue; \ + \ + /* Large PTE? 
*/ \ + if (PM_PTE_LEVEL(pt[i]) == 0 || \ + PM_PTE_LEVEL(pt[i]) == 7) \ + continue; \ + \ + p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ + freelist = FN(p, freelist); \ + } \ + \ + return free_pt_page((unsigned long)pt, freelist); \ +} + +DEFINE_FREE_PT_FN(l2, free_pt_page) +DEFINE_FREE_PT_FN(l3, free_pt_l2) +DEFINE_FREE_PT_FN(l4, free_pt_l3) +DEFINE_FREE_PT_FN(l5, free_pt_l4) +DEFINE_FREE_PT_FN(l6, free_pt_l5) + +static struct page *free_sub_pt(unsigned long root, int mode, + struct page *freelist) +{ + switch (mode) { + case PAGE_MODE_NONE: + case PAGE_MODE_7_LEVEL: + break; + case PAGE_MODE_1_LEVEL: + freelist = free_pt_page(root, freelist); + break; + case PAGE_MODE_2_LEVEL: + freelist = free_pt_l2(root, freelist); + break; + case PAGE_MODE_3_LEVEL: + freelist = free_pt_l3(root, freelist); + break; + case PAGE_MODE_4_LEVEL: + freelist = free_pt_l4(root, freelist); + break; + case PAGE_MODE_5_LEVEL: + freelist = free_pt_l5(root, freelist); + break; + case PAGE_MODE_6_LEVEL: + freelist = free_pt_l6(root, freelist); + break; + default: + BUG(); + } + + return freelist; +} + +void amd_iommu_domain_set_pgtable(struct protection_domain *domain, + u64 *root, int mode) +{ + u64 pt_root; + + /* lowest 3 bits encode pgtable mode */ + pt_root = mode & 7; + pt_root |= (u64)root; + + amd_iommu_domain_set_pt_root(domain, pt_root); +} + +/* + * This function is used to add another level to an IO page table. Adding + * another level increases the size of the address space by 9 bits to a size up + * to 64 bits. + */ +static bool increase_address_space(struct protection_domain *domain, + unsigned long address, + gfp_t gfp) +{ + unsigned long flags; + bool ret = true; + u64 *pte; + + pte = (void *)get_zeroed_page(gfp); + if (!pte) + return false; + + spin_lock_irqsave(&domain->lock, flags); + + if (address <= PM_LEVEL_SIZE(domain->iop.mode)) + goto out; + + ret = false; + if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL)) + goto out; + + *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root)); + + domain->iop.root = pte; + domain->iop.mode += 1; + amd_iommu_update_and_flush_device_table(domain); + amd_iommu_domain_flush_complete(domain); + + /* + * Device Table needs to be updated and flushed before the new root can + * be published. + */ + amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode); + + pte = NULL; + ret = true; + +out: + spin_unlock_irqrestore(&domain->lock, flags); + free_page((unsigned long)pte); + + return ret; +} + +static u64 *alloc_pte(struct protection_domain *domain, + unsigned long address, + unsigned long page_size, + u64 **pte_page, + gfp_t gfp, + bool *updated) +{ + int level, end_lvl; + u64 *pte, *page; + + BUG_ON(!is_power_of_2(page_size)); + + while (address > PM_LEVEL_SIZE(domain->iop.mode)) { + /* + * Return an error if there is no memory to update the + * page-table. + */ + if (!increase_address_space(domain, address, gfp)) + return NULL; + } + + + level = domain->iop.mode - 1; + pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)]; + address = PAGE_SIZE_ALIGN(address, page_size); + end_lvl = PAGE_SIZE_LEVEL(page_size); + + while (level > end_lvl) { + u64 __pte, __npte; + int pte_level; + + __pte = *pte; + pte_level = PM_PTE_LEVEL(__pte); + + /* + * If we replace a series of large PTEs, we need + * to tear down all of them. 
+ */ + if (IOMMU_PTE_PRESENT(__pte) && + pte_level == PAGE_MODE_7_LEVEL) { + unsigned long count, i; + u64 *lpte; + + lpte = first_pte_l7(pte, NULL, &count); + + /* + * Unmap the replicated PTEs that still match the + * original large mapping + */ + for (i = 0; i < count; ++i) + cmpxchg64(&lpte[i], __pte, 0ULL); + + *updated = true; + continue; + } + + if (!IOMMU_PTE_PRESENT(__pte) || + pte_level == PAGE_MODE_NONE) { + page = (u64 *)get_zeroed_page(gfp); + + if (!page) + return NULL; + + __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page)); + + /* pte could have been changed somewhere. */ + if (cmpxchg64(pte, __pte, __npte) != __pte) + free_page((unsigned long)page); + else if (IOMMU_PTE_PRESENT(__pte)) + *updated = true; + + continue; + } + + /* No level skipping support yet */ + if (pte_level != level) + return NULL; + + level -= 1; + + pte = IOMMU_PTE_PAGE(__pte); + + if (pte_page && level == end_lvl) + *pte_page = pte; + + pte = &pte[PM_LEVEL_INDEX(level, address)]; + } + + return pte; +} + +/* + * This function checks if there is a PTE for a given dma address. If + * there is one, it returns the pointer to it. + */ +static u64 *fetch_pte(struct amd_io_pgtable *pgtable, + unsigned long address, + unsigned long *page_size) +{ + int level; + u64 *pte; + + *page_size = 0; + + if (address > PM_LEVEL_SIZE(pgtable->mode)) + return NULL; + + level = pgtable->mode - 1; + pte = &pgtable->root[PM_LEVEL_INDEX(level, address)]; + *page_size = PTE_LEVEL_PAGE_SIZE(level); + + while (level > 0) { + + /* Not Present */ + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; + + /* Large PTE */ + if (PM_PTE_LEVEL(*pte) == 7 || + PM_PTE_LEVEL(*pte) == 0) + break; + + /* No level skipping support yet */ + if (PM_PTE_LEVEL(*pte) != level) + return NULL; + + level -= 1; + + /* Walk to the next level */ + pte = IOMMU_PTE_PAGE(*pte); + pte = &pte[PM_LEVEL_INDEX(level, address)]; + *page_size = PTE_LEVEL_PAGE_SIZE(level); + } + + /* + * If we have a series of large PTEs, make + * sure to return a pointer to the first one. + */ + if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL) + pte = first_pte_l7(pte, page_size, NULL); + + return pte; +} + +static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) +{ + unsigned long pt; + int mode; + + while (cmpxchg64(pte, pteval, 0) != pteval) { + pr_warn("AMD-Vi: IOMMU pte changed since we read it\n"); + pteval = *pte; + } + + if (!IOMMU_PTE_PRESENT(pteval)) + return freelist; + + pt = (unsigned long)IOMMU_PTE_PAGE(pteval); + mode = IOMMU_PTE_MODE(pteval); + + return free_sub_pt(pt, mode, freelist); +} + +/* + * Generic mapping functions. It maps a physical address into a DMA + * address space. It allocates the page table pages if necessary. + * In the future it can be extended to a generic mapping function + * supporting all features of AMD IOMMU page tables like level skipping + * and full 64 bit address spaces. 
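The table walks in alloc_pte() and fetch_pte() above resolve 9 bits of the IOVA per level, starting at bit 12 for 4K pages, which is what PM_LEVEL_INDEX() computes. A stand-alone sketch of that indexing (illustration only, not taken from the patch):

#include <stdio.h>
#include <stdint.h>

/* 9-bit table index consumed at a given level of the v1 (AMD64) format. */
static unsigned int pm_level_index(int level, uint64_t iova)
{
	return (iova >> (12 + 9 * level)) & 0x1ff;
}

int main(void)
{
	uint64_t iova = 0x12345678000ULL;	/* arbitrary example address */
	int level;

	/* A PAGE_MODE_3_LEVEL walk starts at level 2 and ends at level 0. */
	for (level = 2; level >= 0; level--)
		printf("level %d index: %u\n", level, pm_level_index(level, iova));

	return 0;
}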
+ */ +static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp) +{ + struct protection_domain *dom = io_pgtable_ops_to_domain(ops); + struct page *freelist = NULL; + bool updated = false; + u64 __pte, *pte; + int ret, i, count; + + BUG_ON(!IS_ALIGNED(iova, size)); + BUG_ON(!IS_ALIGNED(paddr, size)); + + ret = -EINVAL; + if (!(prot & IOMMU_PROT_MASK)) + goto out; + + count = PAGE_SIZE_PTE_COUNT(size); + pte = alloc_pte(dom, iova, size, NULL, gfp, &updated); + + ret = -ENOMEM; + if (!pte) + goto out; + + for (i = 0; i < count; ++i) + freelist = free_clear_pte(&pte[i], pte[i], freelist); + + if (freelist != NULL) + updated = true; + + if (count > 1) { + __pte = PAGE_SIZE_PTE(__sme_set(paddr), size); + __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; + } else + __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC; + + if (prot & IOMMU_PROT_IR) + __pte |= IOMMU_PTE_IR; + if (prot & IOMMU_PROT_IW) + __pte |= IOMMU_PTE_IW; + + for (i = 0; i < count; ++i) + pte[i] = __pte; + + ret = 0; + +out: + if (updated) { + unsigned long flags; + + spin_lock_irqsave(&dom->lock, flags); + /* + * Flush domain TLB(s) and wait for completion. Any Device-Table + * Updates and flushing already happened in + * increase_address_space(). + */ + amd_iommu_domain_flush_tlb_pde(dom); + amd_iommu_domain_flush_complete(dom); + spin_unlock_irqrestore(&dom->lock, flags); + } + + /* Everything flushed out, free pages now */ + free_page_list(freelist); + + return ret; +} + +static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops, + unsigned long iova, + size_t size, + struct iommu_iotlb_gather *gather) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + unsigned long long unmapped; + unsigned long unmap_size; + u64 *pte; + + BUG_ON(!is_power_of_2(size)); + + unmapped = 0; + + while (unmapped < size) { + pte = fetch_pte(pgtable, iova, &unmap_size); + if (pte) { + int i, count; + + count = PAGE_SIZE_PTE_COUNT(unmap_size); + for (i = 0; i < count; i++) + pte[i] = 0ULL; + } + + iova = (iova & ~(unmap_size - 1)) + unmap_size; + unmapped += unmap_size; + } + + BUG_ON(unmapped && !is_power_of_2(unmapped)); + + return unmapped; +} + +static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + unsigned long offset_mask, pte_pgsize; + u64 *pte, __pte; + + if (pgtable->mode == PAGE_MODE_NONE) + return iova; + + pte = fetch_pte(pgtable, iova, &pte_pgsize); + + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + return 0; + + offset_mask = pte_pgsize - 1; + __pte = __sme_clr(*pte & PM_ADDR_MASK); + + return (__pte & ~offset_mask) | (iova & offset_mask); +} + +/* + * ---------------------------------------------------- + */ +static void v1_free_pgtable(struct io_pgtable *iop) +{ + struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); + struct protection_domain *dom; + struct page *freelist = NULL; + unsigned long root; + + if (pgtable->mode == PAGE_MODE_NONE) + return; + + dom = container_of(pgtable, struct protection_domain, iop); + + /* Update data structure */ + amd_iommu_domain_clr_pt_root(dom); + + /* Make changes visible to IOMMUs */ + amd_iommu_domain_update(dom); + + /* Page-table is not visible to IOMMU anymore, so free it */ + BUG_ON(pgtable->mode < PAGE_MODE_NONE || + pgtable->mode > PAGE_MODE_6_LEVEL); + + root = (unsigned long)pgtable->root; + freelist = free_sub_pt(root, pgtable->mode, freelist); + + 
free_page_list(freelist); +} + +static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); + + cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES, + cfg->ias = IOMMU_IN_ADDR_BIT_SIZE, + cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, + cfg->tlb = &v1_flush_ops; + + pgtable->iop.ops.map = iommu_v1_map_page; + pgtable->iop.ops.unmap = iommu_v1_unmap_page; + pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; + + return &pgtable->iop; +} + +struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = { + .alloc = v1_alloc_pgtable, + .free = v1_free_pgtable, +}; diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index b9cf59443843..80e8e1916dd1 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -31,11 +31,11 @@ #include <linux/irqdomain.h> #include <linux/percpu.h> #include <linux/iova.h> +#include <linux/io-pgtable.h> #include <asm/irq_remapping.h> #include <asm/io_apic.h> #include <asm/apic.h> #include <asm/hw_irq.h> -#include <asm/msidef.h> #include <asm/proto.h> #include <asm/iommu.h> #include <asm/gart.h> @@ -58,16 +58,6 @@ #define HT_RANGE_START (0xfd00000000ULL) #define HT_RANGE_END (0xffffffffffULL) -/* - * This bitmap is used to advertise the page sizes our hardware support - * to the IOMMU core, which will then use this information to split - * physically contiguous memory regions it is mapping into page sizes - * that we support. - * - * 512GB Pages are not supported due to a hardware bug - */ -#define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) - #define DEFAULT_PGTABLE_LEVEL PAGE_MODE_3_LEVEL static DEFINE_SPINLOCK(pd_bitmap_lock); @@ -97,10 +87,7 @@ struct iommu_cmd { struct kmem_cache *amd_iommu_irq_cache; -static void update_domain(struct protection_domain *domain); static void detach_device(struct device *dev); -static void update_and_flush_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable); /**************************************************************************** * @@ -152,37 +139,6 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) return container_of(dom, struct protection_domain, domain); } -static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, - struct domain_pgtable *pgtable) -{ - u64 pt_root = atomic64_read(&domain->pt_root); - - pgtable->root = (u64 *)(pt_root & PAGE_MASK); - pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ -} - -static void amd_iommu_domain_set_pt_root(struct protection_domain *domain, u64 root) -{ - atomic64_set(&domain->pt_root, root); -} - -static void amd_iommu_domain_clr_pt_root(struct protection_domain *domain) -{ - amd_iommu_domain_set_pt_root(domain, 0); -} - -static void amd_iommu_domain_set_pgtable(struct protection_domain *domain, - u64 *root, int mode) -{ - u64 pt_root; - - /* lowest 3 bits encode pgtable mode */ - pt_root = mode & 7; - pt_root |= (u64)root; - - amd_iommu_domain_set_pt_root(domain, pt_root); -} - static struct iommu_dev_data *alloc_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -334,15 +290,6 @@ static bool pci_iommuv2_capable(struct pci_dev *pdev) return true; } -static bool pdev_pri_erratum(struct pci_dev *pdev, u32 erratum) -{ - struct iommu_dev_data *dev_data; - - dev_data = dev_iommu_priv_get(&pdev->dev); - - return dev_data->errata & (1 << erratum) ? true : false; -} - /* * This function checks if the driver got a valid device from the caller to * avoid dereferencing invalid pointers. 
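With io_pgtable_amd_iommu_v1_init_fns registered, the domain code can obtain its map/unmap callbacks through the generic io-pgtable layer instead of calling the page-table helpers directly. A schematic sketch of that hookup (the real call sites are added in iommu.c as part of this series; the function name here is illustrative):

/* Sketch only: attach the AMD_IOMMU_V1 format to a protection domain. */
static int example_domain_init_pgtable(struct protection_domain *domain)
{
	struct io_pgtable_ops *pgtbl_ops;

	pgtbl_ops = alloc_io_pgtable_ops(AMD_IOMMU_V1,
					 &domain->iop.pgtbl_cfg, domain);
	if (!pgtbl_ops)
		return -ENOMEM;

	/* Mappings now go through iommu_v1_map_page()/iommu_v1_unmap_page(). */
	return 0;
}

The matching teardown is visible further down in this diff: protection_domain_free() calls free_io_pgtable_ops(&domain->iop.iop.ops), which ends up in v1_free_pgtable() above.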
@@ -438,29 +385,6 @@ static void amd_iommu_uninit_device(struct device *dev) */ } -/* - * Helper function to get the first pte of a large mapping - */ -static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, - unsigned long *count) -{ - unsigned long pte_mask, pg_size, cnt; - u64 *fpte; - - pg_size = PTE_PAGE_SIZE(*pte); - cnt = PAGE_SIZE_PTE_COUNT(pg_size); - pte_mask = ~((cnt << 3) - 1); - fpte = (u64 *)(((unsigned long)pte) & pte_mask); - - if (page_size) - *page_size = pg_size; - - if (count) - *count = cnt; - - return fpte; -} - /**************************************************************************** * * Interrupt handling functions @@ -928,33 +852,58 @@ static void build_inv_dte(struct iommu_cmd *cmd, u16 devid) CMD_SET_TYPE(cmd, CMD_INV_DEV_ENTRY); } -static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, - size_t size, u16 domid, int pde) +/* + * Builds an invalidation address which is suitable for one page or multiple + * pages. Sets the size bit (S) as needed is more than one page is flushed. + */ +static inline u64 build_inv_address(u64 address, size_t size) { - u64 pages; - bool s; + u64 pages, end, msb_diff; pages = iommu_num_pages(address, size, PAGE_SIZE); - s = false; - if (pages > 1) { + if (pages == 1) + return address & PAGE_MASK; + + end = address + size - 1; + + /* + * msb_diff would hold the index of the most significant bit that + * flipped between the start and end. + */ + msb_diff = fls64(end ^ address) - 1; + + /* + * Bits 63:52 are sign extended. If for some reason bit 51 is different + * between the start and the end, invalidate everything. + */ + if (unlikely(msb_diff > 51)) { + address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; + } else { /* - * If we have to flush more than one page, flush all - * TLB entries for this domain + * The msb-bit must be clear on the address. Just set all the + * lower bits. 
*/ - address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - s = true; + address |= 1ull << (msb_diff - 1); } + /* Clear bits 11:0 */ address &= PAGE_MASK; + /* Set the size bit - we flush more than one 4kb page */ + return address | CMD_INV_IOMMU_PAGES_SIZE_MASK; +} + +static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, + size_t size, u16 domid, int pde) +{ + u64 inv_address = build_inv_address(address, size); + memset(cmd, 0, sizeof(*cmd)); cmd->data[1] |= domid; - cmd->data[2] = lower_32_bits(address); - cmd->data[3] = upper_32_bits(address); + cmd->data[2] = lower_32_bits(inv_address); + cmd->data[3] = upper_32_bits(inv_address); CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES); - if (s) /* size bit - we flush more than one 4kb page */ - cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; if (pde) /* PDE bit - we want to flush everything, not only the PTEs */ cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK; } @@ -962,32 +911,15 @@ static void build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address, static void build_inv_iotlb_pages(struct iommu_cmd *cmd, u16 devid, int qdep, u64 address, size_t size) { - u64 pages; - bool s; - - pages = iommu_num_pages(address, size, PAGE_SIZE); - s = false; - - if (pages > 1) { - /* - * If we have to flush more than one page, flush all - * TLB entries for this domain - */ - address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS; - s = true; - } - - address &= PAGE_MASK; + u64 inv_address = build_inv_address(address, size); memset(cmd, 0, sizeof(*cmd)); cmd->data[0] = devid; cmd->data[0] |= (qdep & 0xff) << 24; cmd->data[1] = devid; - cmd->data[2] = lower_32_bits(address); - cmd->data[3] = upper_32_bits(address); + cmd->data[2] = lower_32_bits(inv_address); + cmd->data[3] = upper_32_bits(inv_address); CMD_SET_TYPE(cmd, CMD_INV_IOTLB_PAGES); - if (s) - cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK; } static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, u32 pasid, @@ -1336,12 +1268,12 @@ static void domain_flush_pages(struct protection_domain *domain, } /* Flush the whole IO/TLB for a given protection domain - including PDE */ -static void domain_flush_tlb_pde(struct protection_domain *domain) +void amd_iommu_domain_flush_tlb_pde(struct protection_domain *domain) { __domain_flush_pages(domain, 0, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, 1); } -static void domain_flush_complete(struct protection_domain *domain) +void amd_iommu_domain_flush_complete(struct protection_domain *domain) { int i; @@ -1366,7 +1298,7 @@ static void domain_flush_np_cache(struct protection_domain *domain, spin_lock_irqsave(&domain->lock, flags); domain_flush_pages(domain, iova, size); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); spin_unlock_irqrestore(&domain->lock, flags); } } @@ -1385,443 +1317,6 @@ static void domain_flush_devices(struct protection_domain *domain) /**************************************************************************** * - * The functions below are used the create the page table mappings for - * unity mapped regions. 
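A worked example may help when reading build_inv_address() above; the numbers below are made up for illustration:

/*
 * Flushing size = 0x3000 (three 4K pages) at address = 0x7f1000:
 *
 *   end            = 0x7f1000 + 0x3000 - 1 = 0x7f3fff
 *   address ^ end  = 0x002fff
 *   msb_diff       = fls64(0x2fff) - 1 = 13
 *
 * The smallest naturally aligned power-of-two window covering the range is
 * 2^(13+1) = 16K, so the flush is issued as a single INVALIDATE_IOMMU_PAGES
 * command with the size (S) bit set, instead of falling back to a
 * full-domain flush as the removed code did for anything larger than one
 * page.
 */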
- * - ****************************************************************************/ - -static void free_page_list(struct page *freelist) -{ - while (freelist != NULL) { - unsigned long p = (unsigned long)page_address(freelist); - freelist = freelist->freelist; - free_page(p); - } -} - -static struct page *free_pt_page(unsigned long pt, struct page *freelist) -{ - struct page *p = virt_to_page((void *)pt); - - p->freelist = freelist; - - return p; -} - -#define DEFINE_FREE_PT_FN(LVL, FN) \ -static struct page *free_pt_##LVL (unsigned long __pt, struct page *freelist) \ -{ \ - unsigned long p; \ - u64 *pt; \ - int i; \ - \ - pt = (u64 *)__pt; \ - \ - for (i = 0; i < 512; ++i) { \ - /* PTE present? */ \ - if (!IOMMU_PTE_PRESENT(pt[i])) \ - continue; \ - \ - /* Large PTE? */ \ - if (PM_PTE_LEVEL(pt[i]) == 0 || \ - PM_PTE_LEVEL(pt[i]) == 7) \ - continue; \ - \ - p = (unsigned long)IOMMU_PTE_PAGE(pt[i]); \ - freelist = FN(p, freelist); \ - } \ - \ - return free_pt_page((unsigned long)pt, freelist); \ -} - -DEFINE_FREE_PT_FN(l2, free_pt_page) -DEFINE_FREE_PT_FN(l3, free_pt_l2) -DEFINE_FREE_PT_FN(l4, free_pt_l3) -DEFINE_FREE_PT_FN(l5, free_pt_l4) -DEFINE_FREE_PT_FN(l6, free_pt_l5) - -static struct page *free_sub_pt(unsigned long root, int mode, - struct page *freelist) -{ - switch (mode) { - case PAGE_MODE_NONE: - case PAGE_MODE_7_LEVEL: - break; - case PAGE_MODE_1_LEVEL: - freelist = free_pt_page(root, freelist); - break; - case PAGE_MODE_2_LEVEL: - freelist = free_pt_l2(root, freelist); - break; - case PAGE_MODE_3_LEVEL: - freelist = free_pt_l3(root, freelist); - break; - case PAGE_MODE_4_LEVEL: - freelist = free_pt_l4(root, freelist); - break; - case PAGE_MODE_5_LEVEL: - freelist = free_pt_l5(root, freelist); - break; - case PAGE_MODE_6_LEVEL: - freelist = free_pt_l6(root, freelist); - break; - default: - BUG(); - } - - return freelist; -} - -static void free_pagetable(struct domain_pgtable *pgtable) -{ - struct page *freelist = NULL; - unsigned long root; - - if (pgtable->mode == PAGE_MODE_NONE) - return; - - BUG_ON(pgtable->mode < PAGE_MODE_NONE || - pgtable->mode > PAGE_MODE_6_LEVEL); - - root = (unsigned long)pgtable->root; - freelist = free_sub_pt(root, pgtable->mode, freelist); - - free_page_list(freelist); -} - -/* - * This function is used to add another level to an IO page table. Adding - * another level increases the size of the address space by 9 bits to a size up - * to 64 bits. - */ -static bool increase_address_space(struct protection_domain *domain, - unsigned long address, - gfp_t gfp) -{ - struct domain_pgtable pgtable; - unsigned long flags; - bool ret = true; - u64 *pte; - - spin_lock_irqsave(&domain->lock, flags); - - amd_iommu_domain_get_pgtable(domain, &pgtable); - - if (address <= PM_LEVEL_SIZE(pgtable.mode)) - goto out; - - ret = false; - if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL)) - goto out; - - pte = (void *)get_zeroed_page(gfp); - if (!pte) - goto out; - - *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root)); - - pgtable.root = pte; - pgtable.mode += 1; - update_and_flush_device_table(domain, &pgtable); - domain_flush_complete(domain); - - /* - * Device Table needs to be updated and flushed before the new root can - * be published. 
- */ - amd_iommu_domain_set_pgtable(domain, pte, pgtable.mode); - - ret = true; - -out: - spin_unlock_irqrestore(&domain->lock, flags); - - return ret; -} - -static u64 *alloc_pte(struct protection_domain *domain, - unsigned long address, - unsigned long page_size, - u64 **pte_page, - gfp_t gfp, - bool *updated) -{ - struct domain_pgtable pgtable; - int level, end_lvl; - u64 *pte, *page; - - BUG_ON(!is_power_of_2(page_size)); - - amd_iommu_domain_get_pgtable(domain, &pgtable); - - while (address > PM_LEVEL_SIZE(pgtable.mode)) { - /* - * Return an error if there is no memory to update the - * page-table. - */ - if (!increase_address_space(domain, address, gfp)) - return NULL; - - /* Read new values to check if update was successful */ - amd_iommu_domain_get_pgtable(domain, &pgtable); - } - - - level = pgtable.mode - 1; - pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; - address = PAGE_SIZE_ALIGN(address, page_size); - end_lvl = PAGE_SIZE_LEVEL(page_size); - - while (level > end_lvl) { - u64 __pte, __npte; - int pte_level; - - __pte = *pte; - pte_level = PM_PTE_LEVEL(__pte); - - /* - * If we replace a series of large PTEs, we need - * to tear down all of them. - */ - if (IOMMU_PTE_PRESENT(__pte) && - pte_level == PAGE_MODE_7_LEVEL) { - unsigned long count, i; - u64 *lpte; - - lpte = first_pte_l7(pte, NULL, &count); - - /* - * Unmap the replicated PTEs that still match the - * original large mapping - */ - for (i = 0; i < count; ++i) - cmpxchg64(&lpte[i], __pte, 0ULL); - - *updated = true; - continue; - } - - if (!IOMMU_PTE_PRESENT(__pte) || - pte_level == PAGE_MODE_NONE) { - page = (u64 *)get_zeroed_page(gfp); - - if (!page) - return NULL; - - __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page)); - - /* pte could have been changed somewhere. */ - if (cmpxchg64(pte, __pte, __npte) != __pte) - free_page((unsigned long)page); - else if (IOMMU_PTE_PRESENT(__pte)) - *updated = true; - - continue; - } - - /* No level skipping support yet */ - if (pte_level != level) - return NULL; - - level -= 1; - - pte = IOMMU_PTE_PAGE(__pte); - - if (pte_page && level == end_lvl) - *pte_page = pte; - - pte = &pte[PM_LEVEL_INDEX(level, address)]; - } - - return pte; -} - -/* - * This function checks if there is a PTE for a given dma address. If - * there is one, it returns the pointer to it. - */ -static u64 *fetch_pte(struct protection_domain *domain, - unsigned long address, - unsigned long *page_size) -{ - struct domain_pgtable pgtable; - int level; - u64 *pte; - - *page_size = 0; - - amd_iommu_domain_get_pgtable(domain, &pgtable); - - if (address > PM_LEVEL_SIZE(pgtable.mode)) - return NULL; - - level = pgtable.mode - 1; - pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; - *page_size = PTE_LEVEL_PAGE_SIZE(level); - - while (level > 0) { - - /* Not Present */ - if (!IOMMU_PTE_PRESENT(*pte)) - return NULL; - - /* Large PTE */ - if (PM_PTE_LEVEL(*pte) == 7 || - PM_PTE_LEVEL(*pte) == 0) - break; - - /* No level skipping support yet */ - if (PM_PTE_LEVEL(*pte) != level) - return NULL; - - level -= 1; - - /* Walk to the next level */ - pte = IOMMU_PTE_PAGE(*pte); - pte = &pte[PM_LEVEL_INDEX(level, address)]; - *page_size = PTE_LEVEL_PAGE_SIZE(level); - } - - /* - * If we have a series of large PTEs, make - * sure to return a pointer to the first one. 
- */ - if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL) - pte = first_pte_l7(pte, page_size, NULL); - - return pte; -} - -static struct page *free_clear_pte(u64 *pte, u64 pteval, struct page *freelist) -{ - unsigned long pt; - int mode; - - while (cmpxchg64(pte, pteval, 0) != pteval) { - pr_warn("AMD-Vi: IOMMU pte changed since we read it\n"); - pteval = *pte; - } - - if (!IOMMU_PTE_PRESENT(pteval)) - return freelist; - - pt = (unsigned long)IOMMU_PTE_PAGE(pteval); - mode = IOMMU_PTE_MODE(pteval); - - return free_sub_pt(pt, mode, freelist); -} - -/* - * Generic mapping functions. It maps a physical address into a DMA - * address space. It allocates the page table pages if necessary. - * In the future it can be extended to a generic mapping function - * supporting all features of AMD IOMMU page tables like level skipping - * and full 64 bit address spaces. - */ -static int iommu_map_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long phys_addr, - unsigned long page_size, - int prot, - gfp_t gfp) -{ - struct page *freelist = NULL; - bool updated = false; - u64 __pte, *pte; - int ret, i, count; - - BUG_ON(!IS_ALIGNED(bus_addr, page_size)); - BUG_ON(!IS_ALIGNED(phys_addr, page_size)); - - ret = -EINVAL; - if (!(prot & IOMMU_PROT_MASK)) - goto out; - - count = PAGE_SIZE_PTE_COUNT(page_size); - pte = alloc_pte(dom, bus_addr, page_size, NULL, gfp, &updated); - - ret = -ENOMEM; - if (!pte) - goto out; - - for (i = 0; i < count; ++i) - freelist = free_clear_pte(&pte[i], pte[i], freelist); - - if (freelist != NULL) - updated = true; - - if (count > 1) { - __pte = PAGE_SIZE_PTE(__sme_set(phys_addr), page_size); - __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; - } else - __pte = __sme_set(phys_addr) | IOMMU_PTE_PR | IOMMU_PTE_FC; - - if (prot & IOMMU_PROT_IR) - __pte |= IOMMU_PTE_IR; - if (prot & IOMMU_PROT_IW) - __pte |= IOMMU_PTE_IW; - - for (i = 0; i < count; ++i) - pte[i] = __pte; - - ret = 0; - -out: - if (updated) { - unsigned long flags; - - spin_lock_irqsave(&dom->lock, flags); - /* - * Flush domain TLB(s) and wait for completion. Any Device-Table - * Updates and flushing already happened in - * increase_address_space(). - */ - domain_flush_tlb_pde(dom); - domain_flush_complete(dom); - spin_unlock_irqrestore(&dom->lock, flags); - } - - /* Everything flushed out, free pages now */ - free_page_list(freelist); - - return ret; -} - -static unsigned long iommu_unmap_page(struct protection_domain *dom, - unsigned long bus_addr, - unsigned long page_size) -{ - unsigned long long unmapped; - unsigned long unmap_size; - u64 *pte; - - BUG_ON(!is_power_of_2(page_size)); - - unmapped = 0; - - while (unmapped < page_size) { - - pte = fetch_pte(dom, bus_addr, &unmap_size); - - if (pte) { - int i, count; - - count = PAGE_SIZE_PTE_COUNT(unmap_size); - for (i = 0; i < count; i++) - pte[i] = 0ULL; - } - - bus_addr = (bus_addr & ~(unmap_size - 1)) + unmap_size; - unmapped += unmap_size; - } - - BUG_ON(unmapped && !is_power_of_2(unmapped)); - - return unmapped; -} - -/**************************************************************************** - * * The next functions belong to the domain allocation. A domain is * allocated for every IOMMU as the default domain. If device isolation * is enabled, every device get its own domain. 
The most important thing @@ -1897,17 +1392,16 @@ static void free_gcr3_table(struct protection_domain *domain) } static void set_dte_entry(u16 devid, struct protection_domain *domain, - struct domain_pgtable *pgtable, bool ats, bool ppr) { u64 pte_root = 0; u64 flags = 0; u32 old_domid; - if (pgtable->mode != PAGE_MODE_NONE) - pte_root = iommu_virt_to_phys(pgtable->root); + if (domain->iop.mode != PAGE_MODE_NONE) + pte_root = iommu_virt_to_phys(domain->iop.root); - pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK) + pte_root |= (domain->iop.mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; @@ -1980,7 +1474,6 @@ static void clear_dte_entry(u16 devid) static void do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { - struct domain_pgtable pgtable; struct amd_iommu *iommu; bool ats; @@ -1996,8 +1489,7 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_cnt += 1; /* Update device table */ - amd_iommu_domain_get_pgtable(domain, &pgtable); - set_dte_entry(dev_data->devid, domain, &pgtable, + set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2); clone_aliases(dev_data->pdev); @@ -2021,10 +1513,10 @@ static void do_detach(struct iommu_dev_data *dev_data) device_flush_dte(dev_data); /* Flush IOTLB */ - domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_tlb_pde(domain); /* Wait for the flushes to finish */ - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); /* decrease reference counters - needs to happen after the flushes */ domain->dev_iommu[iommu->index] -= 1; @@ -2038,33 +1530,9 @@ static void pdev_iommuv2_disable(struct pci_dev *pdev) pci_disable_pasid(pdev); } -/* FIXME: Change generic reset-function to do the same */ -static int pri_reset_while_enabled(struct pci_dev *pdev) -{ - u16 control; - int pos; - - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); - if (!pos) - return -EINVAL; - - pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control); - control |= PCI_PRI_CTRL_RESET; - pci_write_config_word(pdev, pos + PCI_PRI_CTRL, control); - - return 0; -} - static int pdev_iommuv2_enable(struct pci_dev *pdev) { - bool reset_enable; - int reqs, ret; - - /* FIXME: Hardcode number of outstanding requests for now */ - reqs = 32; - if (pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_LIMIT_REQ_ONE)) - reqs = 1; - reset_enable = pdev_pri_erratum(pdev, AMD_PRI_DEV_ERRATUM_ENABLE_RESET); + int ret; /* Only allow access to user-accessible pages */ ret = pci_enable_pasid(pdev, 0); @@ -2077,16 +1545,11 @@ static int pdev_iommuv2_enable(struct pci_dev *pdev) goto out_err; /* Enable PRI */ - ret = pci_enable_pri(pdev, reqs); + /* FIXME: Hardcode number of outstanding requests for now */ + ret = pci_enable_pri(pdev, 32); if (ret) goto out_err; - if (reset_enable) { - ret = pri_reset_while_enabled(pdev); - if (ret) - goto out_err; - } - ret = pci_enable_ats(pdev, PAGE_SHIFT); if (ret) goto out_err; @@ -2157,9 +1620,9 @@ skip_ats_check: * left the caches in the IOMMU dirty. So we have to flush * here to evict all dirty stuff. 
*/ - domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_tlb_pde(domain); - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); out: spin_unlock(&dev_data->lock); @@ -2222,9 +1685,6 @@ static struct iommu_device *amd_iommu_probe_device(struct device *dev) return ERR_PTR(-ENODEV); devid = get_device_id(dev); - if (devid < 0) - return ERR_PTR(devid); - iommu = amd_iommu_rlookup_table[devid]; if (dev_iommu_priv_get(dev)) @@ -2278,62 +1738,37 @@ static struct iommu_group *amd_iommu_device_group(struct device *dev) return acpihid_device_group(dev); } -static int amd_iommu_domain_get_attr(struct iommu_domain *domain, - enum iommu_attr attr, void *data) -{ - switch (domain->type) { - case IOMMU_DOMAIN_UNMANAGED: - return -ENODEV; - case IOMMU_DOMAIN_DMA: - switch (attr) { - case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE: - *(int *)data = !amd_iommu_unmap_flush; - return 0; - default: - return -ENODEV; - } - break; - default: - return -EINVAL; - } -} - /***************************************************************************** * * The next functions belong to the dma_ops mapping/unmapping code. * *****************************************************************************/ -static void update_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable) +static void update_device_table(struct protection_domain *domain) { struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { - set_dte_entry(dev_data->devid, domain, pgtable, + set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled, dev_data->iommu_v2); clone_aliases(dev_data->pdev); } } -static void update_and_flush_device_table(struct protection_domain *domain, - struct domain_pgtable *pgtable) +void amd_iommu_update_and_flush_device_table(struct protection_domain *domain) { - update_device_table(domain, pgtable); + update_device_table(domain); domain_flush_devices(domain); } -static void update_domain(struct protection_domain *domain) +void amd_iommu_domain_update(struct protection_domain *domain) { - struct domain_pgtable pgtable; - /* Update device table */ - amd_iommu_domain_get_pgtable(domain, &pgtable); - update_and_flush_device_table(domain, &pgtable); + amd_iommu_update_and_flush_device_table(domain); /* Flush domain TLB(s) and wait for completion */ - domain_flush_tlb_pde(domain); - domain_flush_complete(domain); + amd_iommu_domain_flush_tlb_pde(domain); + amd_iommu_domain_flush_complete(domain); } int __init amd_iommu_init_api(void) @@ -2367,7 +1802,7 @@ int __init amd_iommu_init_dma_ops(void) pr_info("IO/TLB flush on unmap enabled\n"); else pr_info("Lazy IO/TLB flushing enabled\n"); - + iommu_set_dma_strict(amd_iommu_unmap_flush); return 0; } @@ -2401,22 +1836,19 @@ static void cleanup_domain(struct protection_domain *domain) static void protection_domain_free(struct protection_domain *domain) { - struct domain_pgtable pgtable; - if (!domain) return; if (domain->id) domain_id_free(domain->id); - amd_iommu_domain_get_pgtable(domain, &pgtable); - amd_iommu_domain_clr_pt_root(domain); - free_pagetable(&pgtable); + if (domain->iop.pgtbl_cfg.tlb) + free_io_pgtable_ops(&domain->iop.iop.ops); kfree(domain); } -static int protection_domain_init(struct protection_domain *domain, int mode) +static int protection_domain_init_v1(struct protection_domain *domain, int mode) { u64 *pt_root = NULL; @@ -2439,34 +1871,55 @@ static int protection_domain_init(struct protection_domain *domain, int mode) return 0; } -static struct protection_domain *protection_domain_alloc(int 
mode) +static struct protection_domain *protection_domain_alloc(unsigned int type) { + struct io_pgtable_ops *pgtbl_ops; struct protection_domain *domain; + int pgtable = amd_iommu_pgtable; + int mode = DEFAULT_PGTABLE_LEVEL; + int ret; domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) return NULL; - if (protection_domain_init(domain, mode)) + /* + * Force IOMMU v1 page table when iommu=pt and + * when allocating domain for pass-through devices. + */ + if (type == IOMMU_DOMAIN_IDENTITY) { + pgtable = AMD_IOMMU_V1; + mode = PAGE_MODE_NONE; + } else if (type == IOMMU_DOMAIN_UNMANAGED) { + pgtable = AMD_IOMMU_V1; + } + + switch (pgtable) { + case AMD_IOMMU_V1: + ret = protection_domain_init_v1(domain, mode); + break; + default: + ret = -EINVAL; + } + + if (ret) goto out_err; - return domain; + pgtbl_ops = alloc_io_pgtable_ops(pgtable, &domain->iop.pgtbl_cfg, domain); + if (!pgtbl_ops) + goto out_err; + return domain; out_err: kfree(domain); - return NULL; } static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) { struct protection_domain *domain; - int mode = DEFAULT_PGTABLE_LEVEL; - - if (type == IOMMU_DOMAIN_IDENTITY) - mode = PAGE_MODE_NONE; - domain = protection_domain_alloc(mode); + domain = protection_domain_alloc(type); if (!domain) return NULL; @@ -2513,16 +1966,12 @@ static void amd_iommu_detach_device(struct iommu_domain *dom, struct device *dev) { struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev); + int devid = get_device_id(dev); struct amd_iommu *iommu; - int devid; if (!check_device(dev)) return; - devid = get_device_id(dev); - if (devid < 0) - return; - if (dev_data->domain != NULL) detach_device(dev); @@ -2581,12 +2030,12 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, gfp_t gfp) { struct protection_domain *domain = to_pdomain(dom); - struct domain_pgtable pgtable; + struct io_pgtable_ops *ops = &domain->iop.iop.ops; int prot = 0; - int ret; + int ret = -EINVAL; - amd_iommu_domain_get_pgtable(domain, &pgtable); - if (pgtable.mode == PAGE_MODE_NONE) + if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + (domain->iop.mode == PAGE_MODE_NONE)) return -EINVAL; if (iommu_prot & IOMMU_READ) @@ -2594,9 +2043,10 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, if (iommu_prot & IOMMU_WRITE) prot |= IOMMU_PROT_IW; - ret = iommu_map_page(domain, iova, paddr, page_size, prot, gfp); - - domain_flush_np_cache(domain, iova, page_size); + if (ops->map) { + ret = ops->map(ops, iova, paddr, page_size, prot, gfp); + domain_flush_np_cache(domain, iova, page_size); + } return ret; } @@ -2606,36 +2056,22 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, struct iommu_iotlb_gather *gather) { struct protection_domain *domain = to_pdomain(dom); - struct domain_pgtable pgtable; + struct io_pgtable_ops *ops = &domain->iop.iop.ops; - amd_iommu_domain_get_pgtable(domain, &pgtable); - if (pgtable.mode == PAGE_MODE_NONE) + if ((amd_iommu_pgtable == AMD_IOMMU_V1) && + (domain->iop.mode == PAGE_MODE_NONE)) return 0; - return iommu_unmap_page(domain, iova, page_size); + return (ops->unmap) ? 
ops->unmap(ops, iova, page_size, gather) : 0; } static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, dma_addr_t iova) { struct protection_domain *domain = to_pdomain(dom); - unsigned long offset_mask, pte_pgsize; - struct domain_pgtable pgtable; - u64 *pte, __pte; + struct io_pgtable_ops *ops = &domain->iop.iop.ops; - amd_iommu_domain_get_pgtable(domain, &pgtable); - if (pgtable.mode == PAGE_MODE_NONE) - return iova; - - pte = fetch_pte(domain, iova, &pte_pgsize); - - if (!pte || !IOMMU_PTE_PRESENT(*pte)) - return 0; - - offset_mask = pte_pgsize - 1; - __pte = __sme_clr(*pte & PM_ADDR_MASK); - - return (__pte & ~offset_mask) | (iova & offset_mask); + return ops->iova_to_phys(ops, iova); } static bool amd_iommu_capable(enum iommu_cap cap) @@ -2721,8 +2157,8 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) unsigned long flags; spin_lock_irqsave(&dom->lock, flags); - domain_flush_tlb_pde(dom); - domain_flush_complete(dom); + amd_iommu_domain_flush_tlb_pde(dom); + amd_iommu_domain_flush_complete(dom); spin_unlock_irqrestore(&dom->lock, flags); } @@ -2764,7 +2200,6 @@ const struct iommu_ops amd_iommu_ops = { .release_device = amd_iommu_release_device, .probe_finalize = amd_iommu_probe_finalize, .device_group = amd_iommu_device_group, - .domain_get_attr = amd_iommu_domain_get_attr, .get_resv_regions = amd_iommu_get_resv_regions, .put_resv_regions = generic_iommu_put_resv_regions, .is_attach_deferred = amd_iommu_is_attach_deferred, @@ -2800,22 +2235,12 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); void amd_iommu_domain_direct_map(struct iommu_domain *dom) { struct protection_domain *domain = to_pdomain(dom); - struct domain_pgtable pgtable; unsigned long flags; spin_lock_irqsave(&domain->lock, flags); - /* First save pgtable configuration*/ - amd_iommu_domain_get_pgtable(domain, &pgtable); - - /* Remove page-table from domain */ - amd_iommu_domain_clr_pt_root(domain); - - /* Make changes visible to IOMMUs */ - update_domain(domain); - - /* Page-table is not visible to IOMMU anymore, so free it */ - free_pagetable(&pgtable); + if (domain->iop.pgtbl_cfg.tlb) + free_io_pgtable_ops(&domain->iop.iop.ops); spin_unlock_irqrestore(&domain->lock, flags); } @@ -2827,9 +2252,6 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) unsigned long flags; int levels, ret; - if (pasids <= 0 || pasids > (PASID_MASK + 1)) - return -EINVAL; - /* Number of GCR3 table levels required */ for (levels = 0; (pasids - 1) & ~0x1ff; pasids >>= 9) levels += 1; @@ -2856,7 +2278,7 @@ int amd_iommu_domain_enable_v2(struct iommu_domain *dom, int pasids) domain->glx = levels; domain->flags |= PD_IOMMUV2_MASK; - update_domain(domain); + amd_iommu_domain_update(domain); ret = 0; @@ -2893,7 +2315,7 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid, } /* Wait until IOMMU TLB flushes are complete */ - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); /* Now flush device TLBs */ list_for_each_entry(dev_data, &domain->dev_list, list) { @@ -2919,7 +2341,7 @@ static int __flush_pasid(struct protection_domain *domain, u32 pasid, } /* Wait until all device TLBs are flushed */ - domain_flush_complete(domain); + amd_iommu_domain_flush_complete(domain); ret = 0; @@ -3004,11 +2426,9 @@ static u64 *__get_gcr3_pte(u64 *root, int level, u32 pasid, bool alloc) static int __set_gcr3(struct protection_domain *domain, u32 pasid, unsigned long cr3) { - struct domain_pgtable pgtable; u64 *pte; - amd_iommu_domain_get_pgtable(domain, &pgtable); - if 
(pgtable.mode != PAGE_MODE_NONE) + if (domain->iop.mode != PAGE_MODE_NONE) return -EINVAL; pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true); @@ -3022,11 +2442,9 @@ static int __set_gcr3(struct protection_domain *domain, u32 pasid, static int __clear_gcr3(struct protection_domain *domain, u32 pasid) { - struct domain_pgtable pgtable; u64 *pte; - amd_iommu_domain_get_pgtable(domain, &pgtable); - if (pgtable.mode != PAGE_MODE_NONE) + if (domain->iop.mode != PAGE_MODE_NONE) return -EINVAL; pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false); @@ -3084,52 +2502,6 @@ int amd_iommu_complete_ppr(struct pci_dev *pdev, u32 pasid, } EXPORT_SYMBOL(amd_iommu_complete_ppr); -struct iommu_domain *amd_iommu_get_v2_domain(struct pci_dev *pdev) -{ - struct protection_domain *pdomain; - struct iommu_dev_data *dev_data; - struct device *dev = &pdev->dev; - struct iommu_domain *io_domain; - - if (!check_device(dev)) - return NULL; - - dev_data = dev_iommu_priv_get(&pdev->dev); - pdomain = dev_data->domain; - io_domain = iommu_get_domain_for_dev(dev); - - if (pdomain == NULL && dev_data->defer_attach) { - dev_data->defer_attach = false; - pdomain = to_pdomain(io_domain); - attach_device(dev, pdomain); - } - - if (pdomain == NULL) - return NULL; - - if (io_domain->type != IOMMU_DOMAIN_DMA) - return NULL; - - /* Only return IOMMUv2 domains */ - if (!(pdomain->flags & PD_IOMMUV2_MASK)) - return NULL; - - return &pdomain->domain; -} -EXPORT_SYMBOL(amd_iommu_get_v2_domain); - -void amd_iommu_enable_device_erratum(struct pci_dev *pdev, u32 erratum) -{ - struct iommu_dev_data *dev_data; - - if (!amd_iommu_v2_supported()) - return; - - dev_data = dev_iommu_priv_get(&pdev->dev); - dev_data->errata |= (1 << erratum); -} -EXPORT_SYMBOL(amd_iommu_enable_device_erratum); - int amd_iommu_device_info(struct pci_dev *pdev, struct amd_iommu_device_info *info) { @@ -3191,7 +2563,7 @@ static void set_dte_irq_entry(u16 devid, struct irq_remap_table *table) dte &= ~DTE_IRQ_PHYS_ADDR_MASK; dte |= iommu_virt_to_phys(table->table); dte |= DTE_IRQ_REMAP_INTCTL; - dte |= DTE_IRQ_TABLE_LEN; + dte |= DTE_INTTABLEN; dte |= DTE_IRQ_REMAP_ENABLE; amd_iommu_dev_table[devid].data[2] = dte; @@ -3466,7 +2838,7 @@ static void free_irte(u16 devid, int index) } static void irte_prepare(void *entry, - u32 delivery_mode, u32 dest_mode, + u32 delivery_mode, bool dest_mode, u8 vector, u32 dest_apicid, int devid) { union irte *irte = (union irte *) entry; @@ -3480,7 +2852,7 @@ static void irte_prepare(void *entry, } static void irte_ga_prepare(void *entry, - u32 delivery_mode, u32 dest_mode, + u32 delivery_mode, bool dest_mode, u8 vector, u32 dest_apicid, int devid) { struct irte_ga *irte = (struct irte_ga *) entry; @@ -3602,10 +2974,8 @@ static int get_devid(struct irq_alloc_info *info) { switch (info->type) { case X86_IRQ_ALLOC_TYPE_IOAPIC: - case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT: return get_ioapic_devid(info->devid); case X86_IRQ_ALLOC_TYPE_HPET: - case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT: return get_hpet_devid(info->devid); case X86_IRQ_ALLOC_TYPE_PCI_MSI: case X86_IRQ_ALLOC_TYPE_PCI_MSIX: @@ -3616,54 +2986,28 @@ static int get_devid(struct irq_alloc_info *info) } } -static struct irq_domain *get_irq_domain_for_devid(struct irq_alloc_info *info, - int devid) -{ - struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; - - if (!iommu) - return NULL; - - switch (info->type) { - case X86_IRQ_ALLOC_TYPE_IOAPIC_GET_PARENT: - case X86_IRQ_ALLOC_TYPE_HPET_GET_PARENT: - return iommu->ir_domain; - default: - WARN_ON_ONCE(1); 
- return NULL; - } -} - -static struct irq_domain *get_irq_domain(struct irq_alloc_info *info) -{ - int devid; - - if (!info) - return NULL; - - devid = get_devid(info); - if (devid < 0) - return NULL; - return get_irq_domain_for_devid(info, devid); -} - struct irq_remap_ops amd_iommu_irq_ops = { .prepare = amd_iommu_prepare, .enable = amd_iommu_enable, .disable = amd_iommu_disable, .reenable = amd_iommu_reenable, .enable_faulting = amd_iommu_enable_faulting, - .get_irq_domain = get_irq_domain, }; +static void fill_msi_msg(struct msi_msg *msg, u32 index) +{ + msg->data = index; + msg->address_lo = 0; + msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW; + msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH; +} + static void irq_remapping_prepare_irte(struct amd_ir_data *data, struct irq_cfg *irq_cfg, struct irq_alloc_info *info, int devid, int index, int sub_handle) { struct irq_2_irte *irte_info = &data->irq_2_irte; - struct msi_msg *msg = &data->msi_entry; - struct IO_APIC_route_entry *entry; struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; if (!iommu) @@ -3671,31 +3015,16 @@ static void irq_remapping_prepare_irte(struct amd_ir_data *data, data->irq_2_irte.devid = devid; data->irq_2_irte.index = index + sub_handle; - iommu->irte_ops->prepare(data->entry, apic->irq_delivery_mode, - apic->irq_dest_mode, irq_cfg->vector, + iommu->irte_ops->prepare(data->entry, apic->delivery_mode, + apic->dest_mode_logical, irq_cfg->vector, irq_cfg->dest_apicid, devid); switch (info->type) { case X86_IRQ_ALLOC_TYPE_IOAPIC: - /* Setup IOAPIC entry */ - entry = info->ioapic.entry; - info->ioapic.entry = NULL; - memset(entry, 0, sizeof(*entry)); - entry->vector = index; - entry->mask = 0; - entry->trigger = info->ioapic.trigger; - entry->polarity = info->ioapic.polarity; - /* Mask level triggered irqs. 
*/ - if (info->ioapic.trigger) - entry->mask = 1; - break; - case X86_IRQ_ALLOC_TYPE_HPET: case X86_IRQ_ALLOC_TYPE_PCI_MSI: case X86_IRQ_ALLOC_TYPE_PCI_MSIX: - msg->address_hi = MSI_ADDR_BASE_HI; - msg->address_lo = MSI_ADDR_BASE_LO; - msg->data = irte_info->index; + fill_msi_msg(&data->msi_entry, irte_info->index); break; default: @@ -3892,7 +3221,29 @@ static void irq_remapping_deactivate(struct irq_domain *domain, irte_info->index); } +static int irq_remapping_select(struct irq_domain *d, struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token) +{ + struct amd_iommu *iommu; + int devid = -1; + + if (!amd_iommu_irq_remap) + return 0; + + if (x86_fwspec_is_ioapic(fwspec)) + devid = get_ioapic_devid(fwspec->param[0]); + else if (x86_fwspec_is_hpet(fwspec)) + devid = get_hpet_devid(fwspec->param[0]); + + if (devid < 0) + return 0; + + iommu = amd_iommu_rlookup_table[devid]; + return iommu && iommu->ir_domain == d; +} + static const struct irq_domain_ops amd_ir_domain_ops = { + .select = irq_remapping_select, .alloc = irq_remapping_alloc, .free = irq_remapping_free, .activate = irq_remapping_activate, @@ -3943,8 +3294,8 @@ int amd_iommu_deactivate_guest_mode(void *data) entry->hi.val = 0; entry->lo.fields_remap.valid = valid; - entry->lo.fields_remap.dm = apic->irq_dest_mode; - entry->lo.fields_remap.int_type = apic->irq_delivery_mode; + entry->lo.fields_remap.dm = apic->dest_mode_logical; + entry->lo.fields_remap.int_type = apic->delivery_mode; entry->hi.fields.vector = cfg->vector; entry->lo.fields_remap.destination = APICID_TO_IRTE_DEST_LO(cfg->dest_apicid); diff --git a/drivers/iommu/amd/iommu_v2.c b/drivers/iommu/amd/iommu_v2.c index 5ecc0bc608ec..f8d4ad421e07 100644 --- a/drivers/iommu/amd/iommu_v2.c +++ b/drivers/iommu/amd/iommu_v2.c @@ -77,7 +77,7 @@ struct fault { }; static LIST_HEAD(state_list); -static spinlock_t state_lock; +static DEFINE_SPINLOCK(state_lock); static struct workqueue_struct *iommu_wq; @@ -938,8 +938,6 @@ static int __init amd_iommu_v2_init(void) return 0; } - spin_lock_init(&state_lock); - ret = -ENOMEM; iommu_wq = alloc_workqueue("amd_iommu_v2", WQ_MEM_RECLAIM, 0); if (iommu_wq == NULL) |
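To make the shape of the conversion concrete: after this series, amd_iommu_map(), amd_iommu_unmap() and amd_iommu_iova_to_phys() no longer walk the v1 page table by hand; they call through the io_pgtable_ops that protection_domain_alloc() obtains from alloc_io_pgtable_ops(). The sketch below is illustrative only and is not part of the diff: the example_* function names are made up, locking, TLB flushing and the full error paths are omitted, and the field names (domain->iop.pgtbl_cfg, domain->iop.iop.ops) follow the hunks above.

  #include <linux/io-pgtable.h>
  #include <linux/iommu.h>
  #include "amd_iommu_types.h"    /* struct protection_domain / struct amd_io_pgtable */

  /* Sketch: attach a v1 io-pgtable to a domain. The cookie (last argument)
   * is handed back to the format's TLB callbacks. */
  static int example_pgtable_setup(struct protection_domain *domain)
  {
          struct io_pgtable_ops *ops;

          ops = alloc_io_pgtable_ops(AMD_IOMMU_V1, &domain->iop.pgtbl_cfg, domain);
          if (!ops)
                  return -ENOMEM;

          return 0;
  }

  /* Sketch: what the map/unmap paths reduce to once the old
   * iommu_map_page()/iommu_unmap_page() walkers are gone. The driver
   * translates IOMMU_READ/IOMMU_WRITE into IOMMU_PROT_IR/IW before
   * this point. */
  static int example_map(struct protection_domain *domain, unsigned long iova,
                         phys_addr_t paddr, size_t size, int prot, gfp_t gfp)
  {
          struct io_pgtable_ops *ops = &domain->iop.iop.ops;

          return ops->map ? ops->map(ops, iova, paddr, size, prot, gfp) : -EINVAL;
  }

  static size_t example_unmap(struct protection_domain *domain, unsigned long iova,
                              size_t size, struct iommu_iotlb_gather *gather)
  {
          struct io_pgtable_ops *ops = &domain->iop.iop.ops;

          return ops->unmap ? ops->unmap(ops, iova, size, gather) : 0;
  }

Keeping the format selection in the single switch inside protection_domain_alloc() is what leaves room for additional io_pgtable_fmt values alongside AMD_IOMMU_V1 without touching the map/unmap paths again.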